aboutsummaryrefslogtreecommitdiff
path: root/src/format.rs
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2023-11-27 17:26:59 +0100
committerAlex Auvolat <alex@adnab.me>2023-11-27 17:26:59 +0100
commitd2a46c25219c21ac4f128da8512302935654d38e (patch)
treea6d66ac4639e4d68fe57f9e8da72b08ecfb14d9f /src/format.rs
parentb15723f33b486124a50408873d30998bb9d31b3b (diff)
downloaddatagengo-d2a46c25219c21ac4f128da8512302935654d38e.tar.gz
datagengo-d2a46c25219c21ac4f128da8512302935654d38e.zip
split code into several files
Diffstat (limited to 'src/format.rs')
-rw-r--r--src/format.rs349
1 files changed, 349 insertions, 0 deletions
diff --git a/src/format.rs b/src/format.rs
new file mode 100644
index 0000000..1cdde1b
--- /dev/null
+++ b/src/format.rs
@@ -0,0 +1,349 @@
+use std::fs;
+
+use anyhow::Result;
+
+use crate::charset::Charset;
+use crate::*;
+
+// =====================================================================
+// FORMATTING TO HTML
+// =====================================================================
+
+pub fn format_batch<'a>(dict_idx: &DictIndex<'a>, count: usize, (i, batch): (usize, &Batch)) {
+ format_batch_aux(dict_idx, count, i, batch).expect("format batch");
+}
+
+fn format_batch_aux<'a>(
+ dict_idx: &DictIndex<'a>,
+ count: usize,
+ i: usize,
+ batch: &Batch,
+) -> Result<()> {
+ let mut f = io::BufWriter::new(fs::File::create(format!("public/{:03}.html", i))?);
+ write!(
+ f,
+ r#"<!DOCTYPE html>
+ <html>
+ <head>
+ <meta charset=\"UTF-8\" />
+ <title>Batch #{:03}</title>
+ <link rel="stylesheet" type="text/css" href="style.css" />
+ </head>
+ <body><div class="batch_page">"#,
+ i
+ )?;
+
+ writeln!(f, r#"<p><a href="index.html">index</a>"#)?;
+ for j in 0..count {
+ if j != i {
+ writeln!(f, r#" <a href="{:03}.html">{:03}</a>"#, j, j)?;
+ } else {
+ writeln!(f, " {:03}", j)?;
+ }
+ }
+ writeln!(f, r#"</p>"#)?;
+ writeln!(f, "<p>Level: {}</p>", batch.level)?;
+
+ write!(f, r#"<p class="ja">"#)?;
+ let mut ex_prev = Charset::default();
+ for ex in batch.examples.iter() {
+ let ex_chars = ex.chars.inter(&batch.chars);
+ for c in ex_chars.diff(&ex_prev).chars().iter() {
+ write!(
+ f,
+ r#"<a href="https://jisho.org/search/{}%20%23kanji">{}</a>"#,
+ c, c
+ )?;
+ }
+ ex_prev = ex_prev.union(&ex_chars);
+ }
+ writeln!(f, r#"</p>"#)?;
+
+ for ex in batch.examples.iter() {
+ writeln!(f, "<hr />")?;
+ write!(f, r#"<p class="ja">"#)?;
+ for c in ex.ja.chars() {
+ if batch.chars.contains(c) {
+ write!(f, r#"<span class="char_cur">{}</span>"#, c)?;
+ } else if batch.chars_p1.contains(c) {
+ write!(f, r#"<span class="char_p1">{}</span>"#, c)?;
+ } else if batch.chars_p2.contains(c) {
+ write!(f, r#"<span class="char_p2">{}</span>"#, c)?;
+ } else if batch.chars_bad.contains(c) {
+ write!(f, r#"<span class="char_bad">{}</span>"#, c)?;
+ } else {
+ write!(f, "{}", c)?;
+ }
+ }
+ writeln!(f, "</p>")?;
+ writeln!(f, r#"<p class="en">{}</p>"#, ex.en)?;
+
+ writeln!(f, r#"<details><summary>Explanation</summary>"#)?;
+ let mut expl_batch = Vec::new();
+ let mut expl_all = Vec::new();
+ for word in ex.expl.split(|c| c == ' ' || c == '~') {
+ let (keb, reb) = expl_clean_word(word);
+ let wchars = Charset::new(keb);
+ if !wchars.intersects(&ex.chars) {
+ continue;
+ }
+ if let Some(ents) = dict_idx.get(keb) {
+ for ent in ents.iter() {
+ if let Some(s) = dict_str(keb, reb, ent) {
+ if wchars.intersects(&batch.chars) {
+ expl_batch.push(s);
+ } else {
+ expl_all.push(s);
+ }
+ }
+ }
+ }
+ }
+ for be in expl_batch {
+ writeln!(f, r#"<p>{}</p>"#, be)?;
+ }
+ writeln!(f, r#"<p class="chars">"#)?;
+ for c in ex.chars.inter(&batch.chars).chars().iter() {
+ writeln!(
+ f,
+ r#"<a href="https://jisho.org/search/{}%20%23kanji">{}</a>"#,
+ c, c
+ )?;
+ }
+ writeln!(f, r#"</p>"#)?;
+ for be in expl_all {
+ writeln!(f, r#"<p>{}</p>"#, be)?;
+ }
+ writeln!(f, r#"</details>"#)?;
+ }
+
+ writeln!(f, "<hr />")?;
+ format_vocab(
+ &mut f,
+ &batch
+ .extra_vocab
+ .iter()
+ .filter(|v| batch.level.contains(&v.level))
+ .collect::<Vec<_>>(),
+ "Extra vocabulary (this level)",
+ )?;
+ format_vocab(
+ &mut f,
+ &batch
+ .extra_vocab
+ .iter()
+ .filter(|v| !batch.level.contains(&v.level))
+ .collect::<Vec<_>>(),
+ "Extra vocabulary (previous levels)",
+ )?;
+
+ writeln!(
+ f,
+ r#"<details><summary>Extra examples (reading practice)</summary><table class="extratable">"#
+ )?;
+ for ex in batch.extra_examples.iter() {
+ let mut expl1 = Vec::new();
+ let mut expl2 = Vec::new();
+ for word in ex.expl.split(|c| c == ' ' || c == '~') {
+ let (keb, reb) = expl_clean_word(word);
+ let wchars = Charset::new(keb);
+ if !wchars.intersects(&ex.chars) {
+ continue;
+ }
+ if let Some(ents) = dict_idx.get(keb) {
+ for ent in ents.iter() {
+ if let Some(s) = dict_str_short(keb, reb, ent) {
+ if wchars.intersects(&batch.chars) {
+ expl1.push(s);
+ } else {
+ expl2.push(s);
+ }
+ }
+ }
+ }
+ }
+ expl1.extend(expl2.into_iter());
+ let expl = expl1.join("<br />");
+ writeln!(
+ f,
+ r#"<tr><td><details><summary class="tab_large2 font_ja">&nbsp;&nbsp;{}&nbsp;&nbsp;</summary><div style="text-align: center">{}<br />{}</div></details></td></tr>"#,
+ ex.ja, ex.en, expl
+ )?;
+ }
+ writeln!(f, r#"</table></details>"#)?;
+
+ writeln!(f, "<hr />")?;
+ writeln!(f, "<p>\(≧▽≦)/</p>")?;
+
+ write!(f, "<div></body></html>")?;
+ f.flush()?;
+ Ok(())
+}
+
+fn format_vocab(f: &mut impl Write, vocab: &[&JlptVocab], t: &str) -> Result<()> {
+ if !vocab.is_empty() {
+ writeln!(
+ f,
+ r#"<details><summary>{}</summary><table class="vocabtable">"#,
+ t
+ )?;
+ for v in vocab {
+ writeln!(
+ f,
+ r#"<tr><td>{}</td><td>&nbsp;&nbsp;<span class="tab_large font_ja">{}</span>&nbsp;&nbsp;</td><td>{}</td><td class="font_ja">{}</td></tr>"#,
+ v.level, v.kanji, v.en, v.kana
+ )?;
+ }
+ writeln!(f, "</table></details>")?;
+ }
+ Ok(())
+}
+
/// Splits one annotated explanation word into its headword and optional reading.
///
/// The headword is everything before the first `(`, `{` or `[` (the whole
/// input when none occurs). The reading is the text between the first `(` and
/// the first `)` that follows it, or `None` when either is missing.
fn expl_clean_word(w: &str) -> (&str, Option<&str>) {
    // Cut at the earliest annotation delimiter, whichever kind it is.
    let headword = match w.find(|c: char| matches!(c, '(' | '{' | '[')) {
        Some(cut) => &w[..cut],
        None => w,
    };

    // The reading is looked up in the full word, not in the truncated headword.
    let reading = w.find('(').and_then(|open| {
        // '(' is a single byte, so `open + 1` is always a char boundary.
        let after_paren = &w[open + 1..];
        after_paren.find(')').map(|close| &after_paren[..close])
    });

    (headword, reading)
}
+
+fn dict_str_short<'a>(
+ qkeb: &str,
+ qreb: Option<&str>,
+ ent: &roxmltree::Node<'a, 'a>,
+) -> Option<String> {
+ let r_ele = ent.children().find(|x| x.has_tag_name("r_ele")).unwrap();
+ let reb = r_ele.children().find(|x| x.has_tag_name("reb")).unwrap();
+ let reb = reb.text().unwrap().trim();
+
+ if qreb.map(|x| x != reb).unwrap_or(false) {
+ return None;
+ }
+
+ Some(format!(
+ r#"<span class="font_ja">{} 【{}】</span>"#,
+ qkeb, reb
+ ))
+}
+
+fn dict_str<'a>(qkeb: &str, qreb: Option<&str>, ent: &roxmltree::Node<'a, 'a>) -> Option<String> {
+ let mut ret = dict_str_short(qkeb, qreb, ent)?;
+
+ for sense in ent.children().filter(|x| x.has_tag_name("sense")) {
+ if let Some(s) = sense.children().find(|x| x.has_tag_name("gloss")) {
+ ret.extend(format!(" {};", s.text().unwrap().trim()).chars());
+ }
+ }
+
+ if ret.chars().rev().next() == Some(';') {
+ ret.pop();
+ }
+ Some(ret)
+}
+
+pub fn format_index(batches: &[Batch], kanji_levels: &[(String, String)]) -> Result<()> {
+ let mut f = io::BufWriter::new(fs::File::create("public/index.html")?);
+ write!(
+ f,
+ r#"<!DOCTYPE html>
+ <html>
+ <head>
+ <meta charset=\"UTF-8\" />
+ <title>List of batches</title>
+ <link rel="stylesheet" type="text/css" href="style.css" />
+ </head>
+ <body><div class="index_page">"#
+ )?;
+
+ writeln!(f, r#"<p><a href="about.html">About / How-to</a></p><hr />"#)?;
+
+ writeln!(f, "<table>")?;
+ writeln!(f, "<tr><th>Num</th><th>Level</th><th>Kanji</th><th>Examples</th><th>Lesson-1</th><th>Lesson-2</th><th>Ignore</th></tr>")?;
+ for (i, batch) in batches.iter().enumerate() {
+ writeln!(
+ f,
+ r#"<tr><td><a href="{:03}.html">{:03}</a></td><td>{}</td><td class="font_ja">{}</td><td>&nbsp;&nbsp;{}</td><td class="font_ja">{}</td><td class="font_ja">{}</td><td class="font_ja">{}</td></tr>"#,
+ i,
+ i,
+ batch.level,
+ batch.chars.to_string(),
+ batch.examples.len(),
+ batch.chars_p1.to_string(),
+ batch.chars_p2.to_string(),
+ batch.chars_bad.to_string()
+ )?;
+ }
+ writeln!(f, r#"</table>"#)?;
+
+ writeln!(f, "<hr />")?;
+
+ let all_chars = Charset::from_iter(
+ batches
+ .iter()
+ .map(|x| x.chars.chars().iter().copied())
+ .flatten(),
+ );
+ writeln!(f, "<table>")?;
+ writeln!(
+ f,
+ r#"<tr><th>Level</th><th>Count</th><th width="60%">Kanji</th><th>Missing kanji</th></tr>"#
+ )?;
+ for (lvl, chars) in kanji_levels.iter() {
+ if lvl == "N0+" || lvl.ends_with("-10") {
+ continue;
+ }
+ let chars = Charset::new(chars);
+ let missing = chars.diff(&all_chars);
+ writeln!(
+ f,
+ r#"<tr><td>{}</td><td>{}</td><td class="font_ja">{}</td><td><span class="font_ja">{}</span> ({})</td></tr>"#,
+ lvl,
+ chars.len(),
+ chars.to_string(),
+ missing.to_string(),
+ missing.len()
+ )?;
+ }
+ writeln!(f, "</table>")?;
+
+ write!(f, "</div></body></html>")?;
+ f.flush()?;
+ Ok(())
+}
+
+pub fn format_about() -> Result<()> {
+ let mut f = io::BufWriter::new(fs::File::create("public/about.html")?);
+ write!(
+ f,
+ r#"<!DOCTYPE html>
+ <html>
+ <head>
+ <meta charset=\"UTF-8\" />
+ <title>Datagengo README</title>
+ <link rel="stylesheet" type="text/css" href="style.css" />
+ </head>
+ <body>"#
+ )?;
+
+ writeln!(f, r#"<div class="about_page">"#)?;
+ writeln!(
+ f,
+ r#"<p><a href="index.html">Back to lessons</a></p><hr />"#
+ )?;
+
+ writeln!(
+ f,
+ "{}",
+ markdown::to_html(&fs::read_to_string("README.md")?)
+ )?;
+
+ writeln!(f, r#"</div></body></html>"#)?;
+
+ Ok(())
+}