diff options
author | Alex Auvolat <alex@adnab.me> | 2023-11-27 17:26:59 +0100 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2023-11-27 17:26:59 +0100 |
commit | d2a46c25219c21ac4f128da8512302935654d38e (patch) | |
tree | a6d66ac4639e4d68fe57f9e8da72b08ecfb14d9f /src/format.rs | |
parent | b15723f33b486124a50408873d30998bb9d31b3b (diff) | |
download | datagengo-d2a46c25219c21ac4f128da8512302935654d38e.tar.gz datagengo-d2a46c25219c21ac4f128da8512302935654d38e.zip |
split code into several files
Diffstat (limited to 'src/format.rs')
-rw-r--r-- | src/format.rs | 349 |
1 files changed, 349 insertions, 0 deletions
diff --git a/src/format.rs b/src/format.rs new file mode 100644 index 0000000..1cdde1b --- /dev/null +++ b/src/format.rs @@ -0,0 +1,349 @@ +use std::fs; + +use anyhow::Result; + +use crate::charset::Charset; +use crate::*; + +// ===================================================================== +// FORMATTING TO HTML +// ===================================================================== + +pub fn format_batch<'a>(dict_idx: &DictIndex<'a>, count: usize, (i, batch): (usize, &Batch)) { + format_batch_aux(dict_idx, count, i, batch).expect("format batch"); +} + +fn format_batch_aux<'a>( + dict_idx: &DictIndex<'a>, + count: usize, + i: usize, + batch: &Batch, +) -> Result<()> { + let mut f = io::BufWriter::new(fs::File::create(format!("public/{:03}.html", i))?); + write!( + f, + r#"<!DOCTYPE html> + <html> + <head> + <meta charset=\"UTF-8\" /> + <title>Batch #{:03}</title> + <link rel="stylesheet" type="text/css" href="style.css" /> + </head> + <body><div class="batch_page">"#, + i + )?; + + writeln!(f, r#"<p><a href="index.html">index</a>"#)?; + for j in 0..count { + if j != i { + writeln!(f, r#" <a href="{:03}.html">{:03}</a>"#, j, j)?; + } else { + writeln!(f, " {:03}", j)?; + } + } + writeln!(f, r#"</p>"#)?; + writeln!(f, "<p>Level: {}</p>", batch.level)?; + + write!(f, r#"<p class="ja">"#)?; + let mut ex_prev = Charset::default(); + for ex in batch.examples.iter() { + let ex_chars = ex.chars.inter(&batch.chars); + for c in ex_chars.diff(&ex_prev).chars().iter() { + write!( + f, + r#"<a href="https://jisho.org/search/{}%20%23kanji">{}</a>"#, + c, c + )?; + } + ex_prev = ex_prev.union(&ex_chars); + } + writeln!(f, r#"</p>"#)?; + + for ex in batch.examples.iter() { + writeln!(f, "<hr />")?; + write!(f, r#"<p class="ja">"#)?; + for c in ex.ja.chars() { + if batch.chars.contains(c) { + write!(f, r#"<span class="char_cur">{}</span>"#, c)?; + } else if batch.chars_p1.contains(c) { + write!(f, r#"<span class="char_p1">{}</span>"#, c)?; + } else if batch.chars_p2.contains(c) { + write!(f, r#"<span class="char_p2">{}</span>"#, c)?; + } else if batch.chars_bad.contains(c) { + write!(f, r#"<span class="char_bad">{}</span>"#, c)?; + } else { + write!(f, "{}", c)?; + } + } + writeln!(f, "</p>")?; + writeln!(f, r#"<p class="en">{}</p>"#, ex.en)?; + + writeln!(f, r#"<details><summary>Explanation</summary>"#)?; + let mut expl_batch = Vec::new(); + let mut expl_all = Vec::new(); + for word in ex.expl.split(|c| c == ' ' || c == '~') { + let (keb, reb) = expl_clean_word(word); + let wchars = Charset::new(keb); + if !wchars.intersects(&ex.chars) { + continue; + } + if let Some(ents) = dict_idx.get(keb) { + for ent in ents.iter() { + if let Some(s) = dict_str(keb, reb, ent) { + if wchars.intersects(&batch.chars) { + expl_batch.push(s); + } else { + expl_all.push(s); + } + } + } + } + } + for be in expl_batch { + writeln!(f, r#"<p>{}</p>"#, be)?; + } + writeln!(f, r#"<p class="chars">"#)?; + for c in ex.chars.inter(&batch.chars).chars().iter() { + writeln!( + f, + r#"<a href="https://jisho.org/search/{}%20%23kanji">{}</a>"#, + c, c + )?; + } + writeln!(f, r#"</p>"#)?; + for be in expl_all { + writeln!(f, r#"<p>{}</p>"#, be)?; + } + writeln!(f, r#"</details>"#)?; + } + + writeln!(f, "<hr />")?; + format_vocab( + &mut f, + &batch + .extra_vocab + .iter() + .filter(|v| batch.level.contains(&v.level)) + .collect::<Vec<_>>(), + "Extra vocabulary (this level)", + )?; + format_vocab( + &mut f, + &batch + .extra_vocab + .iter() + .filter(|v| !batch.level.contains(&v.level)) + .collect::<Vec<_>>(), + "Extra vocabulary (previous levels)", + )?; + + writeln!( + f, + r#"<details><summary>Extra examples (reading practice)</summary><table class="extratable">"# + )?; + for ex in batch.extra_examples.iter() { + let mut expl1 = Vec::new(); + let mut expl2 = Vec::new(); + for word in ex.expl.split(|c| c == ' ' || c == '~') { + let (keb, reb) = expl_clean_word(word); + let wchars = Charset::new(keb); + if !wchars.intersects(&ex.chars) { + continue; + } + if let Some(ents) = dict_idx.get(keb) { + for ent in ents.iter() { + if let Some(s) = dict_str_short(keb, reb, ent) { + if wchars.intersects(&batch.chars) { + expl1.push(s); + } else { + expl2.push(s); + } + } + } + } + } + expl1.extend(expl2.into_iter()); + let expl = expl1.join("<br />"); + writeln!( + f, + r#"<tr><td><details><summary class="tab_large2 font_ja"> {} </summary><div style="text-align: center">{}<br />{}</div></details></td></tr>"#, + ex.ja, ex.en, expl + )?; + } + writeln!(f, r#"</table></details>"#)?; + + writeln!(f, "<hr />")?; + writeln!(f, "<p>\(≧▽≦)/</p>")?; + + write!(f, "<div></body></html>")?; + f.flush()?; + Ok(()) +} + +fn format_vocab(f: &mut impl Write, vocab: &[&JlptVocab], t: &str) -> Result<()> { + if !vocab.is_empty() { + writeln!( + f, + r#"<details><summary>{}</summary><table class="vocabtable">"#, + t + )?; + for v in vocab { + writeln!( + f, + r#"<tr><td>{}</td><td> <span class="tab_large font_ja">{}</span> </td><td>{}</td><td class="font_ja">{}</td></tr>"#, + v.level, v.kanji, v.en, v.kana + )?; + } + writeln!(f, "</table></details>")?; + } + Ok(()) +} + +fn expl_clean_word(w: &str) -> (&str, Option<&str>) { + let mut ret = w; + for delim in ['(', '{', '['] { + if let Some((s, _)) = ret.split_once(delim) { + ret = s; + } + } + let p = w + .split_once('(') + .and_then(|(_, r)| r.split_once(')')) + .map(|(p, _)| p); + (ret, p) +} + +fn dict_str_short<'a>( + qkeb: &str, + qreb: Option<&str>, + ent: &roxmltree::Node<'a, 'a>, +) -> Option<String> { + let r_ele = ent.children().find(|x| x.has_tag_name("r_ele")).unwrap(); + let reb = r_ele.children().find(|x| x.has_tag_name("reb")).unwrap(); + let reb = reb.text().unwrap().trim(); + + if qreb.map(|x| x != reb).unwrap_or(false) { + return None; + } + + Some(format!( + r#"<span class="font_ja">{} 【{}】</span>"#, + qkeb, reb + )) +} + +fn dict_str<'a>(qkeb: &str, qreb: Option<&str>, ent: &roxmltree::Node<'a, 'a>) -> Option<String> { + let mut ret = dict_str_short(qkeb, qreb, ent)?; + + for sense in ent.children().filter(|x| x.has_tag_name("sense")) { + if let Some(s) = sense.children().find(|x| x.has_tag_name("gloss")) { + ret.extend(format!(" {};", s.text().unwrap().trim()).chars()); + } + } + + if ret.chars().rev().next() == Some(';') { + ret.pop(); + } + Some(ret) +} + +pub fn format_index(batches: &[Batch], kanji_levels: &[(String, String)]) -> Result<()> { + let mut f = io::BufWriter::new(fs::File::create("public/index.html")?); + write!( + f, + r#"<!DOCTYPE html> + <html> + <head> + <meta charset=\"UTF-8\" /> + <title>List of batches</title> + <link rel="stylesheet" type="text/css" href="style.css" /> + </head> + <body><div class="index_page">"# + )?; + + writeln!(f, r#"<p><a href="about.html">About / How-to</a></p><hr />"#)?; + + writeln!(f, "<table>")?; + writeln!(f, "<tr><th>Num</th><th>Level</th><th>Kanji</th><th>Examples</th><th>Lesson-1</th><th>Lesson-2</th><th>Ignore</th></tr>")?; + for (i, batch) in batches.iter().enumerate() { + writeln!( + f, + r#"<tr><td><a href="{:03}.html">{:03}</a></td><td>{}</td><td class="font_ja">{}</td><td> {}</td><td class="font_ja">{}</td><td class="font_ja">{}</td><td class="font_ja">{}</td></tr>"#, + i, + i, + batch.level, + batch.chars.to_string(), + batch.examples.len(), + batch.chars_p1.to_string(), + batch.chars_p2.to_string(), + batch.chars_bad.to_string() + )?; + } + writeln!(f, r#"</table>"#)?; + + writeln!(f, "<hr />")?; + + let all_chars = Charset::from_iter( + batches + .iter() + .map(|x| x.chars.chars().iter().copied()) + .flatten(), + ); + writeln!(f, "<table>")?; + writeln!( + f, + r#"<tr><th>Level</th><th>Count</th><th width="60%">Kanji</th><th>Missing kanji</th></tr>"# + )?; + for (lvl, chars) in kanji_levels.iter() { + if lvl == "N0+" || lvl.ends_with("-10") { + continue; + } + let chars = Charset::new(chars); + let missing = chars.diff(&all_chars); + writeln!( + f, + r#"<tr><td>{}</td><td>{}</td><td class="font_ja">{}</td><td><span class="font_ja">{}</span> ({})</td></tr>"#, + lvl, + chars.len(), + chars.to_string(), + missing.to_string(), + missing.len() + )?; + } + writeln!(f, "</table>")?; + + write!(f, "</div></body></html>")?; + f.flush()?; + Ok(()) +} + +pub fn format_about() -> Result<()> { + let mut f = io::BufWriter::new(fs::File::create("public/about.html")?); + write!( + f, + r#"<!DOCTYPE html> + <html> + <head> + <meta charset=\"UTF-8\" /> + <title>Datagengo README</title> + <link rel="stylesheet" type="text/css" href="style.css" /> + </head> + <body>"# + )?; + + writeln!(f, r#"<div class="about_page">"#)?; + writeln!( + f, + r#"<p><a href="index.html">Back to lessons</a></p><hr />"# + )?; + + writeln!( + f, + "{}", + markdown::to_html(&fs::read_to_string("README.md")?) + )?; + + writeln!(f, r#"</div></body></html>"#)?; + + Ok(()) +} |