// Static HTML page generation: one page per batch, an index page, and an
// "about" page. All output files are created under `public/`.
//
// NOTE(review): as it stands, this text looks like it has lost every
// `<...>` span: turbofish/generic arguments are missing (`collect::>()`,
// `collect::()`, `-> Option`), several format strings carry fewer `{}`
// placeholders than the arguments passed to them, and some placeholders sit
// in a later literal than the `write!` call that supplies their arguments.
// The `//` banner below is also followed by code on the same physical line.
// Confirm against the repository copy before relying on the literals here.
//
// Functions defined below, in order:
//
// * `format_batch(dict_idx, count, (i, batch))`
//   Thin wrapper around `format_batch_aux`; takes the index and batch as one
//   tuple and panics with "format batch" if the inner call returns an error.
//
// * `format_batch_aux(dict_idx, count, i, batch) -> Result<()>`
//   Creates `public/{i:03}.html` behind a `BufWriter` and writes, in order:
//   - the batch number and one entry per `j in 0..count`, where `j == i`
//     uses a different format string from the others;
//   - `batch.level`;
//   - for each example, the characters of `ex.chars ∩ batch.chars` that no
//     earlier example of the batch already contributed (`ex_prev` holds the
//     running union);
//   - for each example: every char of `ex.ja`, with the format string chosen
//     by testing membership in `batch.chars`, `chars_p1`, `chars_p2`,
//     `chars_bad` in that order (falling back to a plain `{}`); then `ex.en`;
//     then dictionary explanations. `ex.expl` is split on ' ' and '~', each
//     word goes through `expl_clean_word`, words sharing no character with
//     `ex.chars` are skipped, and every dictionary entry that `dict_str`
//     accepts goes to `expl_batch` when the word intersects `batch.chars`,
//     otherwise to `expl_all`. `expl_batch` is written first, then the
//     characters of `ex.chars ∩ batch.chars`, then `expl_all`;
//   - extra vocabulary via two `format_vocab` calls, split on whether
//     `batch.level.contains(&v.level)`;
//   - extra examples: `format_ex_furigana` output with its "[[", "||", "]]"
//     markers passed through `replace`, together with `ex.en`.
//   Flushes the writer explicitly before returning `Ok(())`.
//
// * `format_vocab(f, vocab, t) -> Result<()>`
//   Writes nothing when `vocab` is empty. Otherwise writes the title `t`,
//   then one entry per item from `v.level`, `v.kanji`, `v.en`, `v.kana`.
//
// * `format_ex_furigana(dict_idx, ex) -> String`
//   Rebuilds `ex.ja` with `[[text||reading]]` markers around annotated spans.
//   For each word of `ex.expl` (split on ' ' and '~'):
//   - the surface form is the text between '{' and '}' when present,
//     otherwise the cleaned `keb`;
//   - if the surface form is found in `remainder`, the text before the match
//     is copied to the output and skipped;
//   - each char is consumed from the front of `remainder`; a char that is
//     not at the front is reported on stderr and dropped from the word;
//   - a word sharing no character with `ex.chars` is appended unchanged;
//   - the reading is the explicit `(reb)` if given, otherwise the first
//     reading of each dictionary entry for `keb`, used only when
//     `matches.len() == 1`; otherwise the word is logged on stdout and
//     appended unchanged;
//   - `common_cnt` counts the leading chars shared by the word and `keb`;
//     zero means the whole word is wrapped in one marker. Otherwise the
//     `keb` suffix (or failing that the word suffix) is stripped from the
//     reading when it ends with it;
//   - a dynamic-programming table (`dyntab`) aligns the word prefix with the
//     reading. Cells are `(prev_wi, prev_ri, cost, flag)`. The first row and
//     column cost `100 + index + 1` with the flag unset. A vertical or
//     horizontal step is only a candidate when the cell it comes from has
//     the flag set. A diagonal step costs nothing extra on equal chars and
//     1 on differing chars;
//   - the back-pointer path is walked from the last cell to (0, 0) and then
//     replayed forwards: diagonal steps on equal chars emit the char
//     directly (after writing out any pending buffers as a marker), all
//     other steps accumulate into `wbuf` / `rbuf`;
//   - the word suffix past `common_cnt` is appended last.
//   Any text left in `remainder` after the last word is not appended.
//
// * `expl_clean_word(w) -> (&str, Option<&str>)`
//   First element: `w` cut at '(', then at '{', then at '[' (each applied to
//   the result of the previous cut). Second element: the text between the
//   first '(' and the following ')' of the original `w`, if both exist.
//
// * `dict_str_short(qkeb, qreb, ent)`
//   Reads the text of the first `reb` under the first `r_ele` child of `ent`
//   (each `unwrap` panics if the node or its text is absent). Returns `None`
//   when `qreb` is given and differs from that reading, otherwise
//   `Some("{qkeb} 【{reb}】")`.
//
// * `dict_str(qkeb, qreb, ent)`
//   Starts from `dict_str_short` (propagating `None`), appends " {gloss};"
//   for the first `gloss` child of every `sense` child, and removes a
//   trailing ';'.
//
// * `format_index(batches, kanji_levels) -> Result<()>`
//   Creates `public/index.html`. Writes one entry per batch (index, level,
//   chars, number of examples, chars_p1, chars_p2, chars_bad), then one entry
//   per `(lvl, chars)` of `kanji_levels`, skipping "N0+" and levels ending in
//   "-10": level, char count, chars, and the chars absent from the union of
//   all `batch.chars` with their count. Flushes explicitly.
//
// * `format_about() -> Result<()>`
//   Creates `public/about.html` and embeds `README.md` converted by
//   `markdown::to_html`. Unlike the two writers above it does not call
//   `flush()` before returning.
use std::collections::HashSet; use std::fs; use anyhow::Result; use crate::charset::Charset; use crate::*; // ===================================================================== // FORMATTING TO HTML // ===================================================================== pub fn format_batch<'a>(dict_idx: &DictIndex<'a>, count: usize, (i, batch): (usize, &Batch)) { format_batch_aux(dict_idx, count, i, batch).expect("format batch"); } fn format_batch_aux<'a>( dict_idx: &DictIndex<'a>, count: usize, i: usize, batch: &Batch, ) -> Result<()> { let mut f = io::BufWriter::new(fs::File::create(format!("public/{:03}.html", i))?); write!( f, r#" Batch #{:03}
"#, i )?; writeln!(f, r#"

index"#)?; for j in 0..count { if j != i { writeln!(f, r#" {:03}"#, j, j)?; } else { writeln!(f, " {:03}", j)?; } } writeln!(f, r#"

"#)?; writeln!(f, "

Level: {}

", batch.level)?; write!(f, r#"

"#)?; let mut ex_prev = Charset::default(); for ex in batch.examples.iter() { let ex_chars = ex.chars.inter(&batch.chars); for c in ex_chars.diff(&ex_prev).chars().iter() { write!( f, r#"{}"#, c, c )?; } ex_prev = ex_prev.union(&ex_chars); } writeln!(f, r#"

"#)?; for ex in batch.examples.iter() { writeln!(f, "
")?; write!(f, r#"

"#)?; for c in ex.ja.chars() { if batch.chars.contains(c) { write!(f, r#"{}"#, c)?; } else if batch.chars_p1.contains(c) { write!(f, r#"{}"#, c)?; } else if batch.chars_p2.contains(c) { write!(f, r#"{}"#, c)?; } else if batch.chars_bad.contains(c) { write!(f, r#"{}"#, c)?; } else { write!(f, "{}", c)?; } } writeln!(f, "

")?; writeln!(f, r#"

{}

"#, ex.en)?; writeln!(f, r#"
Explanation"#)?; let mut expl_batch = Vec::new(); let mut expl_all = Vec::new(); for word in ex.expl.split(|c| c == ' ' || c == '~') { let (keb, reb) = expl_clean_word(word); let wchars = Charset::new(keb); if !wchars.intersects(&ex.chars) { continue; } if let Some(ents) = dict_idx.get(keb) { for ent in ents.iter() { if let Some(s) = dict_str(keb, reb, ent) { if wchars.intersects(&batch.chars) { expl_batch.push(s); } else { expl_all.push(s); } } } } } for be in expl_batch { writeln!(f, r#"

{}

"#, be)?; } writeln!(f, r#"

"#)?; for c in ex.chars.inter(&batch.chars).chars().iter() { writeln!( f, r#"{}"#, c, c )?; } writeln!(f, r#"

"#)?; for be in expl_all { writeln!(f, r#"

{}

"#, be)?; } writeln!(f, r#"
"#)?; } writeln!(f, "
")?; format_vocab( &mut f, &batch .extra_vocab .iter() .filter(|v| batch.level.contains(&v.level)) .collect::>(), "Extra vocabulary (this level)", )?; format_vocab( &mut f, &batch .extra_vocab .iter() .filter(|v| !batch.level.contains(&v.level)) .collect::>(), "Extra vocabulary (previous levels)", )?; writeln!( f, r#"
Extra examples (reading practice)"# )?; for ex in batch.extra_examples.iter() { let furi = format_ex_furigana(dict_idx, ex); // println!( // "FURIGANA: {}\n => {}", // ex.ja, // format_ex_furigana(dict_idx, ex) // ); writeln!( f, r#""#, furi.replace("[[", "") .replace("||", "") .replace("]]", ""), ex.en )?; } writeln!(f, r#"
{}
{}
"#)?; writeln!(f, "
")?; writeln!(f, "

\(≧▽≦)/

")?; write!(f, "
")?; f.flush()?; Ok(()) } fn format_vocab(f: &mut impl Write, vocab: &[&JlptVocab], t: &str) -> Result<()> { if !vocab.is_empty() { writeln!( f, r#"
{}"#, t )?; for v in vocab { writeln!( f, r#""#, v.level, v.kanji, v.en, v.kana )?; } writeln!(f, "
{}  {}  {}{}
")?; } Ok(()) } fn format_ex_furigana<'a>(dict_idx: &DictIndex<'a>, ex: &Example) -> String { use std::fmt::Write; let mut remainder = ex.ja.as_str(); let mut ret = String::new(); for word in ex.expl.split(|c| c == ' ' || c == '~') { let (keb, reb) = expl_clean_word(word); let word = word .split_once('{') .and_then(|(_, r)| r.split_once('}')) .map(|(p, _)| p) .unwrap_or(keb); if let Some(i) = remainder.find(word) { ret += &remainder[..i]; remainder = &remainder[i..]; } let mut new_word = String::new(); for c in word.chars() { if remainder.starts_with(c) { remainder = remainder.strip_prefix(c).unwrap(); new_word.push(c); } else { eprintln!("!!!! Char {} is not in remainder !!!!", c); } } let word = &new_word; if !Charset::new(word).intersects(&ex.chars) { ret += word; continue; } let reb = match reb { Some(reb) => reb, None => { let ents = dict_idx.get(keb).map(|x| &x[..]).unwrap_or_default(); let matches = ents .iter() .map(|ent| { let r_ele = ent.children().find(|x| x.has_tag_name("r_ele")).unwrap(); let reb = r_ele.children().find(|x| x.has_tag_name("reb")).unwrap(); reb.text().unwrap().trim() }) .collect::>(); if matches.len() == 1 { *matches.iter().next().unwrap() } else { println!("- word without reb: {}", word); ret += &word; continue; } } }; //println!("+ word: {}, keb: {}, reb: {}", word, keb, reb); let common_cnt = word .chars() .zip(keb.chars()) .take_while(|(x, y)| x == y) .count(); if common_cnt == 0 { // Strange cases write!(&mut ret, "[[{}||{}]]", word, reb).unwrap(); continue; } let keb_suffix = keb.chars().skip(common_cnt).collect::(); let word_suffix = word.chars().skip(common_cnt).collect::(); let reb = reb .strip_suffix(&keb_suffix) .or(reb.strip_suffix(&word_suffix)) .unwrap_or(reb); //println!(" common reb: {}, common word: {}", reb, word.chars().take(common_cnt).collect::()); let wchars = Vec::from_iter(word.chars().take(common_cnt)); let rchars = Vec::from_iter(reb.chars()); // We shall invoke Levhenstein distance let mut dynrow0 = vec![(0, 
0, 0, false)]; for ri in 0..rchars.len() { dynrow0.push((0, ri, 100 + ri + 1, false)); } let mut dyntab = vec![dynrow0]; for (wi, wc) in wchars.iter().enumerate() { let mut dynrow = vec![(wi, 0, 100 + wi + 1, false)]; for (ri, rc) in rchars.iter().enumerate() { let mut x = vec![]; if dyntab[wi][ri + 1].3 { x.push((wi, ri + 1, dyntab[wi][ri + 1].2 + 1, true)); } if dynrow[ri].3 { x.push((wi + 1, ri, dynrow[ri].2 + 1, true)); } if wc == rc { x.push((wi, ri, dyntab[wi][ri].2, false)); } else { x.push((wi, ri, dyntab[wi][ri].2 + 1, true)); } dynrow.push(x.into_iter().min_by_key(|(_, _, w, _)| *w).unwrap()); } dyntab.push(dynrow); } //eprintln!("DYN TAB: {:?}", dyntab); let mut path = vec![(wchars.len(), rchars.len())]; loop { let (wi, ri) = *path.last().unwrap(); let (wi2, ri2, _, _) = dyntab[wi][ri]; path.push((wi2, ri2)); if wi2 == 0 && ri2 == 0 { break; } } path.reverse(); //eprintln!("DYN PATH: {:?}", path); let mut wbuf = String::new(); let mut rbuf = String::new(); for ((wi1, ri1), (wi2, ri2)) in path.iter().copied().zip(path.iter().copied().skip(1)) { if wi2 > wi1 && ri2 > ri1 && wchars[wi1] == rchars[ri1] { if !wbuf.is_empty() || !rbuf.is_empty() { write!(&mut ret, "[[{}||{}]]", wbuf, rbuf).unwrap(); wbuf.clear(); rbuf.clear(); } ret.push(wchars[wi1]); } else { if wi2 > wi1 { wbuf.push(wchars[wi1]); } if ri2 > ri1 { rbuf.push(rchars[ri1]); } } } if !wbuf.is_empty() || !rbuf.is_empty() { write!(&mut ret, "[[{}||{}]]", wbuf, rbuf).unwrap(); } ret += &word_suffix; } ret } fn expl_clean_word(w: &str) -> (&str, Option<&str>) { let mut ret = w; for delim in ['(', '{', '['] { if let Some((s, _)) = ret.split_once(delim) { ret = s; } } let p = w .split_once('(') .and_then(|(_, r)| r.split_once(')')) .map(|(p, _)| p); (ret, p) } fn dict_str_short<'a>( qkeb: &str, qreb: Option<&str>, ent: &roxmltree::Node<'a, 'a>, ) -> Option { let r_ele = ent.children().find(|x| x.has_tag_name("r_ele")).unwrap(); let reb = r_ele.children().find(|x| x.has_tag_name("reb")).unwrap(); let 
reb = reb.text().unwrap().trim(); if qreb.map(|x| x != reb).unwrap_or(false) { return None; } Some(format!( r#"{} 【{}】"#, qkeb, reb )) } fn dict_str<'a>(qkeb: &str, qreb: Option<&str>, ent: &roxmltree::Node<'a, 'a>) -> Option { let mut ret = dict_str_short(qkeb, qreb, ent)?; for sense in ent.children().filter(|x| x.has_tag_name("sense")) { if let Some(s) = sense.children().find(|x| x.has_tag_name("gloss")) { ret.extend(format!(" {};", s.text().unwrap().trim()).chars()); } } if ret.chars().rev().next() == Some(';') { ret.pop(); } Some(ret) } pub fn format_index(batches: &[Batch], kanji_levels: &[(String, String)]) -> Result<()> { let mut f = io::BufWriter::new(fs::File::create("public/index.html")?); write!( f, r#" List of batches
"# )?; writeln!(f, r#"

About / How-to


"#)?; writeln!(f, "")?; writeln!(f, "")?; for (i, batch) in batches.iter().enumerate() { writeln!( f, r#""#, i, i, batch.level, batch.chars.to_string(), batch.examples.len(), batch.chars_p1.to_string(), batch.chars_p2.to_string(), batch.chars_bad.to_string() )?; } writeln!(f, r#"
NumLevelKanjiExamplesLesson-1Lesson-2Ignore
{:03}{}{}  {}{}{}{}
"#)?; writeln!(f, "
")?; let all_chars = Charset::from_iter( batches .iter() .map(|x| x.chars.chars().iter().copied()) .flatten(), ); writeln!(f, "")?; writeln!( f, r#""# )?; for (lvl, chars) in kanji_levels.iter() { if lvl == "N0+" || lvl.ends_with("-10") { continue; } let chars = Charset::new(chars); let missing = chars.diff(&all_chars); writeln!( f, r#""#, lvl, chars.len(), chars.to_string(), missing.to_string(), missing.len() )?; } writeln!(f, "
LevelCountKanjiMissing kanji
{}{}{}{} ({})
")?; write!(f, "
")?; f.flush()?; Ok(()) } pub fn format_about() -> Result<()> { let mut f = io::BufWriter::new(fs::File::create("public/about.html")?); write!( f, r#" Datagengo README "# )?; writeln!(f, r#"
"#)?; writeln!( f, r#"

Back to lessons


"# )?; writeln!( f, "{}", markdown::to_html(&fs::read_to_string("README.md")?) )?; writeln!(f, r#"
"#)?; Ok(()) }