use std::collections::HashSet;
use std::fs;
use anyhow::Result;
use crate::charset::Charset;
use crate::*;
// =====================================================================
// FORMATTING TO HTML
// =====================================================================
/// Renders the page for one batch, panicking if it cannot be written.
///
/// The batch arrives as an `(index, batch)` pair (presumably so the function
/// can be applied directly to an enumerated sequence of batches). All work is
/// delegated to `format_batch_aux`; an error from it aborts with the message
/// `"format batch"`.
pub fn format_batch<'a>(dict_idx: &DictIndex<'a>, count: usize, (i, batch): (usize, &Batch)) {
    let outcome = format_batch_aux(dict_idx, count, i, batch);
    outcome.expect("format batch");
}
/// Writes the page for batch number `i` to `public/{i:03}.html`.
///
/// * `dict_idx` - dictionary index queried with each explanation word to
///   obtain the entries rendered through `dict_str`.
/// * `count` - number of entries written in the page-index list (`0..count`).
/// * `i` - index of this batch; used for the file name and to render the
///   current page differently in the page-index list.
/// * `batch` - the batch whose level, characters, examples and extra
///   vocabulary are rendered.
///
/// Returns an error if the file cannot be created or any write fails.
///
/// NOTE(review): several string literals in this function carry fewer
/// placeholders than arguments, and the `collect::` calls have no type
/// argument; the markup and generics look truncated. Confirm against the
/// intended HTML template before relying on the exact output.
fn format_batch_aux<'a>(
dict_idx: &DictIndex<'a>,
count: usize,
i: usize,
batch: &Batch,
) -> Result<()> {
// All output goes through one buffered writer on the freshly created file.
let mut f = io::BufWriter::new(fs::File::create(format!("public/{:03}.html", i))?);
// Page preamble; the batch index is passed as the format argument.
write!(
f,
r#"
"#,
i
)?;
// Page-index list: one entry per page in `0..count`.
writeln!(f, r#"
index "#)?;
for j in 0..count {
// Every page except the current one goes through the first branch
// (which receives `j` twice); the current page uses the plain branch.
if j != i {
writeln!(f, r#" {:03} "#, j, j)?;
} else {
writeln!(f, " {:03}", j)?;
}
}
writeln!(f, r#"
"#)?;
writeln!(f, "
Level: {}
", batch.level)?;
write!(f, r#"
"#)?;
// List the batch characters in the order the examples introduce them:
// for each example take `ex.chars.inter(&batch.chars)` and emit only what is
// not yet in `ex_prev`, which accumulates everything emitted so far.
// (presumably `inter`/`diff`/`union` are set intersection/difference/union
// on `Charset` — confirm against `crate::charset`.)
let mut ex_prev = Charset::default();
for ex in batch.examples.iter() {
let ex_chars = ex.chars.inter(&batch.chars);
for c in ex_chars.diff(&ex_prev).chars().iter() {
write!(
f,
r#"{} "#,
c, c
)?;
}
ex_prev = ex_prev.union(&ex_chars);
}
writeln!(f, r#"
"#)?;
// One section per example sentence.
for ex in batch.examples.iter() {
writeln!(f, "
")?;
write!(f, r#"
"#)?;
// Write the Japanese sentence character by character. The first set that
// contains the character decides which branch writes it: batch characters,
// then `chars_p1`, `chars_p2`, `chars_bad`; anything else is written raw.
for c in ex.ja.chars() {
if batch.chars.contains(c) {
write!(f, r#"{} "#, c)?;
} else if batch.chars_p1.contains(c) {
write!(f, r#"{} "#, c)?;
} else if batch.chars_p2.contains(c) {
write!(f, r#"{} "#, c)?;
} else if batch.chars_bad.contains(c) {
write!(f, r#"{} "#, c)?;
} else {
write!(f, "{}", c)?;
}
}
writeln!(f, "
")?;
// English text of the example.
writeln!(f, r#"
{}
"#, ex.en)?;
writeln!(f, r#"
Explanation "#)?;
// Dictionary lines for the words of the explanation, split in two groups:
// `expl_batch` for words sharing a character with the batch, `expl_all` for
// the remaining ones.
let mut expl_batch = Vec::new();
let mut expl_all = Vec::new();
// Explanation words are separated by spaces or `~`.
for word in ex.expl.split(|c| c == ' ' || c == '~') {
let (keb, reb) = expl_clean_word(word);
let wchars = Charset::new(keb);
// Skip words that share no character with the example's character set.
if !wchars.intersects(&ex.chars) {
continue;
}
if let Some(ents) = dict_idx.get(keb) {
for ent in ents.iter() {
// `dict_str` returns `None` when the entry's reading does not match `reb`.
if let Some(s) = dict_str(keb, reb, ent) {
if wchars.intersects(&batch.chars) {
expl_batch.push(s);
} else {
expl_all.push(s);
}
}
}
}
}
// Words involving batch characters come first.
for be in expl_batch {
writeln!(f, r#"{}
"#, be)?;
}
writeln!(f, r#""#)?;
// One item per character the example shares with the batch.
for c in ex.chars.inter(&batch.chars).chars().iter() {
writeln!(
f,
r#"{} "#,
c, c
)?;
}
writeln!(f, r#"
"#)?;
// Then the dictionary lines of the other explained words.
for be in expl_all {
writeln!(f, r#"{}
"#, be)?;
}
writeln!(f, r#""#)?;
}
writeln!(f, "
")?;
// Extra vocabulary whose level is contained in the batch level...
format_vocab(
&mut f,
&batch
.extra_vocab
.iter()
.filter(|v| batch.level.contains(&v.level))
.collect::
>(),
"Extra vocabulary (this level)",
)?;
// ...followed by the extra vocabulary from every other level.
format_vocab(
&mut f,
&batch
.extra_vocab
.iter()
.filter(|v| !batch.level.contains(&v.level))
.collect::>(),
"Extra vocabulary (previous levels)",
)?;
// Page footer.
writeln!(
f,
r#"Extra examples (reading practice) "#)?;
writeln!(f, " ")?;
writeln!(f, "\(≧▽≦)/
")?;
write!(f, "")?;
// Flush explicitly so that a failing final write is reported to the caller.
f.flush()?;
Ok(())
}
/// Writes a titled vocabulary section to `f`.
///
/// `t` is the section title. Each item of `vocab` is written on its own line
/// as level, kanji, English meaning and kana, in that order. When `vocab` is
/// empty nothing is written at all, not even the title.
///
/// Returns an error if any write to `f` fails.
fn format_vocab(f: &mut impl Write, vocab: &[&JlptVocab], t: &str) -> Result<()> {
    // An empty list produces no output.
    if vocab.is_empty() {
        return Ok(());
    }

    // Heading line carrying the title.
    writeln!(
        f,
        r#"
{} "#,
        t
    )?;

    // One line per vocabulary item.
    for entry in vocab.iter() {
        writeln!(
            f,
            r#"{} {} {} {} "#,
            entry.level, entry.kanji, entry.en, entry.kana
        )?;
    }

    // Closing line of the section.
    writeln!(f, "
")?;
    Ok(())
}
/// Builds a copy of `ex.ja` in which explained words are annotated with their
/// reading using the `[[text||reading]]` notation.
///
/// The words come from `ex.expl` (separated by spaces or `~`). For each word
/// the reading is either the one given in the explanation or, failing that,
/// the reading found in `dict_idx` when it is unambiguous. The word and its
/// reading are then aligned character by character so that characters present
/// in both are written as-is and only the remaining parts are wrapped in
/// `[[..||..]]`.
///
/// NOTE(review): the `collect::` calls below have lost their type argument;
/// confirm the intended target types.
fn format_ex_furigana<'a>(dict_idx: &DictIndex<'a>, ex: &Example) -> String {
// Local import so that `write!` can append to the `String` result.
use std::fmt::Write;
// `remainder` is the part of the sentence not yet copied into `ret`.
let mut remainder = ex.ja.as_str();
let mut ret = String::new();
for word in ex.expl.split(|c| c == ' ' || c == '~') {
let (keb, reb) = expl_clean_word(word);
// Text to look for in the sentence: the content of `{...}` when the
// explanation word has one, otherwise the cleaned word itself.
let word = word
.split_once('{')
.and_then(|(_, r)| r.split_once('}'))
.map(|(p, _)| p)
.unwrap_or(keb);
// Copy everything that precedes the word verbatim.
if let Some(i) = remainder.find(word) {
ret += &remainder[..i];
remainder = &remainder[i..];
}
// Consume the word from the front of `remainder` one character at a time.
// A character that is not at the front of `remainder` is reported on
// stderr and left out of `new_word`.
let mut new_word = String::new();
for c in word.chars() {
if remainder.starts_with(c) {
remainder = remainder.strip_prefix(c).unwrap();
new_word.push(c);
} else {
eprintln!("!!!! Char {} is not in remainder !!!!", c);
}
}
let word = &new_word;
// Words sharing no character with `ex.chars` are copied without annotation.
if !Charset::new(word).intersects(&ex.chars) {
ret += word;
continue;
}
// Resolve the reading: the explicit one from the explanation if present,
// otherwise the first `reb` of each dictionary entry for `keb`, accepted
// only when exactly one reading is collected.
let reb = match reb {
Some(reb) => reb,
None => {
let ents = dict_idx.get(keb).map(|x| &x[..]).unwrap_or_default();
// NOTE(review): a set type as collect target would make the `len() == 1`
// test below mean "all entries agree on the reading" — confirm.
let matches = ents
.iter()
.map(|ent| {
let r_ele = ent.children().find(|x| x.has_tag_name("r_ele")).unwrap();
let reb = r_ele.children().find(|x| x.has_tag_name("reb")).unwrap();
reb.text().unwrap().trim()
})
.collect::
>();
if matches.len() == 1 {
*matches.iter().next().unwrap()
} else {
// No reading or several candidates: leave the word unannotated.
println!("- word without reb: {}", word);
ret += &word;
continue;
}
}
};
//println!("+ word: {}, keb: {}, reb: {}", word, keb, reb);
// Number of leading characters shared by the sentence form and `keb`.
let common_cnt = word
.chars()
.zip(keb.chars())
.take_while(|(x, y)| x == y)
.count();
if common_cnt == 0 {
// Strange cases
// Nothing in common: annotate the whole word with the whole reading.
write!(&mut ret, "[[{}||{}]]", word, reb).unwrap();
continue;
}
// Parts of `keb` and of the word that follow the common prefix. The reading
// is shortened by whichever of these suffixes it ends with (`keb`'s first),
// so that it lines up with the common prefix only.
let keb_suffix = keb.chars().skip(common_cnt).collect::();
let word_suffix = word.chars().skip(common_cnt).collect::();
let reb = reb
.strip_suffix(&keb_suffix)
.or(reb.strip_suffix(&word_suffix))
.unwrap_or(reb);
//println!(" common reb: {}, common word: {}", reb, word.chars().take(common_cnt).collect::());
let wchars = Vec::from_iter(word.chars().take(common_cnt));
let rchars = Vec::from_iter(reb.chars());
// Align the word prefix with the reading using a Levenshtein-style dynamic
// programming table. Cell `dyntab[w][r]` covers the first `w` word characters
// and the first `r` reading characters and stores
// `(previous w, previous r, cost, flag)`; the flag is `false` on the borders
// and on matching-character steps, `true` on every step that adds cost.
// First row: reading characters only, each pointing at its left neighbour.
let mut dynrow0 = vec![(0, 0, 0, false)];
for ri in 0..rchars.len() {
dynrow0.push((0, ri, 100 + ri + 1, false));
}
let mut dyntab = vec![dynrow0];
for (wi, wc) in wchars.iter().enumerate() {
// First column: word characters only, each pointing at the cell above.
let mut dynrow = vec![(wi, 0, 100 + wi + 1, false)];
for (ri, rc) in rchars.iter().enumerate() {
// Candidate predecessors for cell (wi + 1, ri + 1).
let mut x = vec![];
// From the cell above, only if that cell's flag is set.
if dyntab[wi][ri + 1].3 {
x.push((wi, ri + 1, dyntab[wi][ri + 1].2 + 1, true));
}
// From the cell to the left, only if that cell's flag is set.
if dynrow[ri].3 {
x.push((wi + 1, ri, dynrow[ri].2 + 1, true));
}
// Diagonal step: free when the characters are equal, cost 1 otherwise.
if wc == rc {
x.push((wi, ri, dyntab[wi][ri].2, false));
} else {
x.push((wi, ri, dyntab[wi][ri].2 + 1, true));
}
// The diagonal candidate is always present, so `x` is never empty.
dynrow.push(x.into_iter().min_by_key(|(_, _, w, _)| *w).unwrap());
}
dyntab.push(dynrow);
}
//eprintln!("DYN TAB: {:?}", dyntab);
// Walk the predecessor links back from the bottom-right cell to (0, 0),
// then reverse to obtain the path in reading order.
let mut path = vec![(wchars.len(), rchars.len())];
loop {
let (wi, ri) = *path.last().unwrap();
let (wi2, ri2, _, _) = dyntab[wi][ri];
path.push((wi2, ri2));
if wi2 == 0 && ri2 == 0 {
break;
}
}
path.reverse();
//eprintln!("DYN PATH: {:?}", path);
// Replay the path. `wbuf`/`rbuf` accumulate the word and reading characters
// of the current unmatched stretch.
let mut wbuf = String::new();
let mut rbuf = String::new();
for ((wi1, ri1), (wi2, ri2)) in path.iter().copied().zip(path.iter().copied().skip(1)) {
if wi2 > wi1 && ri2 > ri1 && wchars[wi1] == rchars[ri1] {
// Diagonal step on identical characters: flush the pending stretch as an
// annotation, then write the shared character plainly.
if !wbuf.is_empty() || !rbuf.is_empty() {
write!(&mut ret, "[[{}||{}]]", wbuf, rbuf).unwrap();
wbuf.clear();
rbuf.clear();
}
ret.push(wchars[wi1]);
} else {
// Any other step consumes a word character, a reading character, or both.
if wi2 > wi1 {
wbuf.push(wchars[wi1]);
}
if ri2 > ri1 {
rbuf.push(rchars[ri1]);
}
}
}
// Flush the last pending stretch, then append the part of the word that
// follows the common prefix.
if !wbuf.is_empty() || !rbuf.is_empty() {
write!(&mut ret, "[[{}||{}]]", wbuf, rbuf).unwrap();
}
ret += &word_suffix;
}
ret
}
/// Splits an explanation word into its base form and optional reading.
///
/// The base form is the part of `w` that precedes the first `(`, `{` or `[`
/// (the whole word when none is present). The reading is the text between the
/// first `(` and the first `)` that follows it, or `None` when the word has no
/// such pair.
fn expl_clean_word(w: &str) -> (&str, Option<&str>) {
    // Everything from the earliest annotation delimiter onwards is dropped.
    let cut = w
        .find(|c: char| matches!(c, '(' | '{' | '['))
        .unwrap_or(w.len());
    let base = &w[..cut];

    // The reading sits between the first '(' and the next ')'.
    let reading = w.find('(').and_then(|open| {
        let after = &w[open + '('.len_utf8()..];
        after.find(')').map(|close| &after[..close])
    });

    (base, reading)
}
fn dict_str_short<'a>(
qkeb: &str,
qreb: Option<&str>,
ent: &roxmltree::Node<'a, 'a>,
) -> Option {
let r_ele = ent.children().find(|x| x.has_tag_name("r_ele")).unwrap();
let reb = r_ele.children().find(|x| x.has_tag_name("reb")).unwrap();
let reb = reb.text().unwrap().trim();
if qreb.map(|x| x != reb).unwrap_or(false) {
return None;
}
Some(format!(
r#"{} 【{}】 "#,
qkeb, reb
))
}
fn dict_str<'a>(qkeb: &str, qreb: Option<&str>, ent: &roxmltree::Node<'a, 'a>) -> Option {
let mut ret = dict_str_short(qkeb, qreb, ent)?;
for sense in ent.children().filter(|x| x.has_tag_name("sense")) {
if let Some(s) = sense.children().find(|x| x.has_tag_name("gloss")) {
ret.extend(format!(" {};", s.text().unwrap().trim()).chars());
}
}
if ret.chars().rev().next() == Some(';') {
ret.pop();
}
Some(ret)
}
/// Writes `public/index.html`: the list of batches followed by a per-level
/// kanji coverage table.
///
/// * `batches` - every batch, in page order; the position in the slice is the
///   batch number shown in the list.
/// * `kanji_levels` - `(level name, kanji)` pairs. Levels named `"N0+"` or
///   ending in `"-10"` are left out of the coverage table.
///
/// For each listed level the table shows the number of kanji, the kanji
/// themselves, and those that appear in no batch together with their count.
///
/// Returns an error if the file cannot be created or a write fails.
///
/// NOTE(review): the batch row passes more arguments than the format string
/// has placeholders; the markup of the literals looks truncated. Confirm
/// against the intended HTML template.
pub fn format_index(batches: &[Batch], kanji_levels: &[(String, String)]) -> Result<()> {
    let out_file = fs::File::create("public/index.html")?;
    let mut page = io::BufWriter::new(out_file);

    // Page preamble and link to the about page.
    write!(
        page,
        r#"
List of batches
"#
    )?;
    writeln!(page, r#"
About / How-to
"#)?;

    // Batch table: header, then one row per batch.
    writeln!(page, "
")?;
    writeln!(page, "Num Level Kanji Examples Lesson-1 Lesson-2 Ignore ")?;
    for (num, b) in batches.iter().enumerate() {
        writeln!(
            page,
            r#"{:03} {} {} {} {} {} {} "#,
            num,
            num,
            b.level,
            b.chars.to_string(),
            b.examples.len(),
            b.chars_p1.to_string(),
            b.chars_p2.to_string(),
            b.chars_bad.to_string()
        )?;
    }
    writeln!(page, r#"
"#)?;
    writeln!(page, "
")?;

    // Every character taught by at least one batch.
    let covered = Charset::from_iter(
        batches
            .iter()
            .flat_map(|b| b.chars.chars().iter().copied()),
    );

    // Coverage table: header, then one row per listed level.
    writeln!(page, "
")?;
    writeln!(
        page,
        r#"Level Count Kanji Missing kanji "#
    )?;
    for (lvl, level_kanji) in kanji_levels {
        let skipped = lvl == "N0+" || lvl.ends_with("-10");
        if !skipped {
            let level_set = Charset::new(level_kanji);
            let missing = level_set.diff(&covered);
            writeln!(
                page,
                r#"{} {} {} {} ({}) "#,
                lvl,
                level_set.len(),
                level_set.to_string(),
                missing.to_string(),
                missing.len()
            )?;
        }
    }
    writeln!(page, "
")?;
    write!(page, "
")?;

    // Flush explicitly so that a failing final write is reported.
    page.flush()?;
    Ok(())
}
/// Writes `public/about.html` from the project's `README.md`.
///
/// The README is read from the current directory, converted from Markdown to
/// HTML with `markdown::to_html`, and placed between the page preamble and a
/// closing line.
///
/// Returns an error if the output file cannot be created, if `README.md`
/// cannot be read, or if writing or flushing the output fails.
pub fn format_about() -> Result<()> {
    let mut f = io::BufWriter::new(fs::File::create("public/about.html")?);
    write!(
        f,
        r#"
Datagengo README
"#
    )?;
    writeln!(f, r#""#)?;
    writeln!(
        f,
        r#"
Back to lessons
"#
    )?;
    writeln!(
        f,
        "{}",
        markdown::to_html(&fs::read_to_string("README.md")?)
    )?;
    writeln!(f, r#"
"#)?;
    // Dropping a `BufWriter` discards any error from its final flush, so flush
    // explicitly to report a failed write instead of leaving a truncated page.
    f.flush()?;
    Ok(())
}