From 903cc6a3711d7b501371ee3ef55ae0f50d6cd63d Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Fri, 21 Jul 2023 21:50:02 +0200
Subject: Add dictionary entries
---
html/style.css | 12 +++----
src/main.rs | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 97 insertions(+), 14 deletions(-)
diff --git a/html/style.css b/html/style.css
index e312f06..7fdbc5e 100644
--- a/html/style.css
+++ b/html/style.css
@@ -12,7 +12,7 @@ td {
.ja {
text-align: center;
- font-size: 2em;
+ font-size: 2rem;
}
.ja:hover .char_cur {
@@ -33,13 +33,9 @@ td {
.en {
text-align: center;
- font-size: 1.2em;
+ font-size: 1.2rem;
}
-.en .expl {
- color: transparent;
-}
-
-.en:hover .expl {
- color: black;
+details .chars {
+ font-size: 3rem;
}
diff --git a/src/main.rs b/src/main.rs
index 6f5ca4e..1572e2e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -61,13 +61,24 @@ fn main() {
fs::write("data/batches.json", serde_json::to_string_pretty(&batches).expect("serialize").as_bytes()).expect("save");
}
Cmd::Format => {
+ let jmdict = fs::read_to_string("data/JMdict_e.xml")
+ .expect("read_jmdict");
+ let jmdict = roxmltree::Document::parse_with_options(
+ &jmdict,
+ roxmltree::ParsingOptions {
+ allow_dtd: true,
+ ..Default::default()
+ })
+ .expect("parse_jmdict");
+ let jmdict_idx = index_jmdict(&jmdict);
+
let batches = fs::read("data/batches.json")
.map_err(anyhow::Error::from)
.and_then(|x| Ok(serde_json::from_slice::<Vec<Batch>>(&x)?))
.expect("read/parse");
- batches.par_iter()
+ batches.iter()
.enumerate()
- .for_each(|x| format_batch(batches.len(), x));
+ .for_each(|x| format_batch(&jmdict_idx, batches.len(), x));
let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
format_index(&batches, &kanji_levels).expect("format_index");
@@ -75,6 +86,23 @@ fn main() {
}
}
+type DictIndex<'a> = HashMap<&'a str, Vec<roxmltree::Node<'a, 'a>>>;
+fn index_jmdict<'a>(dict: &'a roxmltree::Document) -> DictIndex<'a> {
+ let dict = dict.root().children().find(|x| x.has_tag_name("JMdict")).unwrap();
+
+ let mut ret: DictIndex<'a> = HashMap::new();
+ for x in dict.children().filter(|x| x.has_tag_name("entry")) {
+ for r in x.children().filter(|x| x.has_tag_name("k_ele")) {
+ if let Some(keb) = r.children().find(|x| x.has_tag_name("keb")) {
+ let txt = keb.text().unwrap().trim();
+ ret.entry(txt).or_default().push(x);
+ }
+ }
+ }
+
+ ret
+}
+
fn parse_kanjidic() -> Result> {
let file = fs::read_to_string("data/kanjidic2.xml")?;
let xml = roxmltree::Document::parse(&file)?;
@@ -279,11 +307,11 @@ fn gen_batch(previous: &[Batch], kanji_levels: &[(String, Charset)], examples: &
Ok(batch)
}
-fn format_batch(count: usize, (i, batch): (usize, &Batch)) {
- format_batch_aux(count, i, batch).expect("format batch");
+fn format_batch<'a>(dict_idx: &DictIndex<'a>, count: usize, (i, batch): (usize, &Batch)) {
+ format_batch_aux(dict_idx, count, i, batch).expect("format batch");
}
-fn format_batch_aux(count: usize, i: usize, batch: &Batch) -> Result<()> {
+fn format_batch_aux<'a>(dict_idx: &DictIndex<'a>, count: usize, i: usize, batch: &Batch) -> Result<()> {
let mut f = io::BufWriter::new(fs::File::create(format!("html/{:03}.html", i))?);
write!(f, r#"
@@ -323,7 +351,38 @@ fn format_batch_aux(count: usize, i: usize, batch: &Batch) -> Result<()> {
}
}
writeln!(f, "
")?;
- writeln!(f, r#"{}
{}
"#, ex.expl, ex.en)?;
+ writeln!(f, r#"{}
"#, ex.en)?;
+
+ writeln!(f, r#"Explanation
"#)?;
+ let mut expl_batch = Vec::new();
+ let mut expl_all = Vec::new();
+ for w in ex.expl.split(|c| c == ' ' || c == '~') {
+ let w = expl_clean_word(w);
+ let wchars = Charset::new(w);
+ if !wchars.intersects(&ex.chars) {
+ continue;
+ }
+ println!("{}", w);
+ if let Some(ents) = dict_idx.get(w) {
+ for ent in ents.iter() {
+ let s = dict_str(w, ent);
+ println!("{}: {}", w, s);
+ if wchars.intersects(&batch.chars) {
+ expl_batch.push(s);
+ } else {
+ expl_all.push(s);
+ }
+ }
+ }
+ }
+ for be in expl_batch {
+ writeln!(f, r#"{}
"#, be)?;
+ }
+ writeln!(f, r#"{}
"#, ex.chars.inter(&batch.chars).to_string())?;
+ for be in expl_all {
+ writeln!(f, r#"{}
"#, be)?;
+ }
+ writeln!(f, r#" "#)?;
}
write!(f, "