From b78034ad5bf65f1dfe390861f72bed827e2ab1b8 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 28 Nov 2023 16:22:16 +0100 Subject: add furigana to main examples and persist furigana in batches.json --- src/main.rs | 120 +++++++++++++++++++++++++++++++----------------------------- 1 file changed, 62 insertions(+), 58 deletions(-) (limited to 'src/main.rs') diff --git a/src/main.rs b/src/main.rs index b8996e8..85b278a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,6 +10,7 @@ use structopt::StructOpt; mod charset; mod datafiles; +mod example; mod format; use charset::Charset; use datafiles::*; @@ -36,6 +37,7 @@ enum Cmd { Cleanup, AddVocab, AddExamples, + AddFurigana, Format, } @@ -70,73 +72,46 @@ fn main() { .collect::>(); let mut ex = read_examples(&all_kanji).expect("read_examples"); ex.retain(|e| (5..=25).contains(&e.ja.chars().count())); - let mut batches: Vec = fs::read("data/batches.json") - .map_err(anyhow::Error::from) - .and_then(|x| Ok(serde_json::from_slice(&x)?)) - .unwrap_or_default(); + + let mut batches = read_batches().unwrap_or_default(); + if let Some(t) = truncate { batches.truncate(t); } println!("---- starting after {} batches ----", batches.len()); let target_len = batches.len() + count; gen_batches(&mut batches, target_len, &kanji_levels, &ex); - fs::write( - "data/batches.json", - serde_json::to_string_pretty(&batches) - .expect("serialize") - .as_bytes(), - ) - .expect("save"); + + save_batches(batches).expect("save_batches"); } Cmd::Simplify => { - let mut batches: Vec = fs::read("data/batches.json") - .map_err(anyhow::Error::from) - .and_then(|x| Ok(serde_json::from_slice(&x)?)) - .expect("failed to decode batches.json"); + let mut batches = read_batches().expect("read_batches"); + for batch in batches.iter_mut() { simplify_batch(batch); } - fs::write( - "data/batches.json", - serde_json::to_string_pretty(&batches) - .expect("serialize") - .as_bytes(), - ) - .expect("save"); + + save_batches(batches).expect("save_batches"); } Cmd::Cleanup => { - let mut batches: Vec = fs::read("data/batches.json") - .map_err(anyhow::Error::from) - .and_then(|x| Ok(serde_json::from_slice(&x)?)) - .expect("failed to decode batches.json"); + let mut batches = read_batches().expect("read_batches"); + let kanji_levels = read_kanji_levels().expect("read_kanji_levels"); let kanji_levels = kanji_levels .into_iter() .map(|(l, x)| (l, Charset::new(x))) .collect::>(); cleanup_batches(&mut batches, &kanji_levels); - fs::write( - "data/batches.json", - serde_json::to_string_pretty(&batches) - .expect("serialize") - .as_bytes(), - ) - .expect("save"); + + save_batches(batches).expect("save_batches"); } Cmd::AddVocab => { - let mut batches: Vec = fs::read("data/batches.json") - .map_err(anyhow::Error::from) - .and_then(|x| Ok(serde_json::from_slice(&x)?)) - .expect("failed to decode batches.json"); + let mut batches = read_batches().expect("read_batches"); + let jlpt_vocab = load_jlpt_vocab().expect("load_jlpt_vocab"); add_vocab(&mut batches, &jlpt_vocab); - fs::write( - "data/batches.json", - serde_json::to_string_pretty(&batches) - .expect("serialize") - .as_bytes(), - ) - .expect("save"); + + save_batches(batches).expect("save_batches"); } Cmd::AddExamples => { let kanji_levels = read_kanji_levels().expect("read_kanji_levels"); @@ -151,20 +126,37 @@ fn main() { let mut ex = read_examples(&all_kanji).expect("read_examples"); ex.retain(|e| (5..=25).contains(&e.ja.chars().count())); - let mut batches: Vec = fs::read("data/batches.json") - .map_err(anyhow::Error::from) - .and_then(|x| Ok(serde_json::from_slice(&x)?)) - .expect("failed to decode batches.json"); + let mut batches = read_batches().expect("read_batches"); add_extra_examples(&mut batches, &ex); - fs::write( - "data/batches.json", - serde_json::to_string_pretty(&batches) - .expect("serialize") - .as_bytes(), + save_batches(batches).expect("save_batches"); + } + Cmd::AddFurigana => { + let mut batches = read_batches().expect("read_batches"); + + let jmdict = fs::read_to_string("data/JMdict_e.xml").expect("read_jmdict"); + let jmdict = roxmltree::Document::parse_with_options( + &jmdict, + roxmltree::ParsingOptions { + allow_dtd: true, + ..Default::default() + }, ) - .expect("save"); + .expect("parse_jmdict"); + let jmdict_idx = index_jmdict(&jmdict); + + for batch in batches.iter_mut() { + for ex in batch + .examples + .iter_mut() + .chain(batch.extra_examples.iter_mut()) + { + ex.gen_furigana(&jmdict_idx); + } + } + + save_batches(batches).expect("save_batches"); } Cmd::Format => { let jmdict = fs::read_to_string("data/JMdict_e.xml").expect("read_jmdict"); @@ -178,10 +170,7 @@ fn main() { .expect("parse_jmdict"); let jmdict_idx = index_jmdict(&jmdict); - let batches = fs::read("data/batches.json") - .map_err(anyhow::Error::from) - .and_then(|x| Ok(serde_json::from_slice::>(&x)?)) - .expect("read/parse"); + let batches = read_batches().expect("read/parse"); fs::create_dir_all("public").expect("mkdir public"); fs::copy("static/style.css", "public/style.css").expect("copy style.css"); @@ -200,6 +189,21 @@ fn main() { } } +// ---- + +fn read_batches() -> anyhow::Result> { + let json = fs::read("data/batches.json")?; + Ok(serde_json::from_slice::>(&json)?) +} + +fn save_batches(batches: Vec) -> anyhow::Result<()> { + fs::write( + "data/batches.json", + serde_json::to_string_pretty(&batches)?.as_bytes(), + )?; + Ok(()) +} + // ===================================================================== // BATCH STRUCTURES AND GENERATION // ===================================================================== -- cgit v1.2.3