From 5aae4a8185d1417028a4b22d43fbac851d51a843 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 6 Oct 2023 15:32:59 +0200 Subject: add extra examples --- src/main.rs | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'src') diff --git a/src/main.rs b/src/main.rs index ce352d4..8668a82 100644 --- a/src/main.rs +++ b/src/main.rs @@ -31,6 +31,7 @@ enum Cmd { Simplify, Cleanup, AddVocab, + AddExamples, Format, } @@ -133,6 +134,34 @@ fn main() { ) .expect("save"); } + Cmd::AddExamples => { + let kanji_levels = read_kanji_levels().expect("read_kanji_levels"); + let all_kanji = Charset::new( + kanji_levels + .iter() + .map(|(_, x)| x.to_string()) + .collect::>() + .join(""), + ); + + let mut ex = read_examples(&all_kanji).expect("read_examples"); + ex.retain(|e| (5..=25).contains(&e.ja.chars().count())); + + let mut batches: Vec = fs::read("data/batches.json") + .map_err(anyhow::Error::from) + .and_then(|x| Ok(serde_json::from_slice(&x)?)) + .unwrap_or_default(); + + add_examples(&mut batches, &ex); + + fs::write( + "data/batches.json", + serde_json::to_string_pretty(&batches) + .expect("serialize") + .as_bytes(), + ) + .expect("save"); + } Cmd::Format => { let jmdict = fs::read_to_string("data/JMdict_e.xml").expect("read_jmdict"); let jmdict = roxmltree::Document::parse_with_options( @@ -495,6 +524,8 @@ struct Batch { examples: Vec, #[serde(default)] extra_vocab: Vec, + #[serde(default)] + extra_examples: Vec, } fn gen_batches( @@ -953,6 +984,32 @@ fn add_vocab(all_batches: &mut [Batch], vocab: &[JlptVocab]) { } } +fn add_examples(all_batches: &mut [Batch], examples: &[Example]) { + let mut chars = Charset::default(); + for (i, batch) in all_batches.iter_mut().enumerate() { + chars = chars.union(&batch.chars); + + let candidates = examples.iter() + .filter(|x| x.chars.inter_len(&batch.chars) > 0) + .filter(|x| x.chars.diff(&chars).len() == 0) + .filter(|x| batch.examples.iter().all(|y| y.ja != x.ja)); + let mut cand_by_chars = HashMap::new(); + for c in candidates { + cand_by_chars.insert(c.chars.to_string(), c.clone()); + } + let mut candidates = cand_by_chars.into_iter().map(|(_, ex)| ex).collect::>(); + candidates.shuffle(&mut thread_rng()); + candidates.truncate(20); + batch.extra_examples = candidates; + + + println!("---- BATCH #{:03} ----", i); + for ex in batch.extra_examples.iter() { + println!("{} - {}", ex.ja, ex.en); + } + } +} + // ===================================================================== // FORMATTING TO HTML // ===================================================================== @@ -1075,6 +1132,16 @@ fn format_batch_aux<'a>( &batch.extra_vocab.iter().filter(|v| !batch.level.contains(&v.level)).collect::>(), "Extra vocabulary (previous levels)")?; + writeln!(f, r#"
Extra examples (reading practice)"#)?; + for ex in batch.extra_examples.iter() { + writeln!( + f, + r#""#, + ex.ja, ex.en + )?; + } + writeln!(f, r#"
  {}  
{}
"#)?; + writeln!(f, "
")?; writeln!(f, "

\(≧▽≦)/

")?; -- cgit v1.2.3