From e0f80b10d7874a8317ea0ae4a621fc3556eac491 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Sun, 24 Sep 2023 18:27:18 +0200 Subject: merge elementary school kanji levels --- src/main.rs | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/main.rs b/src/main.rs index 0459bf0..126f4de 100644 --- a/src/main.rs +++ b/src/main.rs @@ -23,6 +23,8 @@ enum Cmd { New { #[structopt(default_value = "10")] count: usize, + #[structopt(long = "truncate")] + truncate: Option<usize>, }, Simplify, Rebalance { @@ -42,7 +44,7 @@ fn main() { println!("{}: {}", level, chars.to_string()); } } - Cmd::New { count } => { + Cmd::New { truncate, count } => { let kanji_levels = read_kanji_levels().expect("read_kanji_levels"); let all_kanji = Charset::new( kanji_levels @@ -61,6 +63,9 @@ fn main() { .map_err(anyhow::Error::from) .and_then(|x| Ok(serde_json::from_slice(&x)?)) .unwrap_or_default(); + if let Some(t) = truncate { + batches.truncate(t); + } println!("---- starting after {} batches ----", batches.len()); for _ in 0..count { let batch = gen_batch(&batches, &kanji_levels, &ex).expect("gen_batch"); @@ -196,6 +201,10 @@ fn parse_kanjidic() -> Result> { } } } + match grade { + Some(i) if i <= 6 => grade = Some(7), + _ => (), + } if let Some(lit) = literal { levels .entry((jlpt, grade)) @@ -216,8 +225,12 @@ fn parse_kanjidic() -> Result> { let mut pc = Charset::default(); for ((j, g), chars) in levels.into_iter() { let name = match (j, g) { + (Some(j), Some(7)) => format!("N{}a", j), + (Some(j), Some(8)) => format!("N{}b", j), (Some(j), Some(g)) => format!("N{}-{}", j, g), (Some(j), None) => format!("N{}+", j), + (None, Some(7)) => format!("N0a"), + (None, Some(8)) => format!("N0b"), (None, Some(g)) => format!("N0-{}", g), (None, None) => format!("N0+"), }; @@ -505,8 +518,14 @@ fn rebalance_level(level: &str, batches: &mut [Batch]) { if i_batch.len() < 2 { return; } - println!("Level {}: {} batches, {} examples, avg {:.2}", level, 
i_batch.len(), n_ex, n_ex as f32 / i_batch.len() as f32); - todo!() + println!( + "Level {}: {} batches, {} examples, avg {:.2}", + level, + i_batch.len(), + n_ex, + n_ex as f32 / i_batch.len() as f32 + ); + //todo!() } // ===================================================================== -- cgit v1.2.3