diff options
author | Alex Auvolat <alex@adnab.me> | 2023-09-24 18:27:18 +0200 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2023-09-24 18:27:18 +0200 |
commit | e0f80b10d7874a8317ea0ae4a621fc3556eac491 (patch) | |
tree | 714a4141cc1b728339f5dd028eb90c5a4171a320 /src | |
parent | b22e04ddaf86d38635efc859fd338113ff94eb62 (diff) | |
download | datagengo-e0f80b10d7874a8317ea0ae4a621fc3556eac491.tar.gz datagengo-e0f80b10d7874a8317ea0ae4a621fc3556eac491.zip |
merge elementary school kanji levels
Diffstat (limited to 'src')
-rw-r--r-- | src/main.rs | 25 |
1 file changed, 22 insertions, 3 deletions
```diff
diff --git a/src/main.rs b/src/main.rs
index 0459bf0..126f4de 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -23,6 +23,8 @@ enum Cmd {
     New {
         #[structopt(default_value = "10")]
         count: usize,
+        #[structopt(long = "truncate")]
+        truncate: Option<usize>,
     },
     Simplify,
     Rebalance {
@@ -42,7 +44,7 @@ fn main() {
                 println!("{}: {}", level, chars.to_string());
             }
         }
-        Cmd::New { count } => {
+        Cmd::New { truncate, count } => {
             let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
             let all_kanji = Charset::new(
                 kanji_levels
@@ -61,6 +63,9 @@ fn main() {
                 .map_err(anyhow::Error::from)
                 .and_then(|x| Ok(serde_json::from_slice(&x)?))
                 .unwrap_or_default();
+            if let Some(t) = truncate {
+                batches.truncate(t);
+            }
             println!("---- starting after {} batches ----", batches.len());
             for _ in 0..count {
                 let batch = gen_batch(&batches, &kanji_levels, &ex).expect("gen_batch");
@@ -196,6 +201,10 @@ fn parse_kanjidic() -> Result<Vec<(String, Charset)>> {
                     }
                 }
             }
+            match grade {
+                Some(i) if i <= 6 => grade = Some(7),
+                _ => (),
+            }
             if let Some(lit) = literal {
                 levels
                     .entry((jlpt, grade))
@@ -216,8 +225,12 @@ fn parse_kanjidic() -> Result<Vec<(String, Charset)>> {
     let mut pc = Charset::default();
     for ((j, g), chars) in levels.into_iter() {
         let name = match (j, g) {
+            (Some(j), Some(7)) => format!("N{}a", j),
+            (Some(j), Some(8)) => format!("N{}b", j),
             (Some(j), Some(g)) => format!("N{}-{}", j, g),
             (Some(j), None) => format!("N{}+", j),
+            (None, Some(7)) => format!("N0a"),
+            (None, Some(8)) => format!("N0b"),
             (None, Some(g)) => format!("N0-{}", g),
             (None, None) => format!("N0+"),
         };
@@ -505,8 +518,14 @@ fn rebalance_level(level: &str, batches: &mut [Batch]) {
     if i_batch.len() < 2 {
         return;
     }
-    println!("Level {}: {} batches, {} examples, avg {:.2}", level, i_batch.len(), n_ex, n_ex as f32 / i_batch.len() as f32);
-    todo!()
+    println!(
+        "Level {}: {} batches, {} examples, avg {:.2}",
+        level,
+        i_batch.len(),
+        n_ex,
+        n_ex as f32 / i_batch.len() as f32
+    );
+    //todo!()
 }
 
 // =====================================================================
```