From b22e04ddaf86d38635efc859fd338113ff94eb62 Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Sun, 24 Sep 2023 17:45:55 +0200
Subject: rebalance skeleton (but is it correct?)

---
 src/main.rs | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'src')

diff --git a/src/main.rs b/src/main.rs
index a283891..0459bf0 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -25,6 +25,10 @@ enum Cmd {
         count: usize,
     },
     Simplify,
+    Rebalance {
+        #[structopt(default_value = "5")]
+        start: usize,
+    },
     Format,
 }
 
@@ -89,6 +93,23 @@ fn main() {
             )
             .expect("save");
         }
+        Cmd::Rebalance { start } => {
+            let mut batches: Vec<Batch> = fs::read("data/batches.json")
+                .map_err(anyhow::Error::from)
+                .and_then(|x| Ok(serde_json::from_slice(&x)?))
+                .unwrap_or_default();
+            let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
+            for (level, _) in kanji_levels.iter() {
+                rebalance_level(level, &mut batches[start..]);
+            }
+            fs::write(
+                "data/batches.json",
+                serde_json::to_string_pretty(&batches)
+                    .expect("serialize")
+                    .as_bytes(),
+            )
+            .expect("save");
+        }
         Cmd::Format => {
             let jmdict = fs::read_to_string("data/JMdict_e.xml").expect("read_jmdict");
             let jmdict = roxmltree::Document::parse_with_options(
@@ -472,6 +493,22 @@ fn simplify_batch(batch: &mut Batch) {
     }
 }
 
+fn rebalance_level(level: &str, batches: &mut [Batch]) {
+    let mut i_batch = vec![];
+    let mut n_ex = 0;
+    for (i, b) in batches.iter().enumerate() {
+        if b.level == level {
+            i_batch.push(i);
+            n_ex += b.examples.len();
+        }
+    }
+    if i_batch.len() < 2 {
+        return;
+    }
+    println!("Level {}: {} batches, {} examples, avg {:.2}", level, i_batch.len(), n_ex, n_ex as f32 / i_batch.len() as f32);
+    todo!()
+}
+
 // =====================================================================
 // FORMATTING TO HTML
 // =====================================================================
-- 
cgit v1.2.3