diff options
author | Alex Auvolat <alex@adnab.me> | 2023-09-24 17:45:55 +0200 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2023-09-24 17:45:55 +0200 |
commit | b22e04ddaf86d38635efc859fd338113ff94eb62 (patch) | |
tree | 2832b4ca3d7fde0e0cbcf6a76c6b37fb1b0a1063 /src/main.rs | |
parent | a9de8d71a0fecbd483cbdc084ba109cb96250aaa (diff) | |
download | datagengo-b22e04ddaf86d38635efc859fd338113ff94eb62.tar.gz datagengo-b22e04ddaf86d38635efc859fd338113ff94eb62.zip |
rebalance skeleton (but is it correct?)
Diffstat (limited to 'src/main.rs')
-rw-r--r-- | src/main.rs | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs
index a283891..0459bf0 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -25,6 +25,10 @@ enum Cmd {
         count: usize,
     },
     Simplify,
+    Rebalance {
+        #[structopt(default_value = "5")]
+        start: usize,
+    },
     Format,
 }
 
@@ -89,6 +93,23 @@ fn main() {
             )
             .expect("save");
         }
+        Cmd::Rebalance { start } => {
+            let mut batches: Vec<Batch> = fs::read("data/batches.json")
+                .map_err(anyhow::Error::from)
+                .and_then(|x| Ok(serde_json::from_slice(&x)?))
+                .unwrap_or_default();
+            let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
+            for (level, _) in kanji_levels.iter() {
+                rebalance_level(level, &mut batches[start..]);
+            }
+            fs::write(
+                "data/batches.json",
+                serde_json::to_string_pretty(&batches)
+                    .expect("serialize")
+                    .as_bytes(),
+            )
+            .expect("save");
+        }
         Cmd::Format => {
             let jmdict = fs::read_to_string("data/JMdict_e.xml").expect("read_jmdict");
             let jmdict = roxmltree::Document::parse_with_options(
@@ -472,6 +493,22 @@ fn simplify_batch(batch: &mut Batch) {
     }
 }
 
+fn rebalance_level(level: &str, batches: &mut [Batch]) {
+    let mut i_batch = vec![];
+    let mut n_ex = 0;
+    for (i, b) in batches.iter().enumerate() {
+        if b.level == level {
+            i_batch.push(i);
+            n_ex += b.examples.len();
+        }
+    }
+    if i_batch.len() < 2 {
+        return;
+    }
+    println!("Level {}: {} batches, {} examples, avg {:.2}", level, i_batch.len(), n_ex, n_ex as f32 / i_batch.len() as f32);
+    todo!()
+}
+
 // =====================================================================
 // FORMATTING TO HTML
 // =====================================================================