aboutsummaryrefslogtreecommitdiff
path: root/src/main.rs
diff options
context:
space:
mode:
author Alex Auvolat <alex@adnab.me> 2023-09-24 17:45:55 +0200
committer Alex Auvolat <alex@adnab.me> 2023-09-24 17:45:55 +0200
commit b22e04ddaf86d38635efc859fd338113ff94eb62 (patch)
tree 2832b4ca3d7fde0e0cbcf6a76c6b37fb1b0a1063 /src/main.rs
parent a9de8d71a0fecbd483cbdc084ba109cb96250aaa (diff)
download datagengo-b22e04ddaf86d38635efc859fd338113ff94eb62.tar.gz
datagengo-b22e04ddaf86d38635efc859fd338113ff94eb62.zip
rebalance skeleton (but is it correct?)
Diffstat (limited to 'src/main.rs')
-rw-r--r-- src/main.rs 37
1 files changed, 37 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs
index a283891..0459bf0 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -25,6 +25,10 @@ enum Cmd {
count: usize,
},
Simplify,
+ Rebalance {
+ #[structopt(default_value = "5")]
+ start: usize,
+ },
Format,
}
@@ -89,6 +93,23 @@ fn main() {
)
.expect("save");
}
+ Cmd::Rebalance { start } => {
+ let mut batches: Vec<Batch> = fs::read("data/batches.json")
+ .map_err(anyhow::Error::from)
+ .and_then(|x| Ok(serde_json::from_slice(&x)?))
+ .unwrap_or_default();
+ let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
+ for (level, _) in kanji_levels.iter() {
+ rebalance_level(level, &mut batches[start..]);
+ }
+ fs::write(
+ "data/batches.json",
+ serde_json::to_string_pretty(&batches)
+ .expect("serialize")
+ .as_bytes(),
+ )
+ .expect("save");
+ }
Cmd::Format => {
let jmdict = fs::read_to_string("data/JMdict_e.xml").expect("read_jmdict");
let jmdict = roxmltree::Document::parse_with_options(
@@ -472,6 +493,22 @@ fn simplify_batch(batch: &mut Batch) {
}
}
+fn rebalance_level(level: &str, batches: &mut [Batch]) { // Skeleton: collects and prints per-level statistics, then reaches `todo!()`; `batches` is not modified yet.
+ let mut i_batch = vec![]; // positions, within the `batches` slice, of the batches whose `level` equals `level`
+ let mut n_ex = 0; // running total of `examples.len()` over those matching batches
+ for (i, b) in batches.iter().enumerate() {
+ if b.level == level {
+ i_batch.push(i);
+ n_ex += b.examples.len();
+ }
+ }
+ if i_batch.len() < 2 { // fewer than two matching batches: return before the report and before the `todo!()`
+ return;
+ }
+ println!("Level {}: {} batches, {} examples, avg {:.2}", level, i_batch.len(), n_ex, n_ex as f32 / i_batch.len() as f32); // report batch count, example count and mean examples per batch
+ todo!() // NOTE(review): the actual rebalancing is unimplemented; any level with two or more matching batches panics here
+}
+
// =====================================================================
// FORMATTING TO HTML
// =====================================================================