aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alex Auvolat <alex@adnab.me> 2023-09-26 10:37:06 +0200
committer Alex Auvolat <alex@adnab.me> 2023-09-26 10:37:06 +0200
commit d0ff72e3e49bae91699756ec4304f3aaf83ee0a6 (patch)
tree bf9bd65d40bbc89d2e9959a8caf7ea43015cd6d1
parent 8bc57d9a984ce48c4e5525dc114f2ed788703636 (diff)
download datagengo-d0ff72e3e49bae91699756ec4304f3aaf83ee0a6.tar.gz
datagengo-d0ff72e3e49bae91699756ec4304f3aaf83ee0a6.zip
remove old algorithm that was commented out
-rw-r--r-- src/main.rs 150
1 file changed, 0 insertions, 150 deletions
diff --git a/src/main.rs b/src/main.rs
index 26e3980..1022b10 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -829,156 +829,6 @@ fn level_examples<'a>(
examples
}
-/*
-
-let (mut target_i, target_level, mut target_chars) = kanji_levels
- .iter()
- .enumerate()
- .map(|(i, (l, c))| (i, l, c.diff(&prev_chars)))
- .find(|(_, _, c)| !c.is_empty())
- .ok_or(anyhow!("no more batches to make!"))?;
-
-let chars_p1 = previous
- .iter()
- .rev()
- .next()
- .map(|b| b.chars.clone())
- .unwrap_or(Charset::default());
-
-let chars_p2 = previous
- .iter()
- .rev()
- .skip(1)
- .next()
- .map(|b| b.chars.clone())
- .unwrap_or(Charset::default());
-
-let mut chars_late = Charset::default();
-
-let mut chars_bad = Charset::from_iter(
- kanji_levels
- .iter()
- .skip(target_i + 1)
- .map(|(_, c)| c.chars().iter().copied())
- .flatten(),
-);
-let mut chars_bad_avoid = Charset::from_iter(
- kanji_levels
- .iter()
- .skip(target_i + 1)
- .filter(|(l, _)| !l.ends_with("-9") && !l.ends_with("-10"))
- .map(|(_, c)| c.chars().iter().copied())
- .flatten(),
-);
-
-let mut batch = Batch {
- level: target_level.to_string(),
- chars: Charset::default(),
- chars_p1: Charset::default(),
- chars_p2: Charset::default(),
- chars_bad: Charset::default(),
- examples: Vec::new(),
-};
-let mut batch_chars = Charset::default();
-
-eprintln!("----");
-eprintln!("Level : {}", batch.level);
-eprintln!("Target : {}", target_chars.to_string());
-eprintln!("Prev1 : {}", chars_p1.to_string());
-eprintln!("Prev2 : {}", chars_p2.to_string());
-eprintln!("Bad : {} characters", chars_bad.len());
-
-let batch_len = 20;
-let mut stalled = false;
-while batch.chars.len() < batch_len && !target_chars.is_empty() {
- let need = batch_len - batch.chars.len();
- let should_add = need > target_chars.len() && target_chars.len() <= 3;
- if target_i + 1 < kanji_levels.len() && (should_add || stalled) {
- // upgrade to next level
- target_i += 1;
- chars_late = chars_late.union(&target_chars);
- target_chars = target_chars.union(&kanji_levels[target_i].1.diff(&prev_chars));
- chars_bad = chars_bad.diff(&target_chars);
- chars_bad_avoid = chars_bad_avoid.diff(&target_chars);
- if batch.examples.is_empty() {
- batch.level = kanji_levels[target_i].0.to_string();
- } else {
- batch.level = format!("{} + {}", batch.level, kanji_levels[target_i].0);
- }
- eprintln!("Level : {}", batch.level);
- eprintln!("Target: {}", target_chars.to_string());
- eprintln!("Late : {}", chars_late.to_string());
- eprintln!("Bad : {} characters", chars_bad.len());
- stalled = false;
- }
- /* this one works well enough
- let cost = |ex: &Example, ex_tgt_inter: usize| {
- 20i32 * ex_tgt_inter as i32
- + 30i32 * ex.chars.inter_len(&chars_late) as i32
- + 6i32 * ex.chars.inter_len(&batch.chars) as i32
- + 4i32 * ex.chars.inter_len(&chars_p1) as i32
- + 3i32 * ex.chars.inter_len(&chars_p2) as i32
- - 40i32 * ex.chars.inter_len(&chars_bad) as i32
- };
- */
- let cost = |ex: &Example, ex_tgt_inter: usize| {
- (
- -(ex.chars.inter_len(&chars_bad_avoid) as i32),
- ex_tgt_inter,
- ex.chars.inter_len(&chars_late),
- 2 * ex.chars.inter_len(&chars_p1) + ex.chars.inter_len(&chars_p2),
- -(ex.ja.chars().count() as i32),
- )
- };
- let cand_1 = examples
- .par_iter()
- .map(|ex| (ex, ex.chars.inter_len(&target_chars)))
- .filter(|(_, ex_tgt_inter)| {
- (1..=4).contains(ex_tgt_inter) && *ex_tgt_inter + batch.chars.len() <= batch_len
- })
- .max_by_key(|(ex, ex_tgt_inter)| cost(ex, *ex_tgt_inter));
- let cand = cand_1.or_else(|| {
- examples
- .par_iter()
- .map(|ex| (ex, ex.chars.inter_len(&target_chars)))
- .filter(|(_, ex_tgt_inter)| *ex_tgt_inter > 0)
- .max_by_key(|(ex, ex_tgt_inter)| cost(ex, *ex_tgt_inter))
- });
- if let Some((ex, _)) = cand {
- eprintln!(
- "* add {} (rep: {}, p1: {}, p2: {}, bad: {}) {}",
- ex.chars.inter(&target_chars).to_string(),
- ex.chars.inter(&batch.chars).to_string(),
- ex.chars.inter(&chars_p1).to_string(),
- ex.chars.inter(&chars_p2).to_string(),
- ex.chars.inter(&chars_bad).to_string(),
- ex.ja
- );
- batch.chars = batch.chars.union(&ex.chars.inter(&target_chars));
- target_chars = target_chars.diff(&ex.chars);
- chars_late = chars_late.diff(&ex.chars);
- batch.examples.push(ex.clone());
- batch_chars = batch_chars.union(&ex.chars);
- stalled = false;
- } else {
- if stalled {
- eprintln!(
- "could not find suitable sentence, stopping batch now (need {})",
- need
- );
- break;
- }
- stalled = true;
- }
-}
-
-batch.chars_p1 = chars_p1.inter(&batch_chars);
-batch.chars_p2 = chars_p2.inter(&batch_chars);
-batch.chars_bad = chars_bad.inter(&batch_chars);
-
-Ok(batch)
- */
-
fn simplify_batch(batch: &mut Batch) {
let mut char_cnt = HashMap::<char, usize>::new();
for ex in batch.examples.iter() {