diff options
author | Alex Auvolat <alex@adnab.me> | 2023-09-26 10:37:06 +0200 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2023-09-26 10:37:06 +0200 |
commit | d0ff72e3e49bae91699756ec4304f3aaf83ee0a6 (patch) | |
tree | bf9bd65d40bbc89d2e9959a8caf7ea43015cd6d1 /src | |
parent | 8bc57d9a984ce48c4e5525dc114f2ed788703636 (diff) | |
download | datagengo-d0ff72e3e49bae91699756ec4304f3aaf83ee0a6.tar.gz datagengo-d0ff72e3e49bae91699756ec4304f3aaf83ee0a6.zip |
remove old algorithm that was commented out
Diffstat (limited to 'src')
-rw-r--r-- | src/main.rs | 150 |
1 files changed, 0 insertions, 150 deletions
diff --git a/src/main.rs b/src/main.rs index 26e3980..1022b10 100644 --- a/src/main.rs +++ b/src/main.rs @@ -829,156 +829,6 @@ fn level_examples<'a>( examples } -/* - -let (mut target_i, target_level, mut target_chars) = kanji_levels - .iter() - .enumerate() - .map(|(i, (l, c))| (i, l, c.diff(&prev_chars))) - .find(|(_, _, c)| !c.is_empty()) - .ok_or(anyhow!("no more batches to make!"))?; - -let chars_p1 = previous - .iter() - .rev() - .next() - .map(|b| b.chars.clone()) - .unwrap_or(Charset::default()); - -let chars_p2 = previous - .iter() - .rev() - .skip(1) - .next() - .map(|b| b.chars.clone()) - .unwrap_or(Charset::default()); - -let mut chars_late = Charset::default(); - -let mut chars_bad = Charset::from_iter( - kanji_levels - .iter() - .skip(target_i + 1) - .map(|(_, c)| c.chars().iter().copied()) - .flatten(), -); -let mut chars_bad_avoid = Charset::from_iter( - kanji_levels - .iter() - .skip(target_i + 1) - .filter(|(l, _)| !l.ends_with("-9") && !l.ends_with("-10")) - .map(|(_, c)| c.chars().iter().copied()) - .flatten(), -); - -let mut batch = Batch { - level: target_level.to_string(), - chars: Charset::default(), - chars_p1: Charset::default(), - chars_p2: Charset::default(), - chars_bad: Charset::default(), - examples: Vec::new(), -}; -let mut batch_chars = Charset::default(); - -eprintln!("----"); -eprintln!("Level : {}", batch.level); -eprintln!("Target : {}", target_chars.to_string()); -eprintln!("Prev1 : {}", chars_p1.to_string()); -eprintln!("Prev2 : {}", chars_p2.to_string()); -eprintln!("Bad : {} characters", chars_bad.len()); - -let batch_len = 20; -let mut stalled = false; -while batch.chars.len() < batch_len && !target_chars.is_empty() { - let need = batch_len - batch.chars.len(); - let should_add = need > target_chars.len() && target_chars.len() <= 3; - if target_i + 1 < kanji_levels.len() && (should_add || stalled) { - // upgrade to next level - target_i += 1; - chars_late = chars_late.union(&target_chars); - target_chars = 
target_chars.union(&kanji_levels[target_i].1.diff(&prev_chars)); - chars_bad = chars_bad.diff(&target_chars); - chars_bad_avoid = chars_bad_avoid.diff(&target_chars); - if batch.examples.is_empty() { - batch.level = kanji_levels[target_i].0.to_string(); - } else { - batch.level = format!("{} + {}", batch.level, kanji_levels[target_i].0); - } - eprintln!("Level : {}", batch.level); - eprintln!("Target: {}", target_chars.to_string()); - eprintln!("Late : {}", chars_late.to_string()); - eprintln!("Bad : {} characters", chars_bad.len()); - stalled = false; - } - /* this one works well enough - let cost = |ex: &Example, ex_tgt_inter: usize| { - 20i32 * ex_tgt_inter as i32 - + 30i32 * ex.chars.inter_len(&chars_late) as i32 - + 6i32 * ex.chars.inter_len(&batch.chars) as i32 - + 4i32 * ex.chars.inter_len(&chars_p1) as i32 - + 3i32 * ex.chars.inter_len(&chars_p2) as i32 - - 40i32 * ex.chars.inter_len(&chars_bad) as i32 - }; - */ - let cost = |ex: &Example, ex_tgt_inter: usize| { - ( - -(ex.chars.inter_len(&chars_bad_avoid) as i32), - ex_tgt_inter, - ex.chars.inter_len(&chars_late), - 2 * ex.chars.inter_len(&chars_p1) + ex.chars.inter_len(&chars_p2), - -(ex.ja.chars().count() as i32), - ) - }; - let cand_1 = examples - .par_iter() - .map(|ex| (ex, ex.chars.inter_len(&target_chars))) - .filter(|(_, ex_tgt_inter)| { - (1..=4).contains(ex_tgt_inter) && *ex_tgt_inter + batch.chars.len() <= batch_len - }) - .max_by_key(|(ex, ex_tgt_inter)| cost(ex, *ex_tgt_inter)); - let cand = cand_1.or_else(|| { - examples - .par_iter() - .map(|ex| (ex, ex.chars.inter_len(&target_chars))) - .filter(|(_, ex_tgt_inter)| *ex_tgt_inter > 0) - .max_by_key(|(ex, ex_tgt_inter)| cost(ex, *ex_tgt_inter)) - }); - if let Some((ex, _)) = cand { - eprintln!( - "* add {} (rep: {}, p1: {}, p2: {}, bad: {}) {}", - ex.chars.inter(&target_chars).to_string(), - ex.chars.inter(&batch.chars).to_string(), - ex.chars.inter(&chars_p1).to_string(), - ex.chars.inter(&chars_p2).to_string(), - 
ex.chars.inter(&chars_bad).to_string(), - ex.ja - ); - batch.chars = batch.chars.union(&ex.chars.inter(&target_chars)); - target_chars = target_chars.diff(&ex.chars); - chars_late = chars_late.diff(&ex.chars); - batch.examples.push(ex.clone()); - batch_chars = batch_chars.union(&ex.chars); - stalled = false; - } else { - if stalled { - eprintln!( - "could not find suitable sentence, stopping batch now (need {})", - need - ); - break; - } - stalled = true; - } -} - -batch.chars_p1 = chars_p1.inter(&batch_chars); -batch.chars_p2 = chars_p2.inter(&batch_chars); -batch.chars_bad = chars_bad.inter(&batch_chars); - -Ok(batch) - */ - fn simplify_batch(batch: &mut Batch) { let mut char_cnt = HashMap::<char, usize>::new(); for ex in batch.examples.iter() { |