From 2a919c1f028d891744d37ad45664986b6cba4a5d Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 18 Oct 2023 21:59:16 +0200 Subject: re-allow more diversity in examples --- src/main.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/main.rs b/src/main.rs index 04a9e80..598f147 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1025,6 +1025,7 @@ fn add_extra_examples(all_batches: &mut [Batch], examples: &[Example]) { .map(|x| x.chars.chars().iter().copied()) .flatten(), ); + let mut in_batch_extra = Charset::default(); while batch.extra_examples.len() < 40 { let best = candidates .iter() @@ -1051,10 +1052,11 @@ fn add_extra_examples(all_batches: &mut [Batch], examples: &[Example]) { }) .max_by_key(|(_, _, w1, w2, w3)| (-(*w1 as i64), *w2, -(*w3 * 100_000f32) as i64)); if let Some((i, ex, w1, w2, w3)) = best { - if w2 > 0 || batch.extra_examples.len() < 20 { + if ex.chars.diff(&in_batch_extra).len() > 0 || batch.extra_examples.len() < 20 { println!("{}\t{}\t{:.2}\t{} - {}", w1, w2, w3, ex.ja, ex.en); batch.extra_examples.push(ex.clone()); in_batch = in_batch.union(&ex.chars); + in_batch_extra = in_batch_extra.union(&ex.chars); for c in ex.chars.chars().iter() { *char_seen_count.entry(*c).or_default() += 1; if batch.chars.chars().contains(c) { -- cgit v1.2.3