diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/main.rs | 20 |
1 files changed, 18 insertions, 2 deletions
diff --git a/src/main.rs b/src/main.rs
index 4addd02..b69a2da 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1003,8 +1003,24 @@ fn add_examples(all_batches: &mut [Batch], examples: &[Example]) {
             .map(|(_, ex)| ex)
             .collect::<Vec<_>>();
         candidates.shuffle(&mut thread_rng());
-        candidates.truncate(20);
-        batch.extra_examples = candidates;
+
+        batch.extra_examples.clear();
+        let mut in_batch = Charset::from_iter(batch.examples.iter().map(|x| x.chars.chars().iter().copied()).flatten());
+        let mut in_extra = Charset::default();
+        while batch.extra_examples.len() < 20 {
+            let best = candidates.iter().enumerate()
+                .max_by_key(|(_, ex)| (ex.chars.diff(&in_batch).len(), ex.chars.diff(&in_extra).len()));
+            if let Some((i, ex)) = best {
+                batch.extra_examples.push(ex.clone());
+                in_batch = in_batch.union(&ex.chars);
+                in_extra = in_extra.union(&ex.chars);
+                candidates.remove(i);
+            } else {
+                break;
+            }
+        }
+
+        batch.extra_examples.shuffle(&mut thread_rng());
 
         println!("---- BATCH #{:03} ----", i);
         for ex in batch.extra_examples.iter() {