aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/main.rs20
1 files changed, 18 insertions, 2 deletions
diff --git a/src/main.rs b/src/main.rs
index 4addd02..b69a2da 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1003,8 +1003,24 @@ fn add_examples(all_batches: &mut [Batch], examples: &[Example]) {
.map(|(_, ex)| ex)
.collect::<Vec<_>>();
candidates.shuffle(&mut thread_rng());
- candidates.truncate(20);
- batch.extra_examples = candidates;
+
+ batch.extra_examples.clear();
+ let mut in_batch = Charset::from_iter(batch.examples.iter().map(|x| x.chars.chars().iter().copied()).flatten());
+ let mut in_extra = Charset::default();
+ while batch.extra_examples.len() < 20 {
+ let best = candidates.iter().enumerate()
+ .max_by_key(|(_, ex)| (ex.chars.diff(&in_batch).len(), ex.chars.diff(&in_extra).len()));
+ if let Some((i, ex)) = best {
+ batch.extra_examples.push(ex.clone());
+ in_batch = in_batch.union(&ex.chars);
+ in_extra = in_extra.union(&ex.chars);
+ candidates.remove(i);
+ } else {
+ break;
+ }
+ }
+
+ batch.extra_examples.shuffle(&mut thread_rng());
println!("---- BATCH #{:03} ----", i);
for ex in batch.extra_examples.iter() {