aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Alex Auvolat <alex@adnab.me> 2023-10-18 21:59:16 +0200
committer: Alex Auvolat <alex@adnab.me> 2023-10-18 21:59:16 +0200
commit: 2a919c1f028d891744d37ad45664986b6cba4a5d (patch)
tree: cd231964696421f56b843bde109a193107c0382b /src
parent: fab4731ad5a4ca26beb1a342ba85eec92014c04b (diff)
download: datagengo-2a919c1f028d891744d37ad45664986b6cba4a5d.tar.gz
datagengo-2a919c1f028d891744d37ad45664986b6cba4a5d.zip
re-allow more diversity in examples
Diffstat (limited to 'src')
-rw-r--r-- src/main.rs 4
1 files changed, 3 insertions, 1 deletions
diff --git a/src/main.rs b/src/main.rs
index 04a9e80..598f147 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1025,6 +1025,7 @@ fn add_extra_examples(all_batches: &mut [Batch], examples: &[Example]) {
.map(|x| x.chars.chars().iter().copied())
.flatten(),
);
+ let mut in_batch_extra = Charset::default();
while batch.extra_examples.len() < 40 {
let best = candidates
.iter()
@@ -1051,10 +1052,11 @@ fn add_extra_examples(all_batches: &mut [Batch], examples: &[Example]) {
})
.max_by_key(|(_, _, w1, w2, w3)| (-(*w1 as i64), *w2, -(*w3 * 100_000f32) as i64));
if let Some((i, ex, w1, w2, w3)) = best {
- if w2 > 0 || batch.extra_examples.len() < 20 {
+ if ex.chars.diff(&in_batch_extra).len() > 0 || batch.extra_examples.len() < 20 {
println!("{}\t{}\t{:.2}\t{} - {}", w1, w2, w3, ex.ja, ex.en);
batch.extra_examples.push(ex.clone());
in_batch = in_batch.union(&ex.chars);
+ in_batch_extra = in_batch_extra.union(&ex.chars);
for c in ex.chars.chars().iter() {
*char_seen_count.entry(*c).or_default() += 1;
if batch.chars.chars().contains(c) {