aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Alex Auvolat <alex@adnab.me> 2023-10-06 15:32:59 +0200
committer Alex Auvolat <alex@adnab.me> 2023-10-06 15:32:59 +0200
commit 5aae4a8185d1417028a4b22d43fbac851d51a843 (patch)
tree a70b8fcea5e5aedfb72a9689bb2864ecbf74de0c /src
parent d602f00607aa23cf49485637fc88f0484746a80d (diff)
download datagengo-5aae4a8185d1417028a4b22d43fbac851d51a843.tar.gz
datagengo-5aae4a8185d1417028a4b22d43fbac851d51a843.zip
add extra examples
Diffstat (limited to 'src')
-rw-r--r-- src/main.rs 67
1 files changed, 67 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs
index ce352d4..8668a82 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -31,6 +31,7 @@ enum Cmd {
Simplify,
Cleanup,
AddVocab,
+ AddExamples,
Format,
}
@@ -133,6 +134,34 @@ fn main() {
)
.expect("save");
}
+ Cmd::AddExamples => {
+ let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
+ let all_kanji = Charset::new(
+ kanji_levels
+ .iter()
+ .map(|(_, x)| x.to_string())
+ .collect::<Vec<_>>()
+ .join(""),
+ );
+
+ let mut ex = read_examples(&all_kanji).expect("read_examples");
+ ex.retain(|e| (5..=25).contains(&e.ja.chars().count()));
+
+ let mut batches: Vec<Batch> = fs::read("data/batches.json")
+ .map_err(anyhow::Error::from)
+ .and_then(|x| Ok(serde_json::from_slice(&x)?))
+ .unwrap_or_default();
+
+ add_examples(&mut batches, &ex);
+
+ fs::write(
+ "data/batches.json",
+ serde_json::to_string_pretty(&batches)
+ .expect("serialize")
+ .as_bytes(),
+ )
+ .expect("save");
+ }
Cmd::Format => {
let jmdict = fs::read_to_string("data/JMdict_e.xml").expect("read_jmdict");
let jmdict = roxmltree::Document::parse_with_options(
@@ -495,6 +524,8 @@ struct Batch {
examples: Vec<Example>,
#[serde(default)]
extra_vocab: Vec<JlptVocab>,
+ #[serde(default)]
+ extra_examples: Vec<Example>,
}
fn gen_batches(
@@ -953,6 +984,32 @@ fn add_vocab(all_batches: &mut [Batch], vocab: &[JlptVocab]) {
}
}
+fn add_examples(all_batches: &mut [Batch], examples: &[Example]) { // For each batch in order: pick up to 20 random entries of `examples`, store them in `extra_examples`, and print them.
+ let mut chars = Charset::default(); // Running union of `batch.chars` over the batches visited so far.
+ for (i, batch) in all_batches.iter_mut().enumerate() {
+ chars = chars.union(&batch.chars); // Updated before filtering, so the current batch's characters are included.
+
+ let candidates = examples.iter()
+ .filter(|x| x.chars.inter_len(&batch.chars) > 0) // Presumably: shares at least one character with this batch (Charset::inter_len is defined elsewhere; confirm).
+ .filter(|x| x.chars.diff(&chars).len() == 0) // Presumably: has no character outside the running union (Charset::diff is defined elsewhere; confirm).
+ .filter(|x| batch.examples.iter().all(|y| y.ja != x.ja)); // Skip sentences whose `ja` text is already in this batch's `examples`.
+ let mut cand_by_chars = HashMap::new(); // Keyed by `chars.to_string()`, so candidates with the same key collapse to one entry.
+ for c in candidates {
+ cand_by_chars.insert(c.chars.to_string(), c.clone()); // A later candidate with the same key replaces the earlier one.
+ }
+ let mut candidates = cand_by_chars.into_iter().map(|(_, ex)| ex).collect::<Vec<_>>(); // Drop the keys; keep the deduplicated examples.
+ candidates.shuffle(&mut thread_rng()); // Random order, so the selection differs from run to run.
+ candidates.truncate(20); // Keep at most 20 per batch.
+ batch.extra_examples = candidates; // Overwrites whatever `extra_examples` held before.
+
+
+ println!("---- BATCH #{:03} ----", i); // `i` is the zero-based index from `enumerate`, zero-padded to three digits.
+ for ex in batch.extra_examples.iter() {
+ println!("{} - {}", ex.ja, ex.en); // One line per selected example: `ja` then `en`.
+ }
+ }
+}
+
// =====================================================================
// FORMATTING TO HTML
// =====================================================================
@@ -1075,6 +1132,16 @@ fn format_batch_aux<'a>(
&batch.extra_vocab.iter().filter(|v| !batch.level.contains(&v.level)).collect::<Vec<_>>(),
"Extra vocabulary (previous levels)")?;
+ writeln!(f, r#"<details><summary>Extra examples (reading practice)</summary><table class="extratable">"#)?;
+ for ex in batch.extra_examples.iter() {
+ writeln!(
+ f,
+ r#"<tr><td style="text-align: center"><span style="font-size: 1.2em">&nbsp;&nbsp;{}&nbsp;&nbsp;</span><br />{}</td></tr>"#,
+ ex.ja, ex.en
+ )?;
+ }
+ writeln!(f, r#"</table></details>"#)?;
+
writeln!(f, "<hr />")?;
writeln!(f, "<p>\(≧▽≦)/</p>")?;