aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alex Auvolat <alex@adnab.me> 2023-07-21 22:17:50 +0200
committer Alex Auvolat <alex@adnab.me> 2023-07-21 22:17:50 +0200
commit a359c9da4ed354f0b0061be88a2376fb34d6348f (patch)
tree 0c34200482aeb2be828e02a0474c7c84b1622711
parent a5b0ba4f4013fedf7b291b5801c0fcce22a35a60 (diff)
download datagengo-a359c9da4ed354f0b0061be88a2376fb34d6348f.tar.gz
datagengo-a359c9da4ed354f0b0061be88a2376fb34d6348f.zip
filter dictionary entries to limit to those with correct reading
-rw-r--r-- src/main.rs 37
1 files changed, 23 insertions, 14 deletions
diff --git a/src/main.rs b/src/main.rs
index e081454..8a74d82 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -356,19 +356,20 @@ fn format_batch_aux<'a>(dict_idx: &DictIndex<'a>, count: usize, i: usize, batch:
writeln!(f, r#"<details><summary>Explanation</summary>"#)?;
let mut expl_batch = Vec::new();
let mut expl_all = Vec::new();
- for w in ex.expl.split(|c| c == ' ' || c == '~') {
- let w = expl_clean_word(w);
- let wchars = Charset::new(w);
+ for word in ex.expl.split(|c| c == ' ' || c == '~') {
+ let (keb, reb) = expl_clean_word(word);
+ let wchars = Charset::new(keb);
if !wchars.intersects(&ex.chars) {
continue;
}
- if let Some(ents) = dict_idx.get(w) {
+ if let Some(ents) = dict_idx.get(keb) {
for ent in ents.iter() {
- let s = dict_str(w, ent);
- if wchars.intersects(&batch.chars) {
- expl_batch.push(s);
- } else {
- expl_all.push(s);
+ if let Some(s) = dict_str(keb, reb, ent) {
+ if wchars.intersects(&batch.chars) {
+ expl_batch.push(s);
+ } else {
+ expl_all.push(s);
+ }
}
}
}
@@ -392,21 +393,29 @@ fn format_batch_aux<'a>(dict_idx: &DictIndex<'a>, count: usize, i: usize, batch:
Ok(())
}
-fn expl_clean_word(w: &str) -> &str {
+fn expl_clean_word(w: &str) -> (&str, Option<&str>) {
let mut ret = w;
for delim in ['(', '{', '['] {
if let Some((s, _)) = ret.split_once(delim) {
ret = s;
}
}
- ret
+ let p = w.split_once('(')
+ .and_then(|(_, r)| r.split_once(')'))
+ .map(|(p, _)| p);
+ (ret, p)
}
-fn dict_str<'a>(w: &str, ent: &roxmltree::Node<'a, 'a>) -> String {
+fn dict_str<'a>(qkeb: &str, qreb: Option<&str>, ent: &roxmltree::Node<'a, 'a>) -> Option<String> {
let r_ele = ent.children().find(|x| x.has_tag_name("r_ele")).unwrap();
let reb = r_ele.children().find(|x| x.has_tag_name("reb")).unwrap();
+ let reb = reb.text().unwrap().trim();
- let mut ret = format!("{} [{}]", w, reb.text().unwrap().trim());
+ if qreb.map(|x| x != reb).unwrap_or(false) {
+ return None;
+ }
+
+ let mut ret = format!("{} [{}]", qkeb, reb);
for sense in ent.children().filter(|x| x.has_tag_name("sense")) {
if let Some(s) = sense.children().find(|x| x.has_tag_name("gloss")) {
@@ -417,7 +426,7 @@ fn dict_str<'a>(w: &str, ent: &roxmltree::Node<'a, 'a>) -> String {
if ret.chars().rev().next() == Some(';') {
ret.pop();
}
- ret
+ Some(ret)
}
fn format_index(batches: &[Batch], kanji_levels: &[(String, String)]) -> Result<()> {