aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alex Auvolat <alex@adnab.me> 2023-11-27 20:27:01 +0100
committer Alex Auvolat <alex@adnab.me> 2023-11-27 20:27:01 +0100
commit 12690a6afef96aa165f56762689fca682b76f9a0 (patch)
tree affc0bc1dec23ac5414e92b44d2610dc519adf27
parent 061d9d3cddbd2673e4601c335b362bb1435b59b8 (diff)
download datagengo-12690a6afef96aa165f56762689fca682b76f9a0.tar.gz
datagengo-12690a6afef96aa165f56762689fca682b76f9a0.zip
furigana: fix edge case
-rw-r--r-- src/format.rs 10
-rw-r--r-- src/main.rs 2
2 files changed, 8 insertions, 4 deletions
diff --git a/src/format.rs b/src/format.rs
index 88c81c3..83c63a1 100644
--- a/src/format.rs
+++ b/src/format.rs
@@ -257,8 +257,12 @@ fn format_ex_furigana<'a>(dict_idx: &DictIndex<'a>, ex: &Example) -> String {
}
let keb_suffix = keb.chars().skip(common_cnt).collect::<String>();
- let reb = reb.strip_suffix(&keb_suffix).unwrap_or(reb);
- //println!(" >> common reb: {}, common_word: {}", reb, word.chars().take(common_cnt).collect::<String>());
+ let word_suffix = word.chars().skip(common_cnt).collect::<String>();
+ let reb = reb
+ .strip_suffix(&keb_suffix)
+ .or(reb.strip_suffix(&word_suffix))
+ .unwrap_or(reb);
+ //println!(" common reb: {}, common word: {}", reb, word.chars().take(common_cnt).collect::<String>());
let wchars = Vec::from_iter(word.chars().take(common_cnt));
let rchars = Vec::from_iter(reb.chars());
@@ -327,7 +331,7 @@ fn format_ex_furigana<'a>(dict_idx: &DictIndex<'a>, ex: &Example) -> String {
write!(&mut ret, "[[{}||{}]]", wbuf, rbuf).unwrap();
}
- ret.extend(word.chars().skip(common_cnt));
+ ret += &word_suffix;
}
ret
}
diff --git a/src/main.rs b/src/main.rs
index f6f286a..b8996e8 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -189,7 +189,7 @@ fn main() {
batches
.iter()
.enumerate()
- //.skip(23)
+ //.skip(25)
//.take(1)
.for_each(|x| format_batch(&jmdict_idx, batches.len(), x));