diff options
author | Alex Auvolat <alex@adnab.me> | 2023-11-27 20:27:01 +0100 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2023-11-27 20:27:01 +0100 |
commit | 12690a6afef96aa165f56762689fca682b76f9a0 (patch) | |
tree | affc0bc1dec23ac5414e92b44d2610dc519adf27 | |
parent | 061d9d3cddbd2673e4601c335b362bb1435b59b8 (diff) | |
download | datagengo-12690a6afef96aa165f56762689fca682b76f9a0.tar.gz datagengo-12690a6afef96aa165f56762689fca682b76f9a0.zip |
furigana: fix edge case
-rw-r--r-- | src/format.rs | 10 | ||||
-rw-r--r-- | src/main.rs | 2 |
2 files changed, 8 insertions, 4 deletions
```diff
diff --git a/src/format.rs b/src/format.rs
index 88c81c3..83c63a1 100644
--- a/src/format.rs
+++ b/src/format.rs
@@ -257,8 +257,12 @@ fn format_ex_furigana<'a>(dict_idx: &DictIndex<'a>, ex: &Example) -> String {
         }
 
         let keb_suffix = keb.chars().skip(common_cnt).collect::<String>();
-        let reb = reb.strip_suffix(&keb_suffix).unwrap_or(reb);
-        //println!(" >> common reb: {}, common_word: {}", reb, word.chars().take(common_cnt).collect::<String>());
+        let word_suffix = word.chars().skip(common_cnt).collect::<String>();
+        let reb = reb
+            .strip_suffix(&keb_suffix)
+            .or(reb.strip_suffix(&word_suffix))
+            .unwrap_or(reb);
+        //println!(" common reb: {}, common word: {}", reb, word.chars().take(common_cnt).collect::<String>());
 
         let wchars = Vec::from_iter(word.chars().take(common_cnt));
         let rchars = Vec::from_iter(reb.chars());
@@ -327,7 +331,7 @@ fn format_ex_furigana<'a>(dict_idx: &DictIndex<'a>, ex: &Example) -> String {
             write!(&mut ret, "[[{}||{}]]", wbuf, rbuf).unwrap();
         }
 
-        ret.extend(word.chars().skip(common_cnt));
+        ret += &word_suffix;
     }
     ret
 }
diff --git a/src/main.rs b/src/main.rs
index f6f286a..b8996e8 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -189,7 +189,7 @@ fn main() {
     batches
         .iter()
         .enumerate()
-        //.skip(23)
+        //.skip(25)
         //.take(1)
         .for_each(|x| format_batch(&jmdict_idx, batches.len(), x));
 
```