diff options
author | Alex Auvolat <alex@adnab.me> | 2023-11-28 16:31:28 +0100 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2023-11-28 16:31:28 +0100 |
commit | ab232ceb32b51ac8553692cf8a2b1f86fa975f7d (patch) | |
tree | b159521d949ebe8e2e7913a8a7c39d116a32ffb2 /src/datafiles.rs | |
parent | b78034ad5bf65f1dfe390861f72bed827e2ab1b8 (diff) | |
download | datagengo-ab232ceb32b51ac8553692cf8a2b1f86fa975f7d.tar.gz datagengo-ab232ceb32b51ac8553692cf8a2b1f86fa975f7d.zip |
add furigana override file to fix edge cases
Diffstat (limited to 'src/datafiles.rs')
-rw-r--r-- | src/datafiles.rs | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/src/datafiles.rs b/src/datafiles.rs index 0e526ef..3065fbf 100644 --- a/src/datafiles.rs +++ b/src/datafiles.rs @@ -176,6 +176,26 @@ pub fn read_examples(all_kanji: &Charset) -> Result<Vec<Example>> { Ok(ret) } +pub fn read_furigana_overrides() -> Result<HashMap<String, String>> { + let file = fs::File::open("data/furigana_overrides")?; + + let mut ret = HashMap::new(); + let re = regex::Regex::new(r#"\|\|\w+\]\]"#)?; + + for line in io::BufReader::new(file).lines() { + let line = line?; + let line = line.trim(); + if !line.is_empty() { + let clean = re.replace_all(line, "").replace("[[", ""); + if clean != line { + ret.insert(clean, line.to_string()); + } + } + } + + Ok(ret) +} + #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] pub struct JlptVocab { pub level: String, |