From ab232ceb32b51ac8553692cf8a2b1f86fa975f7d Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 28 Nov 2023 16:31:28 +0100 Subject: add furigana override file to fix edge cases --- src/datafiles.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'src/datafiles.rs') diff --git a/src/datafiles.rs b/src/datafiles.rs index 0e526ef..3065fbf 100644 --- a/src/datafiles.rs +++ b/src/datafiles.rs @@ -176,6 +176,26 @@ pub fn read_examples(all_kanji: &Charset) -> Result> { Ok(ret) } +pub fn read_furigana_overrides() -> Result> { + let file = fs::File::open("data/furigana_overrides")?; + + let mut ret = HashMap::new(); + let re = regex::Regex::new(r#"\|\|\w+\]\]"#)?; + + for line in io::BufReader::new(file).lines() { + let line = line?; + let line = line.trim(); + if !line.is_empty() { + let clean = re.replace_all(line, "").replace("[[", ""); + if clean != line { + ret.insert(clean, line.to_string()); + } + } + } + + Ok(ret) +} + #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] pub struct JlptVocab { pub level: String, -- cgit v1.2.3