aboutsummaryrefslogtreecommitdiff
path: root/src/datafiles.rs
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2023-11-28 16:31:28 +0100
committerAlex Auvolat <alex@adnab.me>2023-11-28 16:31:28 +0100
commitab232ceb32b51ac8553692cf8a2b1f86fa975f7d (patch)
treeb159521d949ebe8e2e7913a8a7c39d116a32ffb2 /src/datafiles.rs
parentb78034ad5bf65f1dfe390861f72bed827e2ab1b8 (diff)
downloaddatagengo-ab232ceb32b51ac8553692cf8a2b1f86fa975f7d.tar.gz
datagengo-ab232ceb32b51ac8553692cf8a2b1f86fa975f7d.zip
add furigana override file to fix edge cases
Diffstat (limited to 'src/datafiles.rs')
-rw-r--r--src/datafiles.rs20
1 files changed, 20 insertions, 0 deletions
diff --git a/src/datafiles.rs b/src/datafiles.rs
index 0e526ef..3065fbf 100644
--- a/src/datafiles.rs
+++ b/src/datafiles.rs
@@ -176,6 +176,26 @@ pub fn read_examples(all_kanji: &Charset) -> Result<Vec<Example>> {
Ok(ret)
}
+/// Reads `data/furigana_overrides` into a map from stripped text to the trimmed line as written.
+/// The key drops every `||<word chars>]]` run and every `[[`; blank or unchanged lines are skipped.
+pub fn read_furigana_overrides() -> Result<HashMap<String, String>> {
+    let reader = io::BufReader::new(fs::File::open("data/furigana_overrides")?);
+    let markup_tail = regex::Regex::new(r#"\|\|\w+\]\]"#)?;
+    let mut overrides = HashMap::new();
+    for raw in reader.lines() {
+        let raw = raw?;
+        let annotated = raw.trim();
+        if annotated.is_empty() {
+            continue;
+        }
+        let plain = markup_tail.replace_all(annotated, "").replace("[[", "");
+        if plain != annotated {
+            overrides.insert(plain, annotated.to_string());
+        }
+    }
+    Ok(overrides)
+}
+
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct JlptVocab {
pub level: String,