diff options
Diffstat (limited to 'src/datafiles.rs')
-rw-r--r-- | src/datafiles.rs | 12 |
1 files changed, 9 insertions, 3 deletions
diff --git a/src/datafiles.rs b/src/datafiles.rs index fc6194f..d4f948d 100644 --- a/src/datafiles.rs +++ b/src/datafiles.rs @@ -23,9 +23,10 @@ pub struct Example { // PARSING DATA FILES // ===================================================================== +#[derive(Serialize, Deserialize)] pub struct DictEntry { pub reb: String, - pub ent_seq: String, + pub ent_seq: u64, pub sense: Box<[String]>, } @@ -44,7 +45,7 @@ pub fn index_jmdict(dict: &roxmltree::Document) -> DictIndex { let reb = reb.text().unwrap().trim().to_string(); let ent_seq = ent.children().find(|x| x.has_tag_name("ent_seq")).unwrap(); - let ent_seq = ent_seq.text().unwrap().trim().to_string(); + let ent_seq = ent_seq.text().unwrap().trim().parse().unwrap(); let sense = ent .children() @@ -70,6 +71,11 @@ pub fn index_jmdict(dict: &roxmltree::Document) -> DictIndex { ret } +pub fn read_jmdict_idx() -> Result<DictIndex> { + let file = fs::read("data/jmdict_idx.json")?; + Ok(serde_json::from_slice::<DictIndex>(&file)?) +} + pub fn parse_kanjidic() -> Result<Vec<(String, Charset)>> { let n3_kanji = Charset::new(&fs::read_to_string("data/n3_kanji.txt")?.trim()); @@ -196,7 +202,7 @@ pub fn read_examples(all_kanji: &Charset) -> Result<Vec<Example>> { } } if i % 10000 == 0 { - eprintln!("read examples: {}/300 (x1000)", i / 1000); + info!("read examples: {}/300 (x1000)", i / 1000); } } |