aboutsummaryrefslogtreecommitdiff
path: root/src/datafiles.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/datafiles.rs')
-rw-r--r--src/datafiles.rs12
1 files changed, 9 insertions, 3 deletions
diff --git a/src/datafiles.rs b/src/datafiles.rs
index fc6194f..d4f948d 100644
--- a/src/datafiles.rs
+++ b/src/datafiles.rs
@@ -23,9 +23,10 @@ pub struct Example {
// PARSING DATA FILES
// =====================================================================
+#[derive(Serialize, Deserialize)]
pub struct DictEntry {
pub reb: String,
- pub ent_seq: String,
+ pub ent_seq: u64,
pub sense: Box<[String]>,
}
@@ -44,7 +45,7 @@ pub fn index_jmdict(dict: &roxmltree::Document) -> DictIndex {
let reb = reb.text().unwrap().trim().to_string();
let ent_seq = ent.children().find(|x| x.has_tag_name("ent_seq")).unwrap();
- let ent_seq = ent_seq.text().unwrap().trim().to_string();
+ let ent_seq = ent_seq.text().unwrap().trim().parse().unwrap();
let sense = ent
.children()
@@ -70,6 +71,11 @@ pub fn index_jmdict(dict: &roxmltree::Document) -> DictIndex {
ret
}
+pub fn read_jmdict_idx() -> Result<DictIndex> {
+ let file = fs::read("data/jmdict_idx.json")?;
+ Ok(serde_json::from_slice::<DictIndex>(&file)?)
+}
+
pub fn parse_kanjidic() -> Result<Vec<(String, Charset)>> {
let n3_kanji = Charset::new(&fs::read_to_string("data/n3_kanji.txt")?.trim());
@@ -196,7 +202,7 @@ pub fn read_examples(all_kanji: &Charset) -> Result<Vec<Example>> {
}
}
if i % 10000 == 0 {
- eprintln!("read examples: {}/300 (x1000)", i / 1000);
+ info!("read examples: {}/300 (x1000)", i / 1000);
}
}