aboutsummaryrefslogtreecommitdiff
path: root/src/main.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/main.rs')
-rw-r--r--src/main.rs120
1 files changed, 62 insertions, 58 deletions
diff --git a/src/main.rs b/src/main.rs
index b8996e8..85b278a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -10,6 +10,7 @@ use structopt::StructOpt;
mod charset;
mod datafiles;
+mod example;
mod format;
use charset::Charset;
use datafiles::*;
@@ -36,6 +37,7 @@ enum Cmd {
Cleanup,
AddVocab,
AddExamples,
+ AddFurigana,
Format,
}
@@ -70,73 +72,46 @@ fn main() {
.collect::<Vec<_>>();
let mut ex = read_examples(&all_kanji).expect("read_examples");
ex.retain(|e| (5..=25).contains(&e.ja.chars().count()));
- let mut batches: Vec<Batch> = fs::read("data/batches.json")
- .map_err(anyhow::Error::from)
- .and_then(|x| Ok(serde_json::from_slice(&x)?))
- .unwrap_or_default();
+
+ let mut batches = read_batches().unwrap_or_default();
+
if let Some(t) = truncate {
batches.truncate(t);
}
println!("---- starting after {} batches ----", batches.len());
let target_len = batches.len() + count;
gen_batches(&mut batches, target_len, &kanji_levels, &ex);
- fs::write(
- "data/batches.json",
- serde_json::to_string_pretty(&batches)
- .expect("serialize")
- .as_bytes(),
- )
- .expect("save");
+
+ save_batches(batches).expect("save_batches");
}
Cmd::Simplify => {
- let mut batches: Vec<Batch> = fs::read("data/batches.json")
- .map_err(anyhow::Error::from)
- .and_then(|x| Ok(serde_json::from_slice(&x)?))
- .expect("failed to decode batches.json");
+ let mut batches = read_batches().expect("read_batches");
+
for batch in batches.iter_mut() {
simplify_batch(batch);
}
- fs::write(
- "data/batches.json",
- serde_json::to_string_pretty(&batches)
- .expect("serialize")
- .as_bytes(),
- )
- .expect("save");
+
+ save_batches(batches).expect("save_batches");
}
Cmd::Cleanup => {
- let mut batches: Vec<Batch> = fs::read("data/batches.json")
- .map_err(anyhow::Error::from)
- .and_then(|x| Ok(serde_json::from_slice(&x)?))
- .expect("failed to decode batches.json");
+ let mut batches = read_batches().expect("read_batches");
+
let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
let kanji_levels = kanji_levels
.into_iter()
.map(|(l, x)| (l, Charset::new(x)))
.collect::<Vec<_>>();
cleanup_batches(&mut batches, &kanji_levels);
- fs::write(
- "data/batches.json",
- serde_json::to_string_pretty(&batches)
- .expect("serialize")
- .as_bytes(),
- )
- .expect("save");
+
+ save_batches(batches).expect("save_batches");
}
Cmd::AddVocab => {
- let mut batches: Vec<Batch> = fs::read("data/batches.json")
- .map_err(anyhow::Error::from)
- .and_then(|x| Ok(serde_json::from_slice(&x)?))
- .expect("failed to decode batches.json");
+ let mut batches = read_batches().expect("read_batches");
+
let jlpt_vocab = load_jlpt_vocab().expect("load_jlpt_vocab");
add_vocab(&mut batches, &jlpt_vocab);
- fs::write(
- "data/batches.json",
- serde_json::to_string_pretty(&batches)
- .expect("serialize")
- .as_bytes(),
- )
- .expect("save");
+
+ save_batches(batches).expect("save_batches");
}
Cmd::AddExamples => {
let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
@@ -151,20 +126,37 @@ fn main() {
let mut ex = read_examples(&all_kanji).expect("read_examples");
ex.retain(|e| (5..=25).contains(&e.ja.chars().count()));
- let mut batches: Vec<Batch> = fs::read("data/batches.json")
- .map_err(anyhow::Error::from)
- .and_then(|x| Ok(serde_json::from_slice(&x)?))
- .expect("failed to decode batches.json");
+ let mut batches = read_batches().expect("read_batches");
add_extra_examples(&mut batches, &ex);
- fs::write(
- "data/batches.json",
- serde_json::to_string_pretty(&batches)
- .expect("serialize")
- .as_bytes(),
+ save_batches(batches).expect("save_batches");
+ }
+ Cmd::AddFurigana => {
+ let mut batches = read_batches().expect("read_batches");
+
+ let jmdict = fs::read_to_string("data/JMdict_e.xml").expect("read_jmdict");
+ let jmdict = roxmltree::Document::parse_with_options(
+ &jmdict,
+ roxmltree::ParsingOptions {
+ allow_dtd: true,
+ ..Default::default()
+ },
)
- .expect("save");
+ .expect("parse_jmdict");
+ let jmdict_idx = index_jmdict(&jmdict);
+
+ for batch in batches.iter_mut() {
+ for ex in batch
+ .examples
+ .iter_mut()
+ .chain(batch.extra_examples.iter_mut())
+ {
+ ex.gen_furigana(&jmdict_idx);
+ }
+ }
+
+ save_batches(batches).expect("save_batches");
}
Cmd::Format => {
let jmdict = fs::read_to_string("data/JMdict_e.xml").expect("read_jmdict");
@@ -178,10 +170,7 @@ fn main() {
.expect("parse_jmdict");
let jmdict_idx = index_jmdict(&jmdict);
- let batches = fs::read("data/batches.json")
- .map_err(anyhow::Error::from)
- .and_then(|x| Ok(serde_json::from_slice::<Vec<Batch>>(&x)?))
- .expect("read/parse");
+ let batches = read_batches().expect("read/parse");
fs::create_dir_all("public").expect("mkdir public");
fs::copy("static/style.css", "public/style.css").expect("copy style.css");
@@ -200,6 +189,21 @@ fn main() {
}
}
+// ----
+
+fn read_batches() -> anyhow::Result<Vec<Batch>> {
+ let json = fs::read("data/batches.json")?;
+ Ok(serde_json::from_slice::<Vec<Batch>>(&json)?)
+}
+
+fn save_batches(batches: Vec<Batch>) -> anyhow::Result<()> {
+ fs::write(
+ "data/batches.json",
+ serde_json::to_string_pretty(&batches)?.as_bytes(),
+ )?;
+ Ok(())
+}
+
// =====================================================================
// BATCH STRUCTURES AND GENERATION
// =====================================================================