author    Alex Auvolat <alex@adnab.me>    2023-11-28 16:22:16 +0100
committer Alex Auvolat <alex@adnab.me>    2023-11-28 16:22:16 +0100
commit    b78034ad5bf65f1dfe390861f72bed827e2ab1b8 (patch)
tree      aa42b4edc8d2a44bb5c41890089cb754ca7d40d7 /src
parent    64568528b13d08ceaa0c36c20b3aa20d966cfdcb (diff)
add furigana to main examples and persist furigana in batches.json
Diffstat (limited to 'src')
-rw-r--r--  src/datafiles.rs    4
-rw-r--r--  src/example.rs    203
-rw-r--r--  src/format.rs     233
-rw-r--r--  src/main.rs       120
4 files changed, 293 insertions(+), 267 deletions(-)
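
For orientation (editor's note, not part of the commit): gen_furigana, added below in src/example.rs, records readings inline with a [[base||reading]] markup, and furigana_markup later expands that into HTML ruby annotations. A minimal stand-alone sketch of that conversion; the function name furigana_to_ruby and the sample sentence are made up for this note:

    // Sketch only: mirrors Example::furigana_markup() from src/example.rs below.
    // "[[base||reading]]" spans become <ruby>base<rt>reading</rt></ruby>.
    fn furigana_to_ruby(furi: &str) -> String {
        furi.replace("[[", "<ruby>")
            .replace("||", "<rt>")
            .replace("]]", "</rt></ruby>")
    }

    fn main() {
        let s = "[[漢字||かんじ]]を読む";
        assert_eq!(furigana_to_ruby(s), "<ruby>漢字<rt>かんじ</rt></ruby>を読む");
    }
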
diff --git a/src/datafiles.rs b/src/datafiles.rs
index 629badf..0e526ef 100644
--- a/src/datafiles.rs
+++ b/src/datafiles.rs
@@ -12,6 +12,8 @@ pub struct Example {
pub ja: String,
pub en: String,
pub expl: String,
+ #[serde(default)]
+ pub furigana: Option<String>,
pub id: Option<String>,
pub chars: Charset,
}
@@ -151,6 +153,7 @@ pub fn read_examples(all_kanji: &Charset) -> Result<Vec<Example>> {
expl: b.to_string(),
id: Some(id.to_string()),
chars: Charset::new(ja).inter(all_kanji),
+ furigana: None,
});
} else {
ret.push(Example {
@@ -159,6 +162,7 @@ pub fn read_examples(all_kanji: &Charset) -> Result<Vec<Example>> {
expl: b.to_string(),
id: None,
chars: Charset::new(ja).inter(all_kanji),
+ furigana: None,
});
}
}
diff --git a/src/example.rs b/src/example.rs
new file mode 100644
index 0000000..71f3f13
--- /dev/null
+++ b/src/example.rs
@@ -0,0 +1,203 @@
+use std::collections::HashSet;
+
+use crate::charset::Charset;
+use crate::*;
+
+impl Example {
+ pub fn gen_furigana<'a>(&mut self, dict_idx: &DictIndex<'a>) {
+ use std::fmt::Write;
+
+ let mut remainder = self.ja.as_str();
+ let mut ret = String::new();
+
+ for word in self.expl.split(|c| c == ' ' || c == '~') {
+ let (keb, reb) = expl_clean_word(word);
+ let word = word
+ .split_once('{')
+ .and_then(|(_, r)| r.split_once('}'))
+ .map(|(p, _)| p)
+ .unwrap_or(keb);
+
+ if let Some(i) = remainder.find(word) {
+ ret += &remainder[..i];
+ remainder = &remainder[i..];
+ }
+
+ let mut new_word = String::new();
+ for c in word.chars() {
+ if remainder.starts_with(c) {
+ remainder = remainder.strip_prefix(c).unwrap();
+ new_word.push(c);
+ } else {
+ eprintln!("!!!! Char {} is not in remainder !!!!", c);
+ }
+ }
+ let word = &new_word;
+
+ if !Charset::new(word).intersects(&self.chars) {
+ ret += word;
+ continue;
+ }
+
+ let reb = match reb {
+ Some(reb) if reb.starts_with('#') => {
+ let ents = dict_idx.get(keb).map(|x| &x[..]).unwrap_or_default();
+ if let Some(ent) = ents.iter().find(|ent| {
+ let ent_seq = ent.children().find(|x| x.has_tag_name("ent_seq")).unwrap();
+ ent_seq.text().unwrap().trim() == reb.strip_prefix('#').unwrap()
+ }) {
+ let r_ele = ent.children().find(|x| x.has_tag_name("r_ele")).unwrap();
+ let reb = r_ele.children().find(|x| x.has_tag_name("reb")).unwrap();
+ reb.text().unwrap().trim()
+ } else {
+ println!("- entry id not found: {}", reb);
+ ret += &word;
+ continue;
+ }
+ }
+ Some(reb) => reb,
+ None => {
+ let ents = dict_idx.get(keb).map(|x| &x[..]).unwrap_or_default();
+ let matches = ents
+ .iter()
+ .map(|ent| {
+ let r_ele = ent.children().find(|x| x.has_tag_name("r_ele")).unwrap();
+ let reb = r_ele.children().find(|x| x.has_tag_name("reb")).unwrap();
+ reb.text().unwrap().trim()
+ })
+ .collect::<HashSet<_>>();
+ if matches.len() == 1 {
+ *matches.iter().next().unwrap()
+ } else {
+ println!("- word without reb: {}", word);
+ ret += &word;
+ continue;
+ }
+ }
+ };
+
+ //println!("+ word: {}, keb: {}, reb: {}", word, keb, reb);
+ let common_cnt = word
+ .chars()
+ .zip(keb.chars())
+ .take_while(|(x, y)| x == y)
+ .count();
+ if common_cnt == 0 {
+ // Strange cases
+ write!(&mut ret, "[[{}||{}]]", word, reb).unwrap();
+ continue;
+ }
+
+ let keb_suffix = keb.chars().skip(common_cnt).collect::<String>();
+ let word_suffix = word.chars().skip(common_cnt).collect::<String>();
+ let reb = reb
+ .strip_suffix(&keb_suffix)
+ .or(reb.strip_suffix(&word_suffix))
+ .unwrap_or(reb);
+ //println!(" common reb: {}, common word: {}", reb, word.chars().take(common_cnt).collect::<String>());
+
+ let wchars = Vec::from_iter(word.chars().take(common_cnt));
+ let rchars = Vec::from_iter(reb.chars());
+
+ // We shall invoke Levenshtein distance
+ let mut dynrow0 = vec![(0, 0, 0, false)];
+ for ri in 0..rchars.len() {
+ dynrow0.push((0, ri, 100 + ri + 1, false));
+ }
+ let mut dyntab = vec![dynrow0];
+
+ for (wi, wc) in wchars.iter().enumerate() {
+ let mut dynrow = vec![(wi, 0, 100 + wi + 1, false)];
+
+ for (ri, rc) in rchars.iter().enumerate() {
+ let mut x = vec![];
+ if dyntab[wi][ri + 1].3 {
+ x.push((wi, ri + 1, dyntab[wi][ri + 1].2 + 1, true));
+ }
+ if dynrow[ri].3 {
+ x.push((wi + 1, ri, dynrow[ri].2 + 1, true));
+ }
+ if wc == rc {
+ x.push((wi, ri, dyntab[wi][ri].2, false));
+ } else {
+ x.push((wi, ri, dyntab[wi][ri].2 + 1, true));
+ }
+ dynrow.push(x.into_iter().min_by_key(|(_, _, w, _)| *w).unwrap());
+ }
+ dyntab.push(dynrow);
+ }
+ //eprintln!("DYN TAB: {:?}", dyntab);
+
+ let mut path = vec![(wchars.len(), rchars.len())];
+ loop {
+ let (wi, ri) = *path.last().unwrap();
+ let (wi2, ri2, _, _) = dyntab[wi][ri];
+ path.push((wi2, ri2));
+ if wi2 == 0 && ri2 == 0 {
+ break;
+ }
+ }
+ path.reverse();
+ //eprintln!("DYN PATH: {:?}", path);
+
+ let mut wbuf = String::new();
+ let mut rbuf = String::new();
+ for ((wi1, ri1), (wi2, ri2)) in path.iter().copied().zip(path.iter().copied().skip(1)) {
+ if wi2 > wi1 && ri2 > ri1 && wchars[wi1] == rchars[ri1] {
+ if !wbuf.is_empty() || !rbuf.is_empty() {
+ write!(&mut ret, "[[{}||{}]]", wbuf, rbuf).unwrap();
+ wbuf.clear();
+ rbuf.clear();
+ }
+ ret.push(wchars[wi1]);
+ } else {
+ if wi2 > wi1 {
+ wbuf.push(wchars[wi1]);
+ }
+ if ri2 > ri1 {
+ rbuf.push(rchars[ri1]);
+ }
+ }
+ }
+ if !wbuf.is_empty() || !rbuf.is_empty() {
+ write!(&mut ret, "[[{}||{}]]", wbuf, rbuf).unwrap();
+ }
+
+ ret += &word_suffix;
+ }
+ ret += remainder;
+
+ // CHECK
+ let re = regex::Regex::new(r#"\|\|\w+\]\]"#).unwrap();
+ let back_to_ja = re.replace_all(&ret, "").replace("[[", "");
+ if self.ja != back_to_ja {
+ eprintln!("!!!! {} != {}", self.ja, back_to_ja);
+ }
+
+ self.furigana = Some(ret);
+ }
+
+ pub fn furigana_markup(&self) -> String {
+ if let Some(furi) = &self.furigana {
+ furi.replace("[[", "<ruby>")
+ .replace("||", "<rt>")
+ .replace("]]", "</rt></ruby>")
+ } else {
+ self.ja.to_string()
+ }
+ }
+}
+
+pub fn expl_clean_word(w: &str) -> (&str, Option<&str>) {
+ let mut ret = w;
+ for delim in ['(', '{', '['] {
+ if let Some((s, _)) = ret.split_once(delim) {
+ ret = s;
+ }
+ }
+ let p = w
+ .split_once('(')
+ .and_then(|(_, r)| r.split_once(')'))
+ .map(|(p, _)| p);
+ (ret, p)
+}
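
An illustrative expectation for expl_clean_word above (editor's sketch; the sample tokens are invented and the entry id #1234567 is a placeholder, not a real JMdict ent_seq): the part of an explanation token before any '(', '{' or '[' is returned as the written form (keb), and the first parenthesised group, if present, as the reading hint (reb). A self-contained check that copies the helper:

    // Stand-alone copy of expl_clean_word (from src/example.rs above) with
    // illustrative expectations; sample tokens are not taken from the commit.
    fn expl_clean_word(w: &str) -> (&str, Option<&str>) {
        let mut ret = w;
        for delim in ['(', '{', '['] {
            if let Some((s, _)) = ret.split_once(delim) {
                ret = s;
            }
        }
        let p = w
            .split_once('(')
            .and_then(|(_, r)| r.split_once(')'))
            .map(|(p, _)| p);
        (ret, p)
    }

    fn main() {
        // Plain token: no reading hint attached.
        assert_eq!(expl_clean_word("読む"), ("読む", None));
        // Parenthesised reading: returned as the reb hint.
        assert_eq!(expl_clean_word("読む(よむ)"), ("読む", Some("よむ")));
        // A '#'-prefixed hint is resolved as a JMdict ent_seq id by gen_furigana.
        assert_eq!(expl_clean_word("読む(#1234567)"), ("読む", Some("#1234567")));
    }
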
diff --git a/src/format.rs b/src/format.rs
index a556677..caed70a 100644
--- a/src/format.rs
+++ b/src/format.rs
@@ -1,9 +1,9 @@
-use std::collections::HashSet;
use std::fs;
use anyhow::Result;
use crate::charset::Charset;
+use crate::example::expl_clean_word;
use crate::*;
// =====================================================================
@@ -62,16 +62,26 @@ fn format_batch_aux<'a>(
for ex in batch.examples.iter() {
writeln!(f, "<hr />")?;
- write!(f, r#"<p class="ja">"#)?;
- for c in ex.ja.chars() {
- if batch.chars.contains(c) {
- write!(f, r#"<span class="char_cur">{}</span>"#, c)?;
+ write!(f, r#"<p class="ja ja_main">"#)?;
+ let furi = ex.furigana_markup();
+ for c in furi.chars() {
+ let class = if batch.chars.contains(c) {
+ Some("char_cur")
} else if batch.chars_p1.contains(c) {
- write!(f, r#"<span class="char_p1">{}</span>"#, c)?;
+ Some("char_p1")
} else if batch.chars_p2.contains(c) {
- write!(f, r#"<span class="char_p2">{}</span>"#, c)?;
+ Some("char_p2")
} else if batch.chars_bad.contains(c) {
- write!(f, r#"<span class="char_bad">{}</span>"#, c)?;
+ Some("char_bad")
+ } else {
+ None
+ };
+ if let Some(cls) = class {
+ write!(
+ f,
+ r#"<a href="https://jisho.org/search/{}%20%23kanji" class="{}">{}</a>"#,
+ c, cls, c
+ )?;
} else {
write!(f, "{}", c)?;
}
@@ -140,25 +150,17 @@ fn format_batch_aux<'a>(
writeln!(
f,
- r#"<details><summary>Extra examples (reading practice)</summary><table class="extratable">"#
+ r#"<p><strong>Extra examples (reading practice)</strong></p><table class="extratable">"#
)?;
for ex in batch.extra_examples.iter() {
- let furi = format_ex_furigana(dict_idx, ex);
- // println!(
- // "FURIGANA: {}\n => {}",
- // ex.ja,
- // format_ex_furigana(dict_idx, ex)
- // );
writeln!(
f,
r#"<tr><td><div class="extra_example"><div class="extra_ja font_ja">{}</div><div class="extra_en">{}</div></div></td></tr>"#,
- furi.replace("[[", "<ruby>")
- .replace("||", "<rt>")
- .replace("]]", "</rt></ruby>"),
+ ex.furigana_markup(),
ex.en
)?;
}
- writeln!(f, r#"</table></details>"#)?;
+ writeln!(f, r#"</table>"#)?;
writeln!(f, "<hr />")?;
writeln!(f, "<p>\(≧▽≦)/</p>")?;
@@ -172,208 +174,21 @@ fn format_vocab(f: &mut impl Write, vocab: &[&JlptVocab], t: &str) -> Result<()>
if !vocab.is_empty() {
writeln!(
f,
- r#"<details><summary>{}</summary><table class="vocabtable">"#,
+ r#"<p><strong>{}</strong></p><table class="vocabtable">"#,
t
)?;
for v in vocab {
writeln!(
f,
- r#"<tr><td>{}</td><td>&nbsp;&nbsp;<span class="tab_large font_ja">{}</span>&nbsp;&nbsp;</td><td>{}</td><td class="font_ja">{}</td></tr>"#,
+ r#"<tr><td>{}</td><td style="word-break: keep-all">&nbsp;&nbsp;<span class="tab_large font_ja">{}</span>&nbsp;&nbsp;</td><td>{}</td><td class="font_ja" style="word-break: keep-all">{}</td></tr>"#,
v.level, v.kanji, v.en, v.kana
)?;
}
- writeln!(f, "</table></details>")?;
+ writeln!(f, "</table>")?;
}
Ok(())
}
-fn format_ex_furigana<'a>(dict_idx: &DictIndex<'a>, ex: &Example) -> String {
- use std::fmt::Write;
-
- let mut remainder = ex.ja.as_str();
- let mut ret = String::new();
-
- for word in ex.expl.split(|c| c == ' ' || c == '~') {
- let (keb, reb) = expl_clean_word(word);
- let word = word
- .split_once('{')
- .and_then(|(_, r)| r.split_once('}'))
- .map(|(p, _)| p)
- .unwrap_or(keb);
-
- if let Some(i) = remainder.find(word) {
- ret += &remainder[..i];
- remainder = &remainder[i..];
- }
-
- let mut new_word = String::new();
- for c in word.chars() {
- if remainder.starts_with(c) {
- remainder = remainder.strip_prefix(c).unwrap();
- new_word.push(c);
- } else {
- eprintln!("!!!! Char {} is not in remainder !!!!", c);
- }
- }
- let word = &new_word;
-
- if !Charset::new(word).intersects(&ex.chars) {
- ret += word;
- continue;
- }
-
- let reb = match reb {
- Some(reb) if reb.starts_with('#') => {
- let ents = dict_idx.get(keb).map(|x| &x[..]).unwrap_or_default();
- if let Some(ent) = ents.iter().find(|ent| {
- let ent_seq = ent.children().find(|x| x.has_tag_name("ent_seq")).unwrap();
- ent_seq.text().unwrap().trim() == reb.strip_prefix('#').unwrap()
- }) {
- let r_ele = ent.children().find(|x| x.has_tag_name("r_ele")).unwrap();
- let reb = r_ele.children().find(|x| x.has_tag_name("reb")).unwrap();
- reb.text().unwrap().trim()
- } else {
- println!("- entry id not found: {}", reb);
- ret += &word;
- continue;
- }
- }
- Some(reb) => reb,
- None => {
- let ents = dict_idx.get(keb).map(|x| &x[..]).unwrap_or_default();
- let matches = ents
- .iter()
- .map(|ent| {
- let r_ele = ent.children().find(|x| x.has_tag_name("r_ele")).unwrap();
- let reb = r_ele.children().find(|x| x.has_tag_name("reb")).unwrap();
- reb.text().unwrap().trim()
- })
- .collect::<HashSet<_>>();
- if matches.len() == 1 {
- *matches.iter().next().unwrap()
- } else {
- println!("- word without reb: {}", word);
- ret += &word;
- continue;
- }
- }
- };
-
- //println!("+ word: {}, keb: {}, reb: {}", word, keb, reb);
- let common_cnt = word
- .chars()
- .zip(keb.chars())
- .take_while(|(x, y)| x == y)
- .count();
- if common_cnt == 0 {
- // Strange cases
- write!(&mut ret, "[[{}||{}]]", word, reb).unwrap();
- continue;
- }
-
- let keb_suffix = keb.chars().skip(common_cnt).collect::<String>();
- let word_suffix = word.chars().skip(common_cnt).collect::<String>();
- let reb = reb
- .strip_suffix(&keb_suffix)
- .or(reb.strip_suffix(&word_suffix))
- .unwrap_or(reb);
- //println!(" common reb: {}, common word: {}", reb, word.chars().take(common_cnt).collect::<String>());
-
- let wchars = Vec::from_iter(word.chars().take(common_cnt));
- let rchars = Vec::from_iter(reb.chars());
-
- // We shall invoke Levhenstein distance
- let mut dynrow0 = vec![(0, 0, 0, false)];
- for ri in 0..rchars.len() {
- dynrow0.push((0, ri, 100 + ri + 1, false));
- }
- let mut dyntab = vec![dynrow0];
-
- for (wi, wc) in wchars.iter().enumerate() {
- let mut dynrow = vec![(wi, 0, 100 + wi + 1, false)];
-
- for (ri, rc) in rchars.iter().enumerate() {
- let mut x = vec![];
- if dyntab[wi][ri + 1].3 {
- x.push((wi, ri + 1, dyntab[wi][ri + 1].2 + 1, true));
- }
- if dynrow[ri].3 {
- x.push((wi + 1, ri, dynrow[ri].2 + 1, true));
- }
- if wc == rc {
- x.push((wi, ri, dyntab[wi][ri].2, false));
- } else {
- x.push((wi, ri, dyntab[wi][ri].2 + 1, true));
- }
- dynrow.push(x.into_iter().min_by_key(|(_, _, w, _)| *w).unwrap());
- }
- dyntab.push(dynrow);
- }
- //eprintln!("DYN TAB: {:?}", dyntab);
-
- let mut path = vec![(wchars.len(), rchars.len())];
- loop {
- let (wi, ri) = *path.last().unwrap();
- let (wi2, ri2, _, _) = dyntab[wi][ri];
- path.push((wi2, ri2));
- if wi2 == 0 && ri2 == 0 {
- break;
- }
- }
- path.reverse();
- //eprintln!("DYN PATH: {:?}", path);
-
- let mut wbuf = String::new();
- let mut rbuf = String::new();
- for ((wi1, ri1), (wi2, ri2)) in path.iter().copied().zip(path.iter().copied().skip(1)) {
- if wi2 > wi1 && ri2 > ri1 && wchars[wi1] == rchars[ri1] {
- if !wbuf.is_empty() || !rbuf.is_empty() {
- write!(&mut ret, "[[{}||{}]]", wbuf, rbuf).unwrap();
- wbuf.clear();
- rbuf.clear();
- }
- ret.push(wchars[wi1]);
- } else {
- if wi2 > wi1 {
- wbuf.push(wchars[wi1]);
- }
- if ri2 > ri1 {
- rbuf.push(rchars[ri1]);
- }
- }
- }
- if !wbuf.is_empty() || !rbuf.is_empty() {
- write!(&mut ret, "[[{}||{}]]", wbuf, rbuf).unwrap();
- }
-
- ret += &word_suffix;
- }
- ret += remainder;
-
- // CHECK
- let re = regex::Regex::new(r#"\|\|\w+\]\]"#).unwrap();
- let back_to_ja = re.replace_all(&ret, "").replace("[[", "");
- if ex.ja != back_to_ja {
- eprintln!("!!!! {} != {}", ex.ja, back_to_ja);
- }
-
- ret
-}
-
-fn expl_clean_word(w: &str) -> (&str, Option<&str>) {
- let mut ret = w;
- for delim in ['(', '{', '['] {
- if let Some((s, _)) = ret.split_once(delim) {
- ret = s;
- }
- }
- let p = w
- .split_once('(')
- .and_then(|(_, r)| r.split_once(')'))
- .map(|(p, _)| p);
- (ret, p)
-}
-
fn dict_str_short<'a>(
qkeb: &str,
qreb: Option<&str>,
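
For reference (editor's sketch, not code from the commit): with the format.rs change above, a character that falls into one of the batch charsets is now emitted as a jisho.org kanji-search link rather than a bare span. A minimal stand-alone version of that per-character branch, under the assumed helper name render_char:

    // Sketch only: mirrors the per-character output in format_batch_aux above.
    // Characters with a charset class become jisho.org kanji links; everything
    // else is written through unchanged.
    fn render_char(c: char, class: Option<&str>) -> String {
        match class {
            Some(cls) => format!(
                r#"<a href="https://jisho.org/search/{}%20%23kanji" class="{}">{}</a>"#,
                c, cls, c
            ),
            None => c.to_string(),
        }
    }

    fn main() {
        assert_eq!(render_char('あ', None), "あ");
        assert_eq!(
            render_char('字', Some("char_cur")),
            r#"<a href="https://jisho.org/search/字%20%23kanji" class="char_cur">字</a>"#
        );
    }
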
diff --git a/src/main.rs b/src/main.rs
index b8996e8..85b278a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -10,6 +10,7 @@ use structopt::StructOpt;
mod charset;
mod datafiles;
+mod example;
mod format;
use charset::Charset;
use datafiles::*;
@@ -36,6 +37,7 @@ enum Cmd {
Cleanup,
AddVocab,
AddExamples,
+ AddFurigana,
Format,
}
@@ -70,73 +72,46 @@ fn main() {
.collect::<Vec<_>>();
let mut ex = read_examples(&all_kanji).expect("read_examples");
ex.retain(|e| (5..=25).contains(&e.ja.chars().count()));
- let mut batches: Vec<Batch> = fs::read("data/batches.json")
- .map_err(anyhow::Error::from)
- .and_then(|x| Ok(serde_json::from_slice(&x)?))
- .unwrap_or_default();
+
+ let mut batches = read_batches().unwrap_or_default();
+
if let Some(t) = truncate {
batches.truncate(t);
}
println!("---- starting after {} batches ----", batches.len());
let target_len = batches.len() + count;
gen_batches(&mut batches, target_len, &kanji_levels, &ex);
- fs::write(
- "data/batches.json",
- serde_json::to_string_pretty(&batches)
- .expect("serialize")
- .as_bytes(),
- )
- .expect("save");
+
+ save_batches(batches).expect("save_batches");
}
Cmd::Simplify => {
- let mut batches: Vec<Batch> = fs::read("data/batches.json")
- .map_err(anyhow::Error::from)
- .and_then(|x| Ok(serde_json::from_slice(&x)?))
- .expect("failed to decode batches.json");
+ let mut batches = read_batches().expect("read_batches");
+
for batch in batches.iter_mut() {
simplify_batch(batch);
}
- fs::write(
- "data/batches.json",
- serde_json::to_string_pretty(&batches)
- .expect("serialize")
- .as_bytes(),
- )
- .expect("save");
+
+ save_batches(batches).expect("save_batches");
}
Cmd::Cleanup => {
- let mut batches: Vec<Batch> = fs::read("data/batches.json")
- .map_err(anyhow::Error::from)
- .and_then(|x| Ok(serde_json::from_slice(&x)?))
- .expect("failed to decode batches.json");
+ let mut batches = read_batches().expect("read_batches");
+
let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
let kanji_levels = kanji_levels
.into_iter()
.map(|(l, x)| (l, Charset::new(x)))
.collect::<Vec<_>>();
cleanup_batches(&mut batches, &kanji_levels);
- fs::write(
- "data/batches.json",
- serde_json::to_string_pretty(&batches)
- .expect("serialize")
- .as_bytes(),
- )
- .expect("save");
+
+ save_batches(batches).expect("save_batches");
}
Cmd::AddVocab => {
- let mut batches: Vec<Batch> = fs::read("data/batches.json")
- .map_err(anyhow::Error::from)
- .and_then(|x| Ok(serde_json::from_slice(&x)?))
- .expect("failed to decode batches.json");
+ let mut batches = read_batches().expect("read_batches");
+
let jlpt_vocab = load_jlpt_vocab().expect("load_jlpt_vocab");
add_vocab(&mut batches, &jlpt_vocab);
- fs::write(
- "data/batches.json",
- serde_json::to_string_pretty(&batches)
- .expect("serialize")
- .as_bytes(),
- )
- .expect("save");
+
+ save_batches(batches).expect("save_batches");
}
Cmd::AddExamples => {
let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
@@ -151,20 +126,37 @@ fn main() {
let mut ex = read_examples(&all_kanji).expect("read_examples");
ex.retain(|e| (5..=25).contains(&e.ja.chars().count()));
- let mut batches: Vec<Batch> = fs::read("data/batches.json")
- .map_err(anyhow::Error::from)
- .and_then(|x| Ok(serde_json::from_slice(&x)?))
- .expect("failed to decode batches.json");
+ let mut batches = read_batches().expect("read_batches");
add_extra_examples(&mut batches, &ex);
- fs::write(
- "data/batches.json",
- serde_json::to_string_pretty(&batches)
- .expect("serialize")
- .as_bytes(),
+ save_batches(batches).expect("save_batches");
+ }
+ Cmd::AddFurigana => {
+ let mut batches = read_batches().expect("read_batches");
+
+ let jmdict = fs::read_to_string("data/JMdict_e.xml").expect("read_jmdict");
+ let jmdict = roxmltree::Document::parse_with_options(
+ &jmdict,
+ roxmltree::ParsingOptions {
+ allow_dtd: true,
+ ..Default::default()
+ },
)
- .expect("save");
+ .expect("parse_jmdict");
+ let jmdict_idx = index_jmdict(&jmdict);
+
+ for batch in batches.iter_mut() {
+ for ex in batch
+ .examples
+ .iter_mut()
+ .chain(batch.extra_examples.iter_mut())
+ {
+ ex.gen_furigana(&jmdict_idx);
+ }
+ }
+
+ save_batches(batches).expect("save_batches");
}
Cmd::Format => {
let jmdict = fs::read_to_string("data/JMdict_e.xml").expect("read_jmdict");
@@ -178,10 +170,7 @@ fn main() {
.expect("parse_jmdict");
let jmdict_idx = index_jmdict(&jmdict);
- let batches = fs::read("data/batches.json")
- .map_err(anyhow::Error::from)
- .and_then(|x| Ok(serde_json::from_slice::<Vec<Batch>>(&x)?))
- .expect("read/parse");
+ let batches = read_batches().expect("read/parse");
fs::create_dir_all("public").expect("mkdir public");
fs::copy("static/style.css", "public/style.css").expect("copy style.css");
@@ -200,6 +189,21 @@ fn main() {
}
}
+// ----
+
+fn read_batches() -> anyhow::Result<Vec<Batch>> {
+ let json = fs::read("data/batches.json")?;
+ Ok(serde_json::from_slice::<Vec<Batch>>(&json)?)
+}
+
+fn save_batches(batches: Vec<Batch>) -> anyhow::Result<()> {
+ fs::write(
+ "data/batches.json",
+ serde_json::to_string_pretty(&batches)?.as_bytes(),
+ )?;
+ Ok(())
+}
+
// =====================================================================
// BATCH STRUCTURES AND GENERATION
// =====================================================================
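
Two closing notes, both editor's assumptions rather than statements from the commit. First, given structopt's default kebab-case naming for subcommand variants, the new pass would presumably be run as `cargo run -- add-furigana` after the other batch-building steps, rewriting data/batches.json in place. Second, existing batches.json files without the new field keep loading, since a missing "furigana" entry simply deserializes to None; a trimmed sketch (ExampleLite is a stand-in, not the real Example struct):

    // Sketch only: older batches.json entries without "furigana" still parse.
    use serde::Deserialize;

    #[derive(Deserialize, Debug)]
    struct ExampleLite {
        ja: String,
        en: String,
        #[serde(default)]
        furigana: Option<String>,
    }

    fn main() {
        let old = r#"{ "ja": "本を読む。", "en": "I read a book." }"#;
        let ex: ExampleLite = serde_json::from_str(old).expect("parse");
        assert_eq!(ex.furigana, None);
        println!("{:?}", ex);
    }
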