author     Alex Auvolat <alex@adnab.me>  2023-09-25 16:23:22 +0200
committer  Alex Auvolat <alex@adnab.me>  2023-09-25 16:23:22 +0200
commit     8bc57d9a984ce48c4e5525dc114f2ed788703636 (patch)
tree       b0ba291b78cc140f66ea6ad8cca1995d9f1a8f64 /src/main.rs
parent     8c6fe47d809eab3daad2e2b560295ecf4fa12796 (diff)
cargo fmt
Diffstat (limited to 'src/main.rs')
-rw-r--r--  src/main.rs  106
1 file changed, 80 insertions(+), 26 deletions(-)
diff --git a/src/main.rs b/src/main.rs
index 533f157..26e3980 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -46,10 +46,8 @@ fn main() {
}
Cmd::ParseJlptVocab => {
let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
- let all_kanji = Charset::from_iter(kanji_levels
- .iter()
- .map(|(_, c)| c.chars())
- .flatten());
+ let all_kanji =
+ Charset::from_iter(kanji_levels.iter().map(|(_, c)| c.chars()).flatten());
parse_jlpt_vocab(&all_kanji).expect("error");
}
Cmd::New { truncate, count } => {
@@ -324,29 +322,59 @@ struct JlptVocab {
impl JlptVocab {
fn to_string(&self) -> String {
- format!("{}\t{}\t{}\t{}\t{}",
- self.level,
- self.chars.to_string(),
- self.kanji,
- self.kana,
- self.en)
+ format!(
+ "{}\t{}\t{}\t{}\t{}",
+ self.level,
+ self.chars.to_string(),
+ self.kanji,
+ self.kana,
+ self.en
+ )
}
}
fn parse_jlpt_vocab(all_kanji: &Charset) -> Result<()> {
let mut vocab = vec![];
- vocab.extend(parse_jlpt_vocab_combined("data/n5_vocab.txt", "N4", all_kanji)?);
- vocab.extend(parse_jlpt_vocab_split("data/n4_vocab_hiragana.txt", "data/n4_vocab_eng.txt", "N3", all_kanji)?);
- vocab.extend(parse_jlpt_vocab_split("data/n3_vocab_hiragana.txt", "data/n3_vocab_eng.txt", "N2a", all_kanji)?);
- vocab.extend(parse_jlpt_vocab_split("data/n2_vocab_hiragana.txt", "data/n2_vocab_eng.txt", "N2b", all_kanji)?);
- vocab.extend(parse_jlpt_vocab_split("data/n1_vocab_hiragana.txt", "data/n1_vocab_eng.txt", "N1", all_kanji)?);
+ vocab.extend(parse_jlpt_vocab_combined(
+ "data/n5_vocab.txt",
+ "N4",
+ all_kanji,
+ )?);
+ vocab.extend(parse_jlpt_vocab_split(
+ "data/n4_vocab_hiragana.txt",
+ "data/n4_vocab_eng.txt",
+ "N3",
+ all_kanji,
+ )?);
+ vocab.extend(parse_jlpt_vocab_split(
+ "data/n3_vocab_hiragana.txt",
+ "data/n3_vocab_eng.txt",
+ "N2a",
+ all_kanji,
+ )?);
+ vocab.extend(parse_jlpt_vocab_split(
+ "data/n2_vocab_hiragana.txt",
+ "data/n2_vocab_eng.txt",
+ "N2b",
+ all_kanji,
+ )?);
+ vocab.extend(parse_jlpt_vocab_split(
+ "data/n1_vocab_hiragana.txt",
+ "data/n1_vocab_eng.txt",
+ "N1",
+ all_kanji,
+ )?);
for v in vocab.iter() {
println!("{}", v.to_string());
}
Ok(())
}
-fn parse_jlpt_vocab_combined(file: &str, level: &str, all_kanji: &Charset) -> Result<Vec<JlptVocab>> {
+fn parse_jlpt_vocab_combined(
+ file: &str,
+ level: &str,
+ all_kanji: &Charset,
+) -> Result<Vec<JlptVocab>> {
let lines = jlpt_vocab_read_file(file)?;
let mut ret = vec![];
for (kanji, answer) in lines {
@@ -360,14 +388,19 @@ fn parse_jlpt_vocab_combined(file: &str, level: &str, all_kanji: &Charset) -> Re
chars: Charset::new(kanji).inter(all_kanji),
kanji: kanji.to_string(),
kana: kana.to_string(),
- en: eng.to_string()
+ en: eng.to_string(),
});
}
}
Ok(ret)
}
-fn parse_jlpt_vocab_split(kana_file: &str, eng_file: &str, level: &str, all_kanji: &Charset) -> Result<Vec<JlptVocab>> {
+fn parse_jlpt_vocab_split(
+ kana_file: &str,
+ eng_file: &str,
+ level: &str,
+ all_kanji: &Charset,
+) -> Result<Vec<JlptVocab>> {
let eng_lines = jlpt_vocab_read_file(eng_file)?
.into_iter()
.collect::<HashMap<String, String>>();
@@ -383,7 +416,7 @@ fn parse_jlpt_vocab_split(kana_file: &str, eng_file: &str, level: &str, all_kanj
chars: Charset::new(kanji).inter(all_kanji),
kanji: kanji.to_string(),
kana: kana.to_string(),
- en: eng.to_string()
+ en: eng.to_string(),
});
}
}
@@ -397,8 +430,7 @@ fn jlpt_vocab_read_file(file: &str) -> Result<Vec<(String, String)>> {
let file = fs::File::open(file)?;
let mut ret = vec![];
for line in io::BufReader::new(file).lines() {
- let line = line?.replace("<br>", "\n")
- .replace("</span>", "");
+ let line = line?.replace("<br>", "\n").replace("</span>", "");
let line = re.replace_all(&line, "");
if let Some((a, b)) = line.split_once('|') {
ret.push((a.trim().to_string(), b.trim().to_string()));
@@ -1034,7 +1066,15 @@ fn add_vocab(all_batches: &mut [Batch], vocab: &[JlptVocab]) {
.filter(|v| v.chars.inter_len(&batch.chars) > 0)
.filter(|v| match_level(batch, &v.level))
.filter(|v| v.chars.diff(&done_after).len() == 0)
- .filter(|v| !all_batches[i..std::cmp::min(all_batches.len(), i+10)].iter().any(|b| b.examples.iter().any(|ex| ex.ja.contains(&v.kanji) || ex.expl.contains(&v.kanji))))
+ .filter(|v| {
+ !all_batches[i..std::cmp::min(all_batches.len(), i + 10)]
+ .iter()
+ .any(|b| {
+ b.examples
+ .iter()
+ .any(|ex| ex.ja.contains(&v.kanji) || ex.expl.contains(&v.kanji))
+ })
+ })
.cloned()
.collect::<Vec<_>>();
extra_vocab.push(batch_extra_vocab);
@@ -1156,18 +1196,32 @@ fn format_batch_aux<'a>(
}
writeln!(f, "<hr />")?;
- writeln!(f, r#"<details><summary>Extra vocabulary (this level)</summary>"#)?;
+ writeln!(
+ f,
+ r#"<details><summary>Extra vocabulary (this level)</summary>"#
+ )?;
for v in batch.extra_vocab.iter() {
if batch.level.contains(&v.level) {
- writeln!(f, r#"<p>({}) {} [{}] {}</p>"#, v.level, v.kanji, v.kana, v.en)?;
+ writeln!(
+ f,
+ r#"<p>({}) {} [{}] {}</p>"#,
+ v.level, v.kanji, v.kana, v.en
+ )?;
}
}
writeln!(f, r#"</details>"#)?;
if !batch.level.contains("N4") {
- writeln!(f, r#"<details><summary>Extra vocabulary (previous levels)</summary>"#)?;
+ writeln!(
+ f,
+ r#"<details><summary>Extra vocabulary (previous levels)</summary>"#
+ )?;
for v in batch.extra_vocab.iter() {
if !batch.level.contains(&v.level) {
- writeln!(f, r#"<p>({}) {} [{}] {}</p>"#, v.level, v.kanji, v.kana, v.en)?;
+ writeln!(
+ f,
+ r#"<p>({}) {} [{}] {}</p>"#,
+ v.level, v.kanji, v.kana, v.en
+ )?;
}
}
writeln!(f, r#"</details>"#)?;