diff options
author | Alex Auvolat <alex@adnab.me> | 2023-09-25 16:23:22 +0200 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2023-09-25 16:23:22 +0200 |
commit | 8bc57d9a984ce48c4e5525dc114f2ed788703636 (patch) | |
tree | b0ba291b78cc140f66ea6ad8cca1995d9f1a8f64 /src/main.rs | |
parent | 8c6fe47d809eab3daad2e2b560295ecf4fa12796 (diff) | |
download | datagengo-8bc57d9a984ce48c4e5525dc114f2ed788703636.tar.gz datagengo-8bc57d9a984ce48c4e5525dc114f2ed788703636.zip |
cargo fmt
Diffstat (limited to 'src/main.rs')
-rw-r--r-- | src/main.rs | 106 |
1 file changed, 80 insertions, 26 deletions
diff --git a/src/main.rs b/src/main.rs index 533f157..26e3980 100644 --- a/src/main.rs +++ b/src/main.rs @@ -46,10 +46,8 @@ fn main() { } Cmd::ParseJlptVocab => { let kanji_levels = read_kanji_levels().expect("read_kanji_levels"); - let all_kanji = Charset::from_iter(kanji_levels - .iter() - .map(|(_, c)| c.chars()) - .flatten()); + let all_kanji = + Charset::from_iter(kanji_levels.iter().map(|(_, c)| c.chars()).flatten()); parse_jlpt_vocab(&all_kanji).expect("error"); } Cmd::New { truncate, count } => { @@ -324,29 +322,59 @@ struct JlptVocab { impl JlptVocab { fn to_string(&self) -> String { - format!("{}\t{}\t{}\t{}\t{}", - self.level, - self.chars.to_string(), - self.kanji, - self.kana, - self.en) + format!( + "{}\t{}\t{}\t{}\t{}", + self.level, + self.chars.to_string(), + self.kanji, + self.kana, + self.en + ) } } fn parse_jlpt_vocab(all_kanji: &Charset) -> Result<()> { let mut vocab = vec![]; - vocab.extend(parse_jlpt_vocab_combined("data/n5_vocab.txt", "N4", all_kanji)?); - vocab.extend(parse_jlpt_vocab_split("data/n4_vocab_hiragana.txt", "data/n4_vocab_eng.txt", "N3", all_kanji)?); - vocab.extend(parse_jlpt_vocab_split("data/n3_vocab_hiragana.txt", "data/n3_vocab_eng.txt", "N2a", all_kanji)?); - vocab.extend(parse_jlpt_vocab_split("data/n2_vocab_hiragana.txt", "data/n2_vocab_eng.txt", "N2b", all_kanji)?); - vocab.extend(parse_jlpt_vocab_split("data/n1_vocab_hiragana.txt", "data/n1_vocab_eng.txt", "N1", all_kanji)?); + vocab.extend(parse_jlpt_vocab_combined( + "data/n5_vocab.txt", + "N4", + all_kanji, + )?); + vocab.extend(parse_jlpt_vocab_split( + "data/n4_vocab_hiragana.txt", + "data/n4_vocab_eng.txt", + "N3", + all_kanji, + )?); + vocab.extend(parse_jlpt_vocab_split( + "data/n3_vocab_hiragana.txt", + "data/n3_vocab_eng.txt", + "N2a", + all_kanji, + )?); + vocab.extend(parse_jlpt_vocab_split( + "data/n2_vocab_hiragana.txt", + "data/n2_vocab_eng.txt", + "N2b", + all_kanji, + )?); + vocab.extend(parse_jlpt_vocab_split( + "data/n1_vocab_hiragana.txt", + 
"data/n1_vocab_eng.txt", + "N1", + all_kanji, + )?); for v in vocab.iter() { println!("{}", v.to_string()); } Ok(()) } -fn parse_jlpt_vocab_combined(file: &str, level: &str, all_kanji: &Charset) -> Result<Vec<JlptVocab>> { +fn parse_jlpt_vocab_combined( + file: &str, + level: &str, + all_kanji: &Charset, +) -> Result<Vec<JlptVocab>> { let lines = jlpt_vocab_read_file(file)?; let mut ret = vec![]; for (kanji, answer) in lines { @@ -360,14 +388,19 @@ fn parse_jlpt_vocab_combined(file: &str, level: &str, all_kanji: &Charset) -> Re chars: Charset::new(kanji).inter(all_kanji), kanji: kanji.to_string(), kana: kana.to_string(), - en: eng.to_string() + en: eng.to_string(), }); } } Ok(ret) } -fn parse_jlpt_vocab_split(kana_file: &str, eng_file: &str, level: &str, all_kanji: &Charset) -> Result<Vec<JlptVocab>> { +fn parse_jlpt_vocab_split( + kana_file: &str, + eng_file: &str, + level: &str, + all_kanji: &Charset, +) -> Result<Vec<JlptVocab>> { let eng_lines = jlpt_vocab_read_file(eng_file)? .into_iter() .collect::<HashMap<String, String>>(); @@ -383,7 +416,7 @@ fn parse_jlpt_vocab_split(kana_file: &str, eng_file: &str, level: &str, all_kanj chars: Charset::new(kanji).inter(all_kanji), kanji: kanji.to_string(), kana: kana.to_string(), - en: eng.to_string() + en: eng.to_string(), }); } } @@ -397,8 +430,7 @@ fn jlpt_vocab_read_file(file: &str) -> Result<Vec<(String, String)>> { let file = fs::File::open(file)?; let mut ret = vec![]; for line in io::BufReader::new(file).lines() { - let line = line?.replace("<br>", "\n") - .replace("</span>", ""); + let line = line?.replace("<br>", "\n").replace("</span>", ""); let line = re.replace_all(&line, ""); if let Some((a, b)) = line.split_once('|') { ret.push((a.trim().to_string(), b.trim().to_string())); @@ -1034,7 +1066,15 @@ fn add_vocab(all_batches: &mut [Batch], vocab: &[JlptVocab]) { .filter(|v| v.chars.inter_len(&batch.chars) > 0) .filter(|v| match_level(batch, &v.level)) .filter(|v| v.chars.diff(&done_after).len() == 0) - 
.filter(|v| !all_batches[i..std::cmp::min(all_batches.len(), i+10)].iter().any(|b| b.examples.iter().any(|ex| ex.ja.contains(&v.kanji) || ex.expl.contains(&v.kanji)))) + .filter(|v| { + !all_batches[i..std::cmp::min(all_batches.len(), i + 10)] + .iter() + .any(|b| { + b.examples + .iter() + .any(|ex| ex.ja.contains(&v.kanji) || ex.expl.contains(&v.kanji)) + }) + }) .cloned() .collect::<Vec<_>>(); extra_vocab.push(batch_extra_vocab); @@ -1156,18 +1196,32 @@ fn format_batch_aux<'a>( } writeln!(f, "<hr />")?; - writeln!(f, r#"<details><summary>Extra vocabulary (this level)</summary>"#)?; + writeln!( + f, + r#"<details><summary>Extra vocabulary (this level)</summary>"# + )?; for v in batch.extra_vocab.iter() { if batch.level.contains(&v.level) { - writeln!(f, r#"<p>({}) {} [{}] {}</p>"#, v.level, v.kanji, v.kana, v.en)?; + writeln!( + f, + r#"<p>({}) {} [{}] {}</p>"#, + v.level, v.kanji, v.kana, v.en + )?; } } writeln!(f, r#"</details>"#)?; if !batch.level.contains("N4") { - writeln!(f, r#"<details><summary>Extra vocabulary (previous levels)</summary>"#)?; + writeln!( + f, + r#"<details><summary>Extra vocabulary (previous levels)</summary>"# + )?; for v in batch.extra_vocab.iter() { if !batch.level.contains(&v.level) { - writeln!(f, r#"<p>({}) {} [{}] {}</p>"#, v.level, v.kanji, v.kana, v.en)?; + writeln!( + f, + r#"<p>({}) {} [{}] {}</p>"#, + v.level, v.kanji, v.kana, v.en + )?; } } writeln!(f, r#"</details>"#)?; |