diff options
Diffstat (limited to 'src/server.rs')
-rw-r--r-- | src/server.rs | 91 |
1 files changed, 39 insertions, 52 deletions
diff --git a/src/server.rs b/src/server.rs index 76911f6..51191f1 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1,5 +1,3 @@ -use std::fs; - use anyhow::{anyhow, Result}; use futures::stream::TryStreamExt; use rand::prelude::*; @@ -16,7 +14,7 @@ use crate::*; pub async fn server_main() -> tide::Result<()> { // ---- load data files ---- - eprintln!("Loading kanji levels..."); + info!("Loading kanji levels..."); let kanji_levels = read_kanji_levels().expect("read_kanji_levels"); let all_kanji = Charset::new( kanji_levels @@ -26,41 +24,23 @@ pub async fn server_main() -> tide::Result<()> { .join(""), ); - eprintln!("Loading examples..."); + info!("Loading examples..."); let mut examples = read_examples(&all_kanji).expect("read_examples"); examples.retain(|e| (5..=25).contains(&e.ja.chars().count())); let examples = Box::leak(examples.into_boxed_slice()); - eprintln!("Counting chars in examples..."); + info!("Counting chars in examples..."); let example_freq = calc_example_freq(&examples); - eprintln!("Loading furigana overrides..."); + info!("Loading furigana overrides..."); let furigana_overrides = read_furigana_overrides().expect("read_furigana_overrides"); - eprintln!("RAM: {}", ALLOCATOR.allocated() / 1024); - - eprintln!("Loading JMdict_e.xml..."); - let jmdict_raw = fs::read_to_string("data/JMdict_e.xml").expect("read_jmdict"); - eprintln!("RAM: {}", ALLOCATOR.allocated() / 1024); - - eprintln!("Parsing JMdict_e.xml..."); - let jmdict_xml = roxmltree::Document::parse_with_options( - &jmdict_raw, - roxmltree::ParsingOptions { - allow_dtd: true, - ..Default::default() - }, - ) - .expect("parse_jmdict"); - eprintln!("RAM: {}", ALLOCATOR.allocated() / 1024); - - eprintln!("Indexing JMdict_e.xml..."); - let jmdict_idx = index_jmdict(&jmdict_xml); - eprintln!("RAM: {}", ALLOCATOR.allocated() / 1024); - drop(jmdict_xml); - drop(jmdict_raw); - eprintln!("RAM: {}", ALLOCATOR.allocated() / 1024); - - eprintln!("Loading batches.json..."); + debug!("RAM: {}", ALLOCATOR.allocated() / 1024); + + info!("Loading jmdict_idx.json..."); + let jmdict_idx = read_jmdict_idx().expect("read jmdict_idx.json"); + debug!("RAM: {}", ALLOCATOR.allocated() / 1024); + + info!("Loading batches.json..."); let batches = read_batches().expect("read/parse"); let batches = Box::leak(batches.into_boxed_slice()); @@ -93,7 +73,8 @@ pub async fn server_main() -> tide::Result<()> { // ---- serve actual http ---- - eprintln!("Server listening on 127.0.0.1:8080"); + info!("Server listening on 127.0.0.1:8080"); + debug!("RAM: {}", ALLOCATOR.allocated() / 1024); app.listen("127.0.0.1:8080").await?; Ok(()) @@ -222,9 +203,7 @@ async fn gen_examples_page(mut req: Request<State>) -> tide::Result { ) .into_bytes()))?; - gen_examples(state, &allowed_chars, &needed_chars, 50, |mut ex| { - ex.gen_furigana(&req.state().jmdict_idx, &req.state().furigana_overrides); - + gen_examples(state, &allowed_chars, &needed_chars, 50, |ex| { let mut expl = "<table>".to_string(); for word in ex.expl.split(|c| c == ' ' || c == '~') { let (keb, reb) = expl_clean_word(word); @@ -370,7 +349,7 @@ where let mut remaining_needed = needed_chars.clone(); let mut have_chars = Charset::new(""); - println!("Ex\tMinCnt\tChars\tNeeded\tAllowed\tCandidates\tChars"); + trace!("Ex\tMinCnt\tChars\tNeeded\tAllowed\tCandidates\tChars"); while generated < count { let mut selection = None; let mut total_weight = 0f64; @@ -393,22 +372,30 @@ where if let Some((i, f)) = selection { let (ex, _) = candidates.remove(i); - remaining_needed = remaining_needed.diff(&ex.chars); - have_chars = have_chars.union(&ex.chars); - - generated += 1; - println!( - "{}\t{}\t{}\t{}\t{}\t{}\t{}", - generated, - f, - have_chars.len(), - remaining_needed.len(), - allowed_chars.len(), - counted, - ex.chars.to_string() - ); - - callback(ex.clone())?; + + let mut ex = ex.clone(); + if ex.gen_furigana(&data.jmdict_idx, &data.furigana_overrides) { + remaining_needed = remaining_needed.diff(&ex.chars); + have_chars = have_chars.union(&ex.chars); + generated += 1; + + trace!( + "{}\t{}\t{}\t{}\t{}\t{}\t{}", + generated, + f, + have_chars.len(), + remaining_needed.len(), + allowed_chars.len(), + counted, + ex.chars.to_string() + ); + + callback(ex)?; + } else { + warn!("Warning: failed to generate furigana"); + warn!(" sentence: {}", ex.ja); + warn!(" bad furi: {}", ex.furigana.as_deref().unwrap_or("-")); + } } else { break; } |