aboutsummaryrefslogblamecommitdiff
path: root/src/server.rs
blob: 985d3db272f37b99f9591c65a436d4c482d98e52 (plain) (tree)
1
2
3
4
5
6
7
8

                         
                             
                                  
                     
                       

                     


                        
                      
                     

             
                                                                     

                                
                                     








                                                                       
                                 



                                                                         
                                           

                                                    
                                           
                                                                                         






                                                                      


                                                        





                                                                        
                                                       


                   


                           
        






                                                         

                                                         
                                          
                                                



                                      
                                               
                                                    
                                 



          
                                  

                    
                          

                              


                                                











































                                                             
 









                                                                         



















                                           



                                              

                                    




                                                     


                                                                          














                                                            

                                                            


                      



                                                            
 
                                                                     








                                                                 
                                                                       

                                     



                                                                                                                                                             
 


                                                       
                             
                                          
                         

                                                                                                              
                                   
                          


                     
 





































                                                                                                                                




                                             
                                            
               




                                                        





                                

                     


                                  

                                   



                                           















                                                                    
                   



                            

                    


                                    
                               
                          




                                                                                                 









                                                              



                                                    
                                                                    
                             









                                                                                     

                                             








                                               























                                                                               




                  
          
 
use std::net::SocketAddr;

use anyhow::{anyhow, Result};
use futures::stream::TryStreamExt;
use rand::prelude::*;
use serde::Deserialize;

use http_types::mime;
use tide::Request;

use crate::datafiles::*;
use crate::example::*;
use crate::format::*;
use crate::*;

pub async fn server_main(bind_addr: SocketAddr) -> tide::Result<()> {
    // ---- load data files ----

    info!("Loading kanji levels...");
    let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
    let all_kanji = Charset::new(
        kanji_levels
            .iter()
            .map(|(_, x)| x.to_string())
            .collect::<Vec<_>>()
            .join(""),
    );

    info!("Loading examples...");
    let mut examples = read_examples(&all_kanji).expect("read_examples");
    examples.retain(|e| (5..=25).contains(&e.ja.chars().count()));
    let examples = Box::leak(examples.into_boxed_slice());

    info!("Counting chars in examples...");
    let example_freq = calc_example_freq(&examples);

    info!("Loading furigana overrides...");
    let furigana_overrides = read_furigana_overrides().expect("read_furigana_overrides");
    debug!("RAM: {}", ALLOCATOR.allocated() / 1024);

    info!("Loading jmdict_idx.json...");
    let jmdict_idx = read_jmdict_idx().expect("read jmdict_idx.json");
    debug!("RAM: {}", ALLOCATOR.allocated() / 1024);

    info!("Loading batches.json...");
    let batches = read_batches().expect("read/parse");
    let batches = Box::leak(batches.into_boxed_slice());

    let mut index_bytes = Vec::new();
    format_index_to(&mut index_bytes, &batches, &kanji_levels).unwrap();
    let index = String::leak(String::from_utf8(index_bytes).unwrap());

    // ---- setup http server ----

    let state: State = Box::leak(Box::new(StateStruct {
        jmdict_idx,
        batches,
        index,
        examples,
        example_freq,
        furigana_overrides,
    }));

    let mut app = tide::with_state(state);
    app.with(tide::log::LogMiddleware::new());

    app.at("/").get(home_page);
    app.at("/index.html").get(home_page);
    app.at("/style.css").serve_file("static/style.css")?;
    app.at("/script.js").serve_file("static/script.js")?;
    app.at("/jquery.js").serve_file("static/jquery.js")?;
    app.at("/about.html").get(about_page);
    app.at("/gen.html").post(gen_examples_page);
    app.at("/:batch").get(batch_page);

    // ---- serve actual http ----

    info!("Server listening on {}", bind_addr);
    debug!("RAM: {}", ALLOCATOR.allocated() / 1024);
    app.listen(bind_addr).await?;

    Ok(())
}

/// Shared application state handed to every tide handler: a `'static`
/// reference to a [`StateStruct`] that `server_main` builds once and leaks.
type State = &'static StateStruct;
// NOTE(review): every field below appears to be read somewhere in this file,
// so this `allow` may be stale — confirm before removing.
#[allow(dead_code)]
/// Read-only data loaded at startup and shared by all requests.
struct StateStruct {
    /// Dictionary index loaded by `read_jmdict_idx`; looked up by word when
    /// rendering batch pages and the vocabulary table of generated examples.
    jmdict_idx: DictIndex,
    /// All batches loaded by `read_batches`; a batch's position in this slice
    /// is the number used in URLs and in the level range form.
    batches: &'static [Batch],
    /// The home page HTML, rendered once at startup by `format_index_to`.
    index: &'static str,
    /// Example sentences loaded by `read_examples`, restricted at startup to
    /// those whose Japanese text is 5 to 25 characters long.
    examples: &'static [Example],
    /// For each character, how many times it was seen while iterating the
    /// `chars` of every example (see `calc_example_freq`).
    example_freq: HashMap<char, usize>,
    /// Overrides loaded by `read_furigana_overrides`, passed to
    /// `Example::gen_furigana` when generating practice sentences.
    furigana_overrides: HashMap<String, String>,
}

/// Serves the index page that was pre-rendered at startup.
async fn home_page(req: Request<State>) -> tide::Result {
    let index_html = req.state().index;
    let response = tide::Response::builder(200)
        .body(index_html)
        .content_type(mime::HTML)
        .build();
    Ok(response)
}

/// Renders the "about" page on each request and returns it as HTML.
///
/// Fails if `format_about_to` reports an error.
async fn about_page(_req: Request<State>) -> tide::Result {
    let mut html = Vec::new();
    format_about_to(&mut html)?;

    let response = tide::Response::builder(200)
        .body(html)
        .content_type(mime::HTML)
        .build();
    Ok(response)
}

async fn batch_page(req: Request<State>) -> tide::Result {
    let batch_idx = req.param("batch")?;
    let batch_idx: usize = batch_idx
        .strip_suffix(".html")
        .unwrap_or(batch_idx)
        .parse()?;
    let batch = req
        .state()
        .batches
        .get(batch_idx)
        .ok_or(anyhow!("this batch number does not exist"))?;

    let mut buf = vec![];
    format_batch_to(
        &mut buf,
        &req.state().jmdict_idx,
        req.state().batches.len(),
        batch_idx,
        batch,
    )?;

    Ok(tide::Response::builder(200)
        .body(buf)
        .content_type(mime::HTML)
        .build())
}

/// Form fields posted to `/gen.html`, deserialized by `gen_examples_page`.
///
/// Both values are batch indices. The handler sorts them with `min`/`max`, so
/// they may be submitted in either order.
#[derive(Deserialize)]
struct GenParam {
    /// One end of the batch range to practice.
    first_level: usize,
    /// The other end of the batch range to practice.
    last_level: usize,
}

/// Handles the `/gen.html` form: streams a practice page with up to 50
/// example sentences for the requested range of batches.
///
/// The form supplies two batch indices (see [`GenParam`]); they are sorted so
/// that `first_level <= last_level`. Sentences may only use characters from
/// batches `0..=last_level` and must contain at least one character from
/// batches `first_level..=last_level`. If a range reaches past the last batch
/// the corresponding character set is empty, so no sentence is produced.
///
/// The response body is produced incrementally: a worker thread sends the page
/// header, then one `<script> add_example(..); </script>` chunk per generated
/// sentence, then the page footer, over a channel that backs the body reader.
///
/// # Errors
///
/// Fails if the form body cannot be deserialized. Errors inside the worker
/// thread happen after the response has started; they stop the worker and are
/// discarded.
async fn gen_examples_page(mut req: Request<State>) -> tide::Result {
    let param: GenParam = req.body_form().await?;
    // Accept the two bounds in either order.
    let first_level = std::cmp::min(param.first_level, param.last_level);
    let last_level = std::cmp::max(param.first_level, param.last_level);

    // `State` is a `'static` reference, so the worker thread can use it
    // directly instead of taking ownership of the whole request.
    let state: State = req.state();

    // Characters a sentence is allowed to contain.
    let allowed_chars = Charset::from_iter(
        state
            .batches
            .get(..=last_level)
            .unwrap_or_default()
            .iter()
            .flat_map(|b| b.chars.iter()),
    );
    // Characters of which a sentence must contain at least one.
    let needed_chars = Charset::from_iter(
        state
            .batches
            .get(first_level..=last_level)
            .unwrap_or_default()
            .iter()
            .flat_map(|b| b.chars.iter()),
    );

    let (tx, rx) = async_channel::unbounded();

    // Generation is CPU-bound, so it runs on its own thread and feeds the
    // response body through the channel.
    std::thread::spawn(move || {
        // Page header. The raw string must contain plain quotes (a backslash
        // would be emitted literally), and <body> stays open so the streamed
        // <script> chunks land inside it; the footer below closes it.
        tx.send_blocking(Ok(format!(
            r#"
        <!DOCTYPE html>
        <html>
            <head>
                <meta charset="UTF-8" />
                <title>{:03} - {:03} practice</title>
                <link rel="stylesheet" type="text/css" href="style.css" />
                <script src="jquery.js"></script>
                <script src="script.js"></script>
            </head>
            <body>
              <div class="batch_page">
                <p><a href="index.html">index</a></p>
                <p>Practice for {:03} - {:03}</p>
                <hr />
                <div id="gen_section">
                  <div id="gen_ex_cnt">
                  </div>
                  <div id="gen_ex_display">
                  </div>
                  <div id="gen_ex_en">
                  </div>
                  <div id="gen_ex_words" class="vocabtable">
                  </div>
                  <div id="gen_ex_kanji" class="vocabtable">
                  </div>
                </div>
              </div>
        "#,
            first_level, last_level, first_level, last_level
        )
        .into_bytes()))?;

        gen_examples(state, &allowed_chars, &needed_chars, 50, |ex| {
            // Vocabulary table: dictionary entries for each word of the
            // sentence's annotation that shares a character with the allowed set.
            let mut expl = "<table>".to_string();
            for word in ex.expl.split(|c| c == ' ' || c == '~') {
                let (keb, reb) = expl_clean_word(word);
                let wchars = Charset::new(keb);
                if !wchars.intersects(&allowed_chars) {
                    continue;
                }
                if let Some(ents) = state.jmdict_idx.get(keb) {
                    for ent in ents.iter() {
                        // When the annotation names a reading, keep only the
                        // entries with that reading.
                        if reb.map(|x| x != ent.reb).unwrap_or(false) {
                            continue;
                        }
                        expl += &format!(
                            r#"<tr><td style="word-break: keep-all">&nbsp;&nbsp;<span class="tab_large font_ja">{}</span>&nbsp;&nbsp;</td><td width="50%">"#,
                            keb
                        );

                        for sense in ent.sense.iter() {
                            // The cell was just opened if the text ends with
                            // '>'; otherwise separate from the previous sense.
                            if !expl.ends_with('>') {
                                expl += "; ";
                            }
                            expl += sense;
                        }
                        expl += &format!(
                            r#"</td><td style="word-break: keep-all" class="tab_large font_ja">{}</td></tr>"#,
                            ent.reb
                        );
                    }
                }
            }

            // Kanji table: for each character of the sentence, the batch
            // examples (up to `last_level`) that contain it.
            let mut kanji = "<table>".to_string();
            let mut chrvec = ex
                .chars
                .iter()
                .map(|chr| {
                    (
                        chr,
                        state
                            .batches
                            .iter()
                            .take(last_level + 1)
                            .enumerate()
                            .flat_map(|(ib, b)| {
                                b.examples
                                    .iter()
                                    .filter(|ex| ex.chars.contains(chr))
                                    .map(move |ex| (ib, ex))
                            })
                            .collect::<Vec<_>>(),
                    )
                })
                .collect::<Vec<_>>();
            // Show the characters with the fewest batch examples first, at
            // most 5 characters with at most 4 examples each.
            chrvec.sort_by_key(|(_, exs)| exs.len());
            for (chr, exs) in chrvec.iter().take(5) {
                for (cnt, (ib, ex)) in exs.iter().enumerate().take(4) {
                    if cnt == 0 {
                        kanji +=
                            &format!(r#"<tr><td class="tab_large font_ja">{}&nbsp;&nbsp;"#, chr);
                    } else {
                        kanji += "<tr><td>";
                    }
                    kanji += &format!(
                        r#"</td><td><a href="{:03}.html">{:03}</a>&nbsp;&nbsp;</td><td class="tab_large font_ja">{}</td></tr>"#,
                        ib, ib, ex.ja
                    );
                }
            }

            let item = serde_json::json!({
                "ja": ex.ja,
                "en": ex.en,
                "furi": ex.furigana_markup(),
                "vocab": expl + "</table>",
                "kanji": kanji + "</table>",
            });
            tx.send_blocking(Ok(format!(
                "<script> add_example({}); </script>\n",
                serde_json::to_string(&item)?
            )
            .into_bytes()))?;
            Ok(())
        })?;

        // Page footer.
        tx.send_blocking(Ok(br#"
            </body>
            </html>
        "#
        .to_vec()))?;

        Ok::<_, anyhow::Error>(())
    });

    Ok(tide::Response::builder(200)
        .body(tide::Body::from_reader(
            Box::pin(rx).into_async_read(),
            None,
        ))
        .content_type(mime::HTML)
        .build())
}

// ---- example calculation ----

/// Counts, for every character, how many times it is yielded while iterating
/// the `chars` of each example in `examples`.
fn calc_example_freq(examples: &[Example]) -> HashMap<char, usize> {
    let mut freq = HashMap::new();
    for c in examples.iter().flat_map(|ex| ex.chars.iter()) {
        *freq.entry(c).or_insert(0) += 1;
    }
    freq
}

/// Randomly picks up to `count` example sentences and passes each one to
/// `callback` as soon as it is chosen.
///
/// Only sentences whose characters all belong to `allowed_chars` and that
/// contain at least one character of `needed_chars` are candidates. While some
/// needed characters have not yet appeared in a picked sentence, only
/// candidates containing one of those are eligible. Eligible candidates are
/// drawn with weight `1 / f`, where `f` is the smallest `example_freq` value
/// among the sentence's characters, so sentences with rare characters are
/// favoured.
///
/// A drawn sentence for which furigana generation fails is logged, dropped and
/// does not count towards `count`. Generation stops early when no eligible
/// candidate remains.
///
/// # Errors
///
/// Returns the first error produced by `callback`.
fn gen_examples<F>(
    data: &StateStruct,
    allowed_chars: &Charset,
    needed_chars: &Charset,
    count: usize,
    mut callback: F,
) -> Result<()>
where
    F: FnMut(Example) -> Result<()>,
{
    let mut rng = thread_rng();

    // Pair every usable sentence with the frequency of its rarest character.
    let mut candidates: Vec<_> = data
        .examples
        .iter()
        .filter(|x| x.chars.diff(allowed_chars).is_empty() && x.chars.intersects(needed_chars))
        .map(|ex| {
            let rarest = ex
                .chars
                .iter()
                .filter_map(|x| data.example_freq.get(&x))
                .copied()
                .min()
                .unwrap();
            (ex, rarest)
        })
        .collect();

    let mut remaining_needed = needed_chars.clone();
    let mut have_chars = Charset::new("");
    let mut generated = 0;

    trace!("Ex\tMinCnt\tChars\tNeeded\tAllowed\tCandidates\tChars");
    while generated < count {
        // Single-pass weighted draw: each eligible candidate replaces the
        // current pick with probability weight / (sum of weights so far).
        let must_cover = remaining_needed.len() > 0;
        let mut selection = None;
        let mut total_weight = 0f64;
        let mut counted = 0;

        for (i, (x, f)) in candidates.iter().enumerate() {
            let eligible = !must_cover || x.chars.intersects(&remaining_needed);
            if eligible {
                counted += 1;
                // compensate for rare characters
                let weight = 1f64 / (*f) as f64;
                total_weight += weight;
                let rand: f64 = rng.gen();
                if rand < weight / total_weight {
                    selection = Some((i, *f))
                }
            }
        }

        // Nothing eligible is left: stop early.
        let Some((i, f)) = selection else {
            break;
        };

        // A drawn sentence is removed whether or not it ends up being used.
        let (picked, _) = candidates.remove(i);
        let mut ex = picked.clone();

        if !ex.gen_furigana(&data.jmdict_idx, &data.furigana_overrides) {
            warn!("Warning: failed to generate furigana");
            warn!("  sentence: {}", ex.ja);
            warn!("  bad furi: {}", ex.furigana.as_deref().unwrap_or("-"));
            continue;
        }

        remaining_needed = remaining_needed.diff(&ex.chars);
        have_chars = have_chars.union(&ex.chars);
        generated += 1;

        trace!(
            "{}\t{}\t{}\t{}\t{}\t{}\t{}",
            generated,
            f,
            have_chars.len(),
            remaining_needed.len(),
            allowed_chars.len(),
            counted,
            ex.chars.to_string()
        );

        callback(ex)?;
    }

    Ok(())
}