use std::fs;
use anyhow::{anyhow, Result};
use futures::stream::TryStreamExt;
use rand::prelude::*;
use serde::Deserialize;
use http_types::mime;
use tide::Request;
use crate::datafiles::*;
use crate::example::*;
use crate::format::*;
use crate::*;
pub async fn server_main() -> tide::Result<()> {
// ---- load data files ----
eprintln!("Loading kanji levels...");
let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
let all_kanji = Charset::new(
kanji_levels
.iter()
.map(|(_, x)| x.to_string())
.collect::<Vec<_>>()
.join(""),
);
eprintln!("Loading examples...");
let mut examples = read_examples(&all_kanji).expect("read_examples");
examples.retain(|e| (5..=25).contains(&e.ja.chars().count()));
let examples = Box::leak(examples.into_boxed_slice());
eprintln!("Counting chars in examples...");
let example_freq = calc_example_freq(&examples);
eprintln!("Loading furigana overrides...");
let furigana_overrides = read_furigana_overrides().expect("read_furigana_overrides");
eprintln!("Loading JMdict_e.xml...");
let jmdict_raw = fs::read_to_string("data/JMdict_e.xml").expect("read_jmdict");
let jmdict_raw: &'static str = String::leak(jmdict_raw);
eprintln!("Parsing JMdict_e.xml...");
let jmdict = roxmltree::Document::parse_with_options(
&jmdict_raw,
roxmltree::ParsingOptions {
allow_dtd: true,
..Default::default()
},
)
.expect("parse_jmdict");
let jmdict_xml = Box::leak(Box::new(jmdict));
eprintln!("Indexing JMdict_e.xml...");
let jmdict_idx = index_jmdict(jmdict_xml);
eprintln!("Loading batches.json...");
let batches = read_batches().expect("read/parse");
let batches = Box::leak(batches.into_boxed_slice());
let mut index_bytes = Vec::new();
format_index_to(&mut index_bytes, &batches, &kanji_levels).unwrap();
let index = String::leak(String::from_utf8(index_bytes).unwrap());
// ---- setup http server ----
let state: State = Box::leak(Box::new(StateStruct {
jmdict_raw,
jmdict_xml,
jmdict_idx,
batches,
index,
examples,
example_freq,
furigana_overrides,
}));
let mut app = tide::with_state(state);
app.with(tide::log::LogMiddleware::new());
app.at("/").get(home_page);
app.at("/index.html").get(home_page);
app.at("/style.css").serve_file("static/style.css")?;
app.at("/script.js").serve_file("static/script.js")?;
app.at("/jquery.js").serve_file("static/jquery.js")?;
app.at("/about.html").get(about_page);
app.at("/gen.html").post(gen_examples_page);
app.at("/:batch").get(batch_page);
// ---- serve actual http ----
eprintln!("Server listening on 127.0.0.1:8080");
app.listen("127.0.0.1:8080").await?;
Ok(())
}
/// Application state type handed to tide: a `'static` reference to the
/// [`StateStruct`] that `server_main` builds once and leaks.
type State = &'static StateStruct;
/// Data loaded at startup by `server_main` and read by the request handlers.
// NOTE(review): `dead_code` is presumably allowed because `jmdict_raw` and
// `jmdict_xml` are not read by any handler visible in this file — confirm.
#[allow(dead_code)]
struct StateStruct {
/// Leaked contents of `data/JMdict_e.xml`.
jmdict_raw: &'static str,
/// Leaked XML document parsed from `jmdict_raw`.
jmdict_xml: &'static roxmltree::Document<'static>,
/// Lookup index returned by `index_jmdict` for `jmdict_xml`.
jmdict_idx: DictIndex<'static>,
/// Batches returned by `read_batches`; `batch_page` and `gen_examples_page`
/// index into this slice.
batches: &'static [Batch],
/// Index page rendered once at startup by `format_index_to`; served by `home_page`.
index: &'static str,
/// Example sentences whose Japanese text is 5 to 25 characters long.
examples: &'static [Example],
/// Number of entries of `examples` in which each character appears
/// (see `calc_example_freq`).
example_freq: HashMap<char, usize>,
/// Result of `read_furigana_overrides`, passed to `Example::gen_furigana`.
furigana_overrides: HashMap<String, String>,
}
/// Serves the index page that was rendered once at startup.
async fn home_page(req: Request<State>) -> tide::Result {
    let page: &'static str = req.state().index;
    // `content_type` must come after `body`: setting a body also sets a
    // content type derived from the body, which is then overridden here.
    let response = tide::Response::builder(200)
        .body(page)
        .content_type(mime::HTML)
        .build();
    Ok(response)
}
/// Renders the "about" page on every request and returns it as HTML.
///
/// # Errors
///
/// Propagates any error reported by `format_about_to`.
async fn about_page(_req: Request<State>) -> tide::Result {
    let mut html = vec![];
    format_about_to(&mut html)?;
    let response = tide::Response::builder(200)
        .body(html)
        .content_type(mime::HTML)
        .build();
    Ok(response)
}
async fn batch_page(req: Request<State>) -> tide::Result {
let batch_idx = req.param("batch")?;
let batch_idx: usize = batch_idx
.strip_suffix(".html")
.unwrap_or(batch_idx)
.parse()?;
let batch = req
.state()
.batches
.get(batch_idx)
.ok_or(anyhow!("this batch number does not exist"))?;
let mut buf = vec![];
format_batch_to(
&mut buf,
&req.state().jmdict_idx,
req.state().batches.len(),
batch_idx,
batch,
)?;
Ok(tide::Response::builder(200)
.body(buf)
.content_type(mime::HTML)
.build())
}
/// Form body read by `gen_examples_page` via `body_form`.
///
/// The two fields may arrive in either order; the handler takes their minimum
/// and maximum before using them as indices into the batch list.
#[derive(Deserialize)]
struct GenParam {
/// One end of the requested batch index range.
first_level: usize,
/// The other end of the requested batch index range.
last_level: usize,
}
async fn gen_examples_page(mut req: Request<State>) -> tide::Result {
let param: GenParam = req.body_form().await?;
let first_level = std::cmp::min(param.first_level, param.last_level);
let last_level = std::cmp::max(param.first_level, param.last_level);
let allowed_chars = Charset::from_iter(
req.state()
.batches
.get(..=last_level)
.unwrap_or_default()
.iter()
.map(|b| b.chars.iter())
.flatten(),
);
let needed_chars = Charset::from_iter(
req.state()
.batches
.get(first_level..=last_level)
.unwrap_or_default()
.iter()
.map(|b| b.chars.iter())
.flatten(),
);
let (tx, rx) = async_channel::unbounded();
let state: State = req.state();
std::thread::spawn(move || {
tx.send_blocking(Ok(format!(
r#"
<!DOCTYPE html>
<html>
<head>
<meta charset=\"UTF-8\" />
<title>{:03} - {:03} practice</title>
<link rel="stylesheet" type="text/css" href="style.css" />
<script src="jquery.js"></script>
<script src="script.js"></script>
</head>
<body>
<div class="batch_page">
<p><a href="index.html">index</a></p>
<p>Practice for {:03} - {:03}</p>
<hr />
<div id="gen_section">
<div id="gen_ex_cnt">
</div>
<div id="gen_ex_display">
</div>
<div id="gen_ex_en">
</div>
<div id="gen_ex_words" class="vocabtable">
</div>
</div>
</div>
</body>
"#,
first_level, last_level, first_level, last_level
)
.into_bytes()))?;
gen_examples(state, &allowed_chars, &needed_chars, 50, |mut ex| {
ex.gen_furigana(&req.state().jmdict_idx, &req.state().furigana_overrides);
let mut expl = "<table>".to_string();
for word in ex.expl.split(|c| c == ' ' || c == '~') {
let (keb, reb) = expl_clean_word(word);
let wchars = Charset::new(keb);
if !wchars.intersects(&allowed_chars) {
continue;
}
if let Some(ents) = state.jmdict_idx.get(keb) {
for ent in ents.iter() {
let ent_r_ele = ent.children().find(|x| x.has_tag_name("r_ele")).unwrap();
let ent_reb = ent_r_ele
.children()
.find(|x| x.has_tag_name("reb"))
.unwrap();
let ent_reb = ent_reb.text().unwrap().trim();
if reb.map(|x| x != ent_reb).unwrap_or(false) {
continue;
}
expl += &format!(
r#"<tr><td style="word-break: keep-all"> <span class="tab_large font_ja">{}</span> </td><td width="50%">"#,
keb
);
for sense in ent.children().filter(|x| x.has_tag_name("sense")) {
if let Some(s) = sense.children().find(|x| x.has_tag_name("gloss")) {
if !expl.ends_with('>') {
expl += "; ";
}
expl += s.text().unwrap().trim();
}
}
expl += &format!(
r#"</td><td style="word-break: keep-all" class="tab_large font_ja">{}</td></tr>"#,
ent_reb
);
}
}
}
let item = serde_json::json!({
"ja": ex.ja,
"en": ex.en,
"furi": ex.furigana_markup(),
"vocab": expl + "</table>",
});
tx.send_blocking(Ok(format!(
"<script> add_example({}); </script>\n",
serde_json::to_string(&item)?
)
.into_bytes()))?;
Ok(())
})?;
tx.send_blocking(Ok(br#"
</body>
</html>
"#
.to_vec()))?;
Ok::<_, anyhow::Error>(())
});
Ok(tide::Response::builder(200)
.body(tide::Body::from_reader(
Box::pin(rx).into_async_read(),
None,
))
.content_type(mime::HTML)
.build())
}
// ---- example calculation ----
/// Counts, for every character, how many times it is yielded by the `chars`
/// of the given examples.
fn calc_example_freq(examples: &[Example]) -> HashMap<char, usize> {
    let mut freq = HashMap::new();
    for ch in examples.iter().flat_map(|example| example.chars.iter()) {
        *freq.entry(ch).or_default() += 1;
    }
    freq
}
/// Picks up to `count` examples by weighted random sampling and hands a clone
/// of each one to `callback`.
///
/// Candidates are the examples in `data.examples` for which
/// `chars.diff(allowed_chars)` is empty and which intersect `needed_chars`.
/// Each candidate is weighted by `1 / f²`, where `f` is the smallest
/// `example_freq` count among its characters. While some needed characters
/// have not appeared in a picked example yet, only candidates containing at
/// least one of them are eligible. A picked example is removed from the pool.
///
/// One tab-separated progress line per pick (plus a header line) is printed
/// to stdout. Generation stops early when no eligible candidate is left.
///
/// # Errors
///
/// Returns the first error produced by `callback`.
///
/// # Panics
///
/// Panics if a candidate has no character with an entry in
/// `data.example_freq`.
fn gen_examples<F>(
    data: &StateStruct,
    allowed_chars: &Charset,
    needed_chars: &Charset,
    count: usize,
    mut callback: F,
) -> Result<()>
where
    F: FnMut(Example) -> Result<()>,
{
    let mut rng = thread_rng();
    let mut emitted = 0;

    // Pair every usable example with the count of its rarest character.
    let mut pool: Vec<_> = data
        .examples
        .iter()
        .filter(|ex| ex.chars.diff(allowed_chars).is_empty() && ex.chars.intersects(needed_chars))
        .map(|ex| {
            let rarest = *ex
                .chars
                .iter()
                .filter_map(|c| data.example_freq.get(&c))
                .min()
                .unwrap();
            (ex, rarest)
        })
        .collect();

    let mut remaining_needed = needed_chars.clone();
    let mut have_chars = Charset::new("");

    println!("Ex\tMinCnt\tChars\tNeeded\tAllowed\tCandidates\tChars");
    while emitted < count {
        // Single-pass weighted pick: each eligible candidate replaces the
        // current pick with probability weight / (sum of weights so far).
        let mut pick = None;
        let mut weight_sum = 0f64;
        let mut eligible = 0;
        for (idx, (ex, min_freq)) in pool.iter().enumerate() {
            let useful = remaining_needed.len() == 0 || ex.chars.intersects(&remaining_needed);
            if !useful {
                continue;
            }
            eligible += 1;
            // compensate twice for rare characters
            // - once to bring all chars to equal probability of sampling
            // - once to over-sample rare chars because we need to see them more
            let weight = 1f64 / (*min_freq * *min_freq) as f64;
            weight_sum += weight;
            let roll: f64 = rng.gen();
            if roll < weight / weight_sum {
                pick = Some((idx, *min_freq));
            }
        }

        let Some((idx, min_freq)) = pick else {
            break;
        };

        let (ex, _) = pool.remove(idx);
        remaining_needed = remaining_needed.diff(&ex.chars);
        have_chars = have_chars.union(&ex.chars);
        emitted += 1;
        println!(
            "{}\t{}\t{}\t{}\t{}\t{}\t{}",
            emitted,
            min_freq,
            have_chars.len(),
            remaining_needed.len(),
            allowed_chars.len(),
            eligible,
            ex.chars.to_string()
        );
        callback(ex.clone())?;
    }
    Ok(())
}