use std::net::SocketAddr;
use anyhow::{anyhow, Result};
use futures::stream::TryStreamExt;
use rand::prelude::*;
use serde::Deserialize;
use http_types::mime;
use tide::Request;
use crate::datafiles::*;
use crate::example::*;
use crate::format::*;
use crate::*;
/// Loads every data file, builds the shared application state and serves the
/// web UI on `bind_addr` until the listener stops.
///
/// # Panics
///
/// Panics if any of the data files cannot be read or parsed, or if the
/// rendered index page is not valid UTF-8.
///
/// # Errors
///
/// Returns an error if a static file route cannot be registered or if the
/// HTTP listener fails.
pub async fn server_main(bind_addr: SocketAddr) -> tide::Result<()> {
    // ---- load data files ----

    info!("Loading kanji levels...");
    let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
    // Concatenate the characters of every level into one string.
    let every_kanji: String = kanji_levels
        .iter()
        .map(|(_, level_chars)| level_chars.to_string())
        .collect();
    let all_kanji = Charset::new(every_kanji);

    info!("Loading examples...");
    let examples = {
        let mut loaded = read_examples(&all_kanji).expect("read_examples");
        // Keep only sentences whose Japanese text is 5 to 25 characters long.
        loaded.retain(|example| {
            let length = example.ja.chars().count();
            (5..=25).contains(&length)
        });
        Box::leak(loaded.into_boxed_slice())
    };

    info!("Counting chars in examples...");
    let example_freq = calc_example_freq(&examples);

    info!("Loading furigana overrides...");
    let furigana_overrides = read_furigana_overrides().expect("read_furigana_overrides");
    debug!("RAM: {}", ALLOCATOR.allocated() / 1024);

    info!("Loading jmdict_idx.json...");
    let jmdict_idx = read_jmdict_idx().expect("read jmdict_idx.json");
    debug!("RAM: {}", ALLOCATOR.allocated() / 1024);

    info!("Loading batches.json...");
    let batches = Box::leak(read_batches().expect("read/parse").into_boxed_slice());

    // The index page never changes, so it is rendered once and leaked.
    let index = {
        let mut rendered = Vec::new();
        format_index_to(&mut rendered, &batches, &kanji_levels).unwrap();
        String::leak(String::from_utf8(rendered).unwrap())
    };

    // ---- setup http server ----

    let state: State = Box::leak(Box::new(StateStruct {
        jmdict_idx,
        batches,
        index,
        examples,
        example_freq,
        furigana_overrides,
    }));

    let mut app = tide::with_state(state);
    app.with(tide::log::LogMiddleware::new());

    app.at("/").get(home_page);
    app.at("/index.html").get(home_page);
    for asset in ["style.css", "script.js", "jquery.js"] {
        app.at(&format!("/{}", asset))
            .serve_file(format!("static/{}", asset))?;
    }
    app.at("/about.html").get(about_page);
    app.at("/gen.html").post(gen_examples_page);
    app.at("/:batch").get(batch_page);

    // ---- serve actual http ----

    info!("Server listening on {}", bind_addr);
    debug!("RAM: {}", ALLOCATOR.allocated() / 1024);
    app.listen(bind_addr).await?;

    Ok(())
}
/// Handle to the application state given to every request handler: a
/// `'static` reference to the [`StateStruct`] that `server_main` builds once
/// and leaks.
type State = &'static StateStruct;
/// Read-only data loaded at startup and shared by all request handlers.
// NOTE(review): the reason for `allow(dead_code)` is not visible in this file;
// every field appears to be read somewhere below — confirm it is still needed.
#[allow(dead_code)]
struct StateStruct {
    /// Dictionary index loaded by `read_jmdict_idx`; looked up by word when
    /// building vocabulary tables and passed to furigana generation.
    jmdict_idx: DictIndex,
    /// All batches loaded by `read_batches`, addressed by position.
    batches: &'static [Batch],
    /// Index page HTML, rendered once at startup and served by `home_page`.
    index: &'static str,
    /// Example sentences kept after the 5..=25 character length filter.
    examples: &'static [Example],
    /// Per-character count over the examples, as computed by
    /// `calc_example_freq`.
    example_freq: HashMap<char, usize>,
    /// Overrides loaded by `read_furigana_overrides`, passed to furigana
    /// generation.
    furigana_overrides: HashMap<String, String>,
}
/// Serves the index page that was rendered at startup.
async fn home_page(req: Request<State>) -> tide::Result {
    let index_html = req.state().index;
    let response = tide::Response::builder(200)
        .body(index_html)
        .content_type(mime::HTML)
        .build();
    Ok(response)
}
/// Renders the "about" page on each request and returns it as HTML.
///
/// # Errors
///
/// Propagates any error reported by `format_about_to`.
async fn about_page(_req: Request<State>) -> tide::Result {
    let mut html = Vec::new();
    format_about_to(&mut html)?;

    let response = tide::Response::builder(200)
        .body(html)
        .content_type(mime::HTML)
        .build();
    Ok(response)
}
async fn batch_page(req: Request<State>) -> tide::Result {
let batch_idx = req.param("batch")?;
let batch_idx: usize = batch_idx
.strip_suffix(".html")
.unwrap_or(batch_idx)
.parse()?;
let batch = req
.state()
.batches
.get(batch_idx)
.ok_or(anyhow!("this batch number does not exist"))?;
let mut buf = vec![];
format_batch_to(
&mut buf,
&req.state().jmdict_idx,
req.state().batches.len(),
batch_idx,
batch,
)?;
Ok(tide::Response::builder(200)
.body(buf)
.content_type(mime::HTML)
.build())
}
/// Form parameters of the practice generator, deserialized from the request
/// body by `gen_examples_page`.
#[derive(Deserialize)]
struct GenParam {
    /// One end of the requested range of batch indices.
    first_level: usize,
    /// The other end of the range; the handler sorts the two values, so they
    /// may be supplied in either order.
    last_level: usize,
}
async fn gen_examples_page(mut req: Request<State>) -> tide::Result {
let param: GenParam = req.body_form().await?;
let first_level = std::cmp::min(param.first_level, param.last_level);
let last_level = std::cmp::max(param.first_level, param.last_level);
let allowed_chars = Charset::from_iter(
req.state()
.batches
.get(..=last_level)
.unwrap_or_default()
.iter()
.map(|b| b.chars.iter())
.flatten(),
);
let needed_chars = Charset::from_iter(
req.state()
.batches
.get(first_level..=last_level)
.unwrap_or_default()
.iter()
.map(|b| b.chars.iter())
.flatten(),
);
let (tx, rx) = async_channel::unbounded();
let state: State = req.state();
std::thread::spawn(move || {
tx.send_blocking(Ok(format!(
r#"
<!DOCTYPE html>
<html>
<head>
<meta charset=\"UTF-8\" />
<title>{:03} - {:03} practice</title>
<link rel="stylesheet" type="text/css" href="style.css" />
<script src="jquery.js"></script>
<script src="script.js"></script>
</head>
<body>
<div class="batch_page">
<p><a href="index.html">index</a></p>
<p>Practice for {:03} - {:03}</p>
<hr />
<div id="gen_section">
<div id="gen_ex_cnt">
</div>
<div id="gen_ex_display">
</div>
<div id="gen_ex_en">
</div>
<div id="gen_ex_words" class="vocabtable">
</div>
<div id="gen_ex_kanji" class="vocabtable">
</div>
</div>
</div>
</body>
"#,
first_level, last_level, first_level, last_level
)
.into_bytes()))?;
gen_examples(state, &allowed_chars, &needed_chars, 50, |ex| {
let mut expl = "<table>".to_string();
for word in ex.expl.split(|c| c == ' ' || c == '~') {
let (keb, reb) = expl_clean_word(word);
let wchars = Charset::new(keb);
if !wchars.intersects(&allowed_chars) {
continue;
}
if let Some(ents) = state.jmdict_idx.get(keb) {
for ent in ents.iter() {
if reb.map(|x| x != ent.reb).unwrap_or(false) {
continue;
}
expl += &format!(
r#"<tr><td style="word-break: keep-all"> <span class="tab_large font_ja">{}</span> </td><td width="50%">"#,
keb
);
for sense in ent.sense.iter() {
if !expl.ends_with('>') {
expl += "; ";
}
expl += sense;
}
expl += &format!(
r#"</td><td style="word-break: keep-all" class="tab_large font_ja">{}</td></tr>"#,
ent.reb
);
}
}
}
let mut kanji = "<table>".to_string();
let mut chrvec = ex
.chars
.iter()
.map(|chr| {
(
chr,
req.state()
.batches
.iter()
.take(last_level + 1)
.enumerate()
.flat_map(|(ib, b)| {
b.examples
.iter()
.filter(|ex| ex.chars.contains(chr))
.map(move |ex| (ib, ex))
})
.collect::<Vec<_>>(),
)
})
.collect::<Vec<_>>();
chrvec.sort_by_key(|(_, exs)| exs.len());
for (chr, exs) in chrvec.iter().take(5) {
for (cnt, (ib, ex)) in exs.iter().enumerate().take(4) {
if cnt == 0 {
kanji +=
&format!(r#"<tr><td class="tab_large font_ja">{} "#, chr);
} else {
kanji += &format!(r#"<tr><td>"#);
}
kanji += &format!(
r#"</td><td><a href="{:03}.html">{:03}</a> </td><td class="tab_large font_ja">{}</td></tr>"#,
ib, ib, ex.ja
);
}
}
let item = serde_json::json!({
"ja": ex.ja,
"en": ex.en,
"furi": ex.furigana_markup(),
"vocab": expl + "</table>",
"kanji": kanji + "</table>",
});
tx.send_blocking(Ok(format!(
"<script> add_example({}); </script>\n",
serde_json::to_string(&item)?
)
.into_bytes()))?;
Ok(())
})?;
tx.send_blocking(Ok(br#"
</body>
</html>
"#
.to_vec()))?;
Ok::<_, anyhow::Error>(())
});
Ok(tide::Response::builder(200)
.body(tide::Body::from_reader(
Box::pin(rx).into_async_read(),
None,
))
.content_type(mime::HTML)
.build())
}
// ---- example calculation ----
/// Tallies, for each character, how many times it is yielded by the `chars`
/// of the given examples.
fn calc_example_freq(examples: &[Example]) -> HashMap<char, usize> {
    let mut freq: HashMap<char, usize> = HashMap::new();
    examples
        .iter()
        .flat_map(|example| example.chars.iter())
        .for_each(|chr| *freq.entry(chr).or_default() += 1);
    freq
}
/// Picks up to `count` random examples and hands each one to `callback`.
///
/// Only examples made entirely of `allowed_chars` and containing at least one
/// of `needed_chars` are candidates. While some needed characters have not
/// appeared yet, only candidates that contain one of them are considered.
/// Each draw is a single-pass weighted random selection in which a
/// candidate's weight is the inverse of the frequency of its rarest
/// character, so rare characters are favoured.
///
/// A drawn example whose furigana cannot be generated is discarded (with a
/// warning) and does not count towards `count`. Generation stops early when
/// no candidate is left.
///
/// # Errors
///
/// Returns the first error produced by `callback`.
fn gen_examples<F>(
    data: &StateStruct,
    allowed_chars: &Charset,
    needed_chars: &Charset,
    count: usize,
    mut callback: F,
) -> Result<()>
where
    F: FnMut(Example) -> Result<()>,
{
    let mut rng = thread_rng();

    // Every usable example, paired with the frequency of its rarest character.
    let mut pool: Vec<_> = data
        .examples
        .iter()
        .filter(|ex| ex.chars.diff(allowed_chars).is_empty() && ex.chars.intersects(needed_chars))
        .map(|ex| {
            let rarest = ex
                .chars
                .iter()
                .filter_map(|c| data.example_freq.get(&c))
                .min()
                .unwrap();
            (ex, *rarest)
        })
        .collect();

    let mut missing = needed_chars.clone();
    let mut covered = Charset::new("");
    let mut emitted = 0;

    trace!("Ex\tMinCnt\tChars\tNeeded\tAllowed\tCandidates\tChars");
    while emitted < count {
        let mut picked = None;
        let mut weight_sum = 0f64;
        let mut eligible = 0;

        // Once every needed character has appeared, all candidates are eligible.
        let eligible_candidates = pool
            .iter()
            .enumerate()
            .filter(|(_, (ex, _))| missing.len() == 0 || ex.chars.intersects(&missing));
        for (idx, (_, min_freq)) in eligible_candidates {
            eligible += 1;
            // compensate for rare characters
            let weight = 1f64 / *min_freq as f64;
            weight_sum += weight;
            // Replace the current pick with probability weight / weight_sum.
            let roll: f64 = rng.gen();
            if roll < weight / weight_sum {
                picked = Some((idx, *min_freq));
            }
        }

        let Some((idx, min_freq)) = picked else {
            break;
        };

        // A drawn candidate is removed whether or not it ends up being used.
        let (template, _) = pool.remove(idx);
        let mut ex = template.clone();
        if !ex.gen_furigana(&data.jmdict_idx, &data.furigana_overrides) {
            warn!("Warning: failed to generate furigana");
            warn!(" sentence: {}", ex.ja);
            warn!(" bad furi: {}", ex.furigana.as_deref().unwrap_or("-"));
            continue;
        }

        missing = missing.diff(&ex.chars);
        covered = covered.union(&ex.chars);
        emitted += 1;
        trace!(
            "{}\t{}\t{}\t{}\t{}\t{}\t{}",
            emitted,
            min_freq,
            covered.len(),
            missing.len(),
            allowed_chars.len(),
            eligible,
            ex.chars.to_string()
        );
        callback(ex)?;
    }

    Ok(())
}