path: root/src/format.rs



use std::fs;

use anyhow::Result;

use crate::charset::Charset;
use crate::*;

// =====================================================================
//                          FORMATTING TO HTML
// =====================================================================

/// Renders the HTML page for one batch, panicking if rendering fails.
///
/// `indexed` is the `(index, batch)` pair to render; `count` is forwarded to
/// `format_batch_aux`, which uses it as the number of batch pages to link to.
///
/// # Panics
///
/// Panics with the message `"format batch"` if `format_batch_aux` returns an
/// error.
pub fn format_batch<'a>(dict_idx: &DictIndex<'a>, count: usize, indexed: (usize, &Batch)) {
    let (i, batch) = indexed;
    let outcome = format_batch_aux(dict_idx, count, i, batch);
    outcome.expect("format batch");
}

/// Writes the page for batch number `i` to `public/{i:03}.html`.
///
/// The page contains, in order:
/// 1. a navigation paragraph linking to `index.html` and to the pages
///    `000.html` .. `{count-1:03}.html` (the current page is shown unlinked),
/// 2. the batch level,
/// 3. jisho.org links for the characters of `ex.chars.inter(&batch.chars)`,
///    each character being listed only for the first example that yields it,
/// 4. every example with its characters wrapped in `char_cur` / `char_p1` /
///    `char_p2` / `char_bad` spans, plus a collapsible explanation built from
///    the dictionary entries found in `dict_idx`,
/// 5. the extra vocabulary tables (current level, then the rest),
/// 6. a collapsible table of extra examples.
///
/// # Errors
///
/// Returns an error if the output file cannot be created or if any write
/// (including the final flush) fails.
fn format_batch_aux<'a>(
    dict_idx: &DictIndex<'a>,
    count: usize,
    i: usize,
    batch: &Batch,
) -> Result<()> {
    let mut f = io::BufWriter::new(fs::File::create(format!("public/{:03}.html", i))?);
    // Raw strings take quotes verbatim: writing `\"` here would emit literal
    // backslashes into the HTML and break the charset declaration.
    write!(
        f,
        r#"<!DOCTYPE html>
        <html>
            <head>
                <meta charset="UTF-8" />
                <title>Batch #{:03}</title>
                <link rel="stylesheet" type="text/css" href="style.css" />
            </head>
            <body><div class="batch_page">"#,
        i
    )?;

    // Navigation bar: index link followed by one entry per batch page.
    writeln!(f, r#"<p><a href="index.html">index</a>"#)?;
    for j in 0..count {
        if j != i {
            writeln!(f, r#" <a href="{:03}.html">{:03}</a>"#, j, j)?;
        } else {
            writeln!(f, " {:03}", j)?;
        }
    }
    writeln!(f, r#"</p>"#)?;
    writeln!(f, "<p>Level: {}</p>", batch.level)?;

    // Character overview. `ex_prev` accumulates what has already been linked
    // so that a character is emitted only once across all examples.
    write!(f, r#"<p class="ja">"#)?;
    let mut ex_prev = Charset::default();
    for ex in batch.examples.iter() {
        let ex_chars = ex.chars.inter(&batch.chars);
        for c in ex_chars.diff(&ex_prev).chars().iter() {
            write!(
                f,
                r#"<a href="https://jisho.org/search/{}%20%23kanji">{}</a>"#,
                c, c
            )?;
        }
        ex_prev = ex_prev.union(&ex_chars);
    }
    writeln!(f, r#"</p>"#)?;

    for ex in batch.examples.iter() {
        writeln!(f, "<hr />")?;
        // Japanese sentence, one styled span per character. The first
        // charset that contains the character decides its CSS class.
        write!(f, r#"<p class="ja">"#)?;
        for c in ex.ja.chars() {
            if batch.chars.contains(c) {
                write!(f, r#"<span class="char_cur">{}</span>"#, c)?;
            } else if batch.chars_p1.contains(c) {
                write!(f, r#"<span class="char_p1">{}</span>"#, c)?;
            } else if batch.chars_p2.contains(c) {
                write!(f, r#"<span class="char_p2">{}</span>"#, c)?;
            } else if batch.chars_bad.contains(c) {
                write!(f, r#"<span class="char_bad">{}</span>"#, c)?;
            } else {
                write!(f, "{}", c)?;
            }
        }
        writeln!(f, "</p>")?;
        writeln!(f, r#"<p class="en">{}</p>"#, ex.en)?;

        // Dictionary explanations. Words sharing a character with the batch
        // go to `expl_batch` (printed first), the others to `expl_all`.
        writeln!(f, r#"<details><summary>Explanation</summary>"#)?;
        let mut expl_batch = Vec::new();
        let mut expl_all = Vec::new();
        for word in ex.expl.split(|c| c == ' ' || c == '~') {
            let (keb, reb) = expl_clean_word(word);
            let wchars = Charset::new(keb);
            // Skip words that share no character with the example's charset.
            if !wchars.intersects(&ex.chars) {
                continue;
            }
            if let Some(ents) = dict_idx.get(keb) {
                for ent in ents.iter() {
                    if let Some(s) = dict_str(keb, reb, ent) {
                        if wchars.intersects(&batch.chars) {
                            expl_batch.push(s);
                        } else {
                            expl_all.push(s);
                        }
                    }
                }
            }
        }
        for be in expl_batch {
            writeln!(f, r#"<p>{}</p>"#, be)?;
        }
        writeln!(f, r#"<p class="chars">"#)?;
        for c in ex.chars.inter(&batch.chars).chars().iter() {
            writeln!(
                f,
                r#"<a href="https://jisho.org/search/{}%20%23kanji">{}</a>"#,
                c, c
            )?;
        }
        writeln!(f, r#"</p>"#)?;
        for be in expl_all {
            writeln!(f, r#"<p>{}</p>"#, be)?;
        }
        writeln!(f, r#"</details>"#)?;
    }

    // Extra vocabulary, split on whether `batch.level` contains the entry's level.
    writeln!(f, "<hr />")?;
    format_vocab(
        &mut f,
        &batch
            .extra_vocab
            .iter()
            .filter(|v| batch.level.contains(&v.level))
            .collect::<Vec<_>>(),
        "Extra vocabulary (this level)",
    )?;
    format_vocab(
        &mut f,
        &batch
            .extra_vocab
            .iter()
            .filter(|v| !batch.level.contains(&v.level))
            .collect::<Vec<_>>(),
        "Extra vocabulary (previous levels)",
    )?;

    writeln!(
        f,
        r#"<details><summary>Extra examples (reading practice)</summary><table class="extratable">"#
    )?;
    for ex in batch.extra_examples.iter() {
        // Same grouping as for the main examples, but with the short
        // (reading-only) dictionary string.
        let mut expl1 = Vec::new();
        let mut expl2 = Vec::new();
        for word in ex.expl.split(|c| c == ' ' || c == '~') {
            let (keb, reb) = expl_clean_word(word);
            let wchars = Charset::new(keb);
            if !wchars.intersects(&ex.chars) {
                continue;
            }
            if let Some(ents) = dict_idx.get(keb) {
                for ent in ents.iter() {
                    if let Some(s) = dict_str_short(keb, reb, ent) {
                        if wchars.intersects(&batch.chars) {
                            expl1.push(s);
                        } else {
                            expl2.push(s);
                        }
                    }
                }
            }
        }
        expl1.extend(expl2);
        let expl = expl1.join("<br />");
        writeln!(
            f,
            r#"<tr><td><details><summary class="tab_large2 font_ja">&nbsp;&nbsp;{}&nbsp;&nbsp;</summary><div style="text-align: center">{}<br />{}</div></details></td></tr>"#,
            ex.ja, ex.en, expl
        )?;
    }
    writeln!(f, r#"</table></details>"#)?;

    writeln!(f, "<hr />")?;
    writeln!(f, "<p>＼(≧▽≦)／</p>")?;

    // Close the `batch_page` div opened in the header.
    write!(f, "</div></body></html>")?;
    // Flush explicitly: errors raised while flushing on drop would be lost.
    f.flush()?;
    Ok(())
}

/// Writes a collapsible vocabulary table titled `t` to `f`.
///
/// Each entry of `vocab` becomes one row holding its level, kanji form,
/// English text and kana form, in that column order. Nothing at all is
/// written when `vocab` is empty.
///
/// # Errors
///
/// Returns an error if writing to `f` fails.
fn format_vocab(f: &mut impl Write, vocab: &[&JlptVocab], t: &str) -> Result<()> {
    // No entries: emit neither the heading nor an empty table.
    if vocab.is_empty() {
        return Ok(());
    }

    writeln!(
        f,
        r#"<details><summary>{}</summary><table class="vocabtable">"#,
        t
    )?;
    for entry in vocab.iter() {
        writeln!(
            f,
            r#"<tr><td>{}</td><td>&nbsp;&nbsp;<span class="tab_large font_ja">{}</span>&nbsp;&nbsp;</td><td>{}</td><td class="font_ja">{}</td></tr>"#,
            entry.level, entry.kanji, entry.en, entry.kana
        )?;
    }
    writeln!(f, "</table></details>")?;
    Ok(())
}

/// Splits an annotated word into its bare form and optional parenthesised part.
///
/// The first element is the prefix of `w` that precedes the earliest `(`,
/// `{` or `[` (all of `w` when none occurs). The second element is the text
/// between the first `(` and the first `)` following it, or `None` when
/// either delimiter is missing.
fn expl_clean_word(w: &str) -> (&str, Option<&str>) {
    let opens_annotation = |c: char| matches!(c, '(' | '{' | '[');
    let bare = match w.find(opens_annotation) {
        Some(cut) => &w[..cut],
        None => w,
    };

    let in_parens = match w.split_once('(') {
        Some((_, after_open)) => after_open.split_once(')').map(|(inside, _)| inside),
        None => None,
    };

    (bare, in_parens)
}

/// Builds the short HTML label `qkeb 【reading】` for a dictionary entry.
///
/// The reading is the trimmed text of the first `reb` element inside the
/// first `r_ele` child of `ent`. When `qreb` is given and differs from that
/// reading, the entry is rejected and `None` is returned.
///
/// # Panics
///
/// Panics if `ent` has no `r_ele` child, if that child has no `reb` child,
/// or if the `reb` element has no text.
fn dict_str_short<'a>(
    qkeb: &str,
    qreb: Option<&str>,
    ent: &roxmltree::Node<'a, 'a>,
) -> Option<String> {
    let reading_elem = ent
        .children()
        .find(|node| node.has_tag_name("r_ele"))
        .unwrap();
    let reading = reading_elem
        .children()
        .find(|node| node.has_tag_name("reb"))
        .unwrap()
        .text()
        .unwrap()
        .trim();

    match qreb {
        // A requested reading that does not match filters the entry out.
        Some(wanted) if wanted != reading => None,
        _ => Some(format!(
            r#"<span class="font_ja">{} 【{}】</span>"#,
            qkeb, reading
        )),
    }
}

/// Builds the full HTML description of a dictionary entry.
///
/// Starts from the label produced by `dict_str_short` (returning `None` when
/// that function rejects the entry) and appends, for every `sense` child of
/// `ent`, the trimmed text of its first `gloss` element. Glosses are
/// separated by `;` with no trailing separator.
///
/// A `gloss` element without text is skipped instead of aborting the run.
///
/// # Panics
///
/// Panics under the same conditions as `dict_str_short`.
fn dict_str<'a>(qkeb: &str, qreb: Option<&str>, ent: &roxmltree::Node<'a, 'a>) -> Option<String> {
    let mut ret = dict_str_short(qkeb, qreb, ent)?;

    for sense in ent.children().filter(|x| x.has_tag_name("sense")) {
        let gloss = sense
            .children()
            .find(|x| x.has_tag_name("gloss"))
            .and_then(|g| g.text());
        if let Some(text) = gloss {
            // Append in place rather than formatting a temporary string.
            ret.push(' ');
            ret.push_str(text.trim());
            ret.push(';');
        }
    }

    // Drop the separator left behind by the last gloss.
    if ret.ends_with(';') {
        ret.pop();
    }
    Some(ret)
}

/// Writes `public/index.html`, the overview page of all batches.
///
/// The page holds a link to `about.html` followed by two tables:
/// - one row per batch with a link to its page, its level, `chars`, the
///   number of examples, and the `chars_p1`, `chars_p2` and `chars_bad`
///   charsets;
/// - one row per `(level, characters)` pair of `kanji_levels` showing the
///   character count, the characters themselves and those that appear in no
///   batch's `chars`. Levels named `"N0+"` or ending in `"-10"` are skipped.
///
/// # Errors
///
/// Returns an error if the output file cannot be created or if any write
/// (including the final flush) fails.
pub fn format_index(batches: &[Batch], kanji_levels: &[(String, String)]) -> Result<()> {
    let mut f = io::BufWriter::new(fs::File::create("public/index.html")?);
    // Raw strings take quotes verbatim: writing `\"` here would emit literal
    // backslashes into the HTML and break the charset declaration.
    write!(
        f,
        r#"<!DOCTYPE html>
        <html>
            <head>
                <meta charset="UTF-8" />
                <title>List of batches</title>
                <link rel="stylesheet" type="text/css" href="style.css" />
            </head>
            <body><div class="index_page">"#
    )?;

    writeln!(f, r#"<p><a href="about.html">About / How-to</a></p><hr />"#)?;

    // Table 1: one row per batch.
    writeln!(f, "<table>")?;
    writeln!(f, "<tr><th>Num</th><th>Level</th><th>Kanji</th><th>Examples</th><th>Lesson-1</th><th>Lesson-2</th><th>Ignore</th></tr>")?;
    for (i, batch) in batches.iter().enumerate() {
        writeln!(
            f,
            r#"<tr><td><a href="{:03}.html">{:03}</a></td><td>{}</td><td class="font_ja">{}</td><td>&nbsp;&nbsp;{}</td><td class="font_ja">{}</td><td class="font_ja">{}</td><td class="font_ja">{}</td></tr>"#,
            i,
            i,
            batch.level,
            batch.chars.to_string(),
            batch.examples.len(),
            batch.chars_p1.to_string(),
            batch.chars_p2.to_string(),
            batch.chars_bad.to_string()
        )?;
    }
    writeln!(f, r#"</table>"#)?;

    writeln!(f, "<hr />")?;

    // Every character found in some batch's `chars`, used below to compute
    // what is still missing for each level.
    let all_chars = Charset::from_iter(
        batches
            .iter()
            .flat_map(|x| x.chars.chars().iter().copied()),
    );

    // Table 2: one row per kanji level.
    writeln!(f, "<table>")?;
    writeln!(
        f,
        r#"<tr><th>Level</th><th>Count</th><th width="60%">Kanji</th><th>Missing kanji</th></tr>"#
    )?;
    for (lvl, chars) in kanji_levels.iter() {
        if lvl == "N0+" || lvl.ends_with("-10") {
            continue;
        }
        let chars = Charset::new(chars);
        let missing = chars.diff(&all_chars);
        writeln!(
            f,
            r#"<tr><td>{}</td><td>{}</td><td class="font_ja">{}</td><td><span class="font_ja">{}</span> ({})</td></tr>"#,
            lvl,
            chars.len(),
            chars.to_string(),
            missing.to_string(),
            missing.len()
        )?;
    }
    writeln!(f, "</table>")?;

    write!(f, "</div></body></html>")?;
    // Flush explicitly: errors raised while flushing on drop would be lost.
    f.flush()?;
    Ok(())
}

/// Writes `public/about.html` from the contents of `README.md`.
///
/// The README is read from the current working directory, converted with
/// `markdown::to_html` and wrapped in the page skeleton together with a link
/// back to `index.html`.
///
/// # Errors
///
/// Returns an error if the output file cannot be created, if `README.md`
/// cannot be read, or if any write (including the final flush) fails.
pub fn format_about() -> Result<()> {
    let mut f = io::BufWriter::new(fs::File::create("public/about.html")?);
    // Raw strings take quotes verbatim: writing `\"` here would emit literal
    // backslashes into the HTML and break the charset declaration.
    write!(
        f,
        r#"<!DOCTYPE html>
        <html>
            <head>
                <meta charset="UTF-8" />
                <title>Datagengo README</title>
                <link rel="stylesheet" type="text/css" href="style.css" />
            </head>
            <body>"#
    )?;

    writeln!(f, r#"<div class="about_page">"#)?;
    writeln!(
        f,
        r#"<p><a href="index.html">Back to lessons</a></p><hr />"#
    )?;

    writeln!(
        f,
        "{}",
        markdown::to_html(&fs::read_to_string("README.md")?)
    )?;

    writeln!(f, r#"</div></body></html>"#)?;

    // Flush explicitly: errors raised while flushing on drop would be lost.
    f.flush()?;
    Ok(())
}
use std::fs;

use anyhow::Result;

use crate::charset::Charset;
use crate::*;

// =====================================================================
//                          FORMATTING TO HTML
// =====================================================================

/// Renders the HTML page for one batch, panicking if rendering fails.
///
/// `indexed` is the `(index, batch)` pair to render; `count` is forwarded to
/// `format_batch_aux`, which uses it as the number of batch pages to link to.
///
/// # Panics
///
/// Panics with the message `"format batch"` if `format_batch_aux` returns an
/// error.
pub fn format_batch<'a>(dict_idx: &DictIndex<'a>, count: usize, indexed: (usize, &Batch)) {
    let (i, batch) = indexed;
    let outcome = format_batch_aux(dict_idx, count, i, batch);
    outcome.expect("format batch");
}

/// Writes the page for batch number `i` to `public/{i:03}.html`.
///
/// The page contains, in order:
/// 1. a navigation paragraph linking to `index.html` and to the pages
///    `000.html` .. `{count-1:03}.html` (the current page is shown unlinked),
/// 2. the batch level,
/// 3. jisho.org links for the characters of `ex.chars.inter(&batch.chars)`,
///    each character being listed only for the first example that yields it,
/// 4. every example with its characters wrapped in `char_cur` / `char_p1` /
///    `char_p2` / `char_bad` spans, plus a collapsible explanation built from
///    the dictionary entries found in `dict_idx`,
/// 5. the extra vocabulary tables (current level, then the rest),
/// 6. a collapsible table of extra examples.
///
/// # Errors
///
/// Returns an error if the output file cannot be created or if any write
/// (including the final flush) fails.
fn format_batch_aux<'a>(
    dict_idx: &DictIndex<'a>,
    count: usize,
    i: usize,
    batch: &Batch,
) -> Result<()> {
    let mut f = io::BufWriter::new(fs::File::create(format!("public/{:03}.html", i))?);
    // Raw strings take quotes verbatim: writing `\"` here would emit literal
    // backslashes into the HTML and break the charset declaration.
    write!(
        f,
        r#"<!DOCTYPE html>
        <html>
            <head>
                <meta charset="UTF-8" />
                <title>Batch #{:03}</title>
                <link rel="stylesheet" type="text/css" href="style.css" />
            </head>
            <body><div class="batch_page">"#,
        i
    )?;

    // Navigation bar: index link followed by one entry per batch page.
    writeln!(f, r#"<p><a href="index.html">index</a>"#)?;
    for j in 0..count {
        if j != i {
            writeln!(f, r#" <a href="{:03}.html">{:03}</a>"#, j, j)?;
        } else {
            writeln!(f, " {:03}", j)?;
        }
    }
    writeln!(f, r#"</p>"#)?;
    writeln!(f, "<p>Level: {}</p>", batch.level)?;

    // Character overview. `ex_prev` accumulates what has already been linked
    // so that a character is emitted only once across all examples.
    write!(f, r#"<p class="ja">"#)?;
    let mut ex_prev = Charset::default();
    for ex in batch.examples.iter() {
        let ex_chars = ex.chars.inter(&batch.chars);
        for c in ex_chars.diff(&ex_prev).chars().iter() {
            write!(
                f,
                r#"<a href="https://jisho.org/search/{}%20%23kanji">{}</a>"#,
                c, c
            )?;
        }
        ex_prev = ex_prev.union(&ex_chars);
    }
    writeln!(f, r#"</p>"#)?;

    for ex in batch.examples.iter() {
        writeln!(f, "<hr />")?;
        // Japanese sentence, one styled span per character. The first
        // charset that contains the character decides its CSS class.
        write!(f, r#"<p class="ja">"#)?;
        for c in ex.ja.chars() {
            if batch.chars.contains(c) {
                write!(f, r#"<span class="char_cur">{}</span>"#, c)?;
            } else if batch.chars_p1.contains(c) {
                write!(f, r#"<span class="char_p1">{}</span>"#, c)?;
            } else if batch.chars_p2.contains(c) {
                write!(f, r#"<span class="char_p2">{}</span>"#, c)?;
            } else if batch.chars_bad.contains(c) {
                write!(f, r#"<span class="char_bad">{}</span>"#, c)?;
            } else {
                write!(f, "{}", c)?;
            }
        }
        writeln!(f, "</p>")?;
        writeln!(f, r#"<p class="en">{}</p>"#, ex.en)?;

        // Dictionary explanations. Words sharing a character with the batch
        // go to `expl_batch` (printed first), the others to `expl_all`.
        writeln!(f, r#"<details><summary>Explanation</summary>"#)?;
        let mut expl_batch = Vec::new();
        let mut expl_all = Vec::new();
        for word in ex.expl.split(|c| c == ' ' || c == '~') {
            let (keb, reb) = expl_clean_word(word);
            let wchars = Charset::new(keb);
            // Skip words that share no character with the example's charset.
            if !wchars.intersects(&ex.chars) {
                continue;
            }
            if let Some(ents) = dict_idx.get(keb) {
                for ent in ents.iter() {
                    if let Some(s) = dict_str(keb, reb, ent) {
                        if wchars.intersects(&batch.chars) {
                            expl_batch.push(s);
                        } else {
                            expl_all.push(s);
                        }
                    }
                }
            }
        }
        for be in expl_batch {
            writeln!(f, r#"<p>{}</p>"#, be)?;
        }
        writeln!(f, r#"<p class="chars">"#)?;
        for c in ex.chars.inter(&batch.chars).chars().iter() {
            writeln!(
                f,
                r#"<a href="https://jisho.org/search/{}%20%23kanji">{}</a>"#,
                c, c
            )?;
        }
        writeln!(f, r#"</p>"#)?;
        for be in expl_all {
            writeln!(f, r#"<p>{}</p>"#, be)?;
        }
        writeln!(f, r#"</details>"#)?;
    }

    // Extra vocabulary, split on whether `batch.level` contains the entry's level.
    writeln!(f, "<hr />")?;
    format_vocab(
        &mut f,
        &batch
            .extra_vocab
            .iter()
            .filter(|v| batch.level.contains(&v.level))
            .collect::<Vec<_>>(),
        "Extra vocabulary (this level)",
    )?;
    format_vocab(
        &mut f,
        &batch
            .extra_vocab
            .iter()
            .filter(|v| !batch.level.contains(&v.level))
            .collect::<Vec<_>>(),
        "Extra vocabulary (previous levels)",
    )?;

    writeln!(
        f,
        r#"<details><summary>Extra examples (reading practice)</summary><table class="extratable">"#
    )?;
    for ex in batch.extra_examples.iter() {
        // Same grouping as for the main examples, but with the short
        // (reading-only) dictionary string.
        let mut expl1 = Vec::new();
        let mut expl2 = Vec::new();
        for word in ex.expl.split(|c| c == ' ' || c == '~') {
            let (keb, reb) = expl_clean_word(word);
            let wchars = Charset::new(keb);
            if !wchars.intersects(&ex.chars) {
                continue;
            }
            if let Some(ents) = dict_idx.get(keb) {
                for ent in ents.iter() {
                    if let Some(s) = dict_str_short(keb, reb, ent) {
                        if wchars.intersects(&batch.chars) {
                            expl1.push(s);
                        } else {
                            expl2.push(s);
                        }
                    }
                }
            }
        }
        expl1.extend(expl2);
        let expl = expl1.join("<br />");
        writeln!(
            f,
            r#"<tr><td><details><summary class="tab_large2 font_ja">&nbsp;&nbsp;{}&nbsp;&nbsp;</summary><div style="text-align: center">{}<br />{}</div></details></td></tr>"#,
            ex.ja, ex.en, expl
        )?;
    }
    writeln!(f, r#"</table></details>"#)?;

    writeln!(f, "<hr />")?;
    writeln!(f, "<p>＼(≧▽≦)／</p>")?;

    // Close the `batch_page` div opened in the header.
    write!(f, "</div></body></html>")?;
    // Flush explicitly: errors raised while flushing on drop would be lost.
    f.flush()?;
    Ok(())
}

/// Writes a collapsible vocabulary table titled `t` to `f`.
///
/// Each entry of `vocab` becomes one row holding its level, kanji form,
/// English text and kana form, in that column order. Nothing at all is
/// written when `vocab` is empty.
///
/// # Errors
///
/// Returns an error if writing to `f` fails.
fn format_vocab(f: &mut impl Write, vocab: &[&JlptVocab], t: &str) -> Result<()> {
    // No entries: emit neither the heading nor an empty table.
    if vocab.is_empty() {
        return Ok(());
    }

    writeln!(
        f,
        r#"<details><summary>{}</summary><table class="vocabtable">"#,
        t
    )?;
    for entry in vocab.iter() {
        writeln!(
            f,
            r#"<tr><td>{}</td><td>&nbsp;&nbsp;<span class="tab_large font_ja">{}</span>&nbsp;&nbsp;</td><td>{}</td><td class="font_ja">{}</td></tr>"#,
            entry.level, entry.kanji, entry.en, entry.kana
        )?;
    }
    writeln!(f, "</table></details>")?;
    Ok(())
}

/// Splits an annotated word into its bare form and optional parenthesised part.
///
/// The first element is the prefix of `w` that precedes the earliest `(`,
/// `{` or `[` (all of `w` when none occurs). The second element is the text
/// between the first `(` and the first `)` following it, or `None` when
/// either delimiter is missing.
fn expl_clean_word(w: &str) -> (&str, Option<&str>) {
    let opens_annotation = |c: char| matches!(c, '(' | '{' | '[');
    let bare = match w.find(opens_annotation) {
        Some(cut) => &w[..cut],
        None => w,
    };

    let in_parens = match w.split_once('(') {
        Some((_, after_open)) => after_open.split_once(')').map(|(inside, _)| inside),
        None => None,
    };

    (bare, in_parens)
}

/// Builds the short HTML label `qkeb 【reading】` for a dictionary entry.
///
/// The reading is the trimmed text of the first `reb` element inside the
/// first `r_ele` child of `ent`. When `qreb` is given and differs from that
/// reading, the entry is rejected and `None` is returned.
///
/// # Panics
///
/// Panics if `ent` has no `r_ele` child, if that child has no `reb` child,
/// or if the `reb` element has no text.
fn dict_str_short<'a>(
    qkeb: &str,
    qreb: Option<&str>,
    ent: &roxmltree::Node<'a, 'a>,
) -> Option<String> {
    let reading_elem = ent
        .children()
        .find(|node| node.has_tag_name("r_ele"))
        .unwrap();
    let reading = reading_elem
        .children()
        .find(|node| node.has_tag_name("reb"))
        .unwrap()
        .text()
        .unwrap()
        .trim();

    match qreb {
        // A requested reading that does not match filters the entry out.
        Some(wanted) if wanted != reading => None,
        _ => Some(format!(
            r#"<span class="font_ja">{} 【{}】</span>"#,
            qkeb, reading
        )),
    }
}

/// Builds the full HTML description of a dictionary entry.
///
/// Starts from the label produced by `dict_str_short` (returning `None` when
/// that function rejects the entry) and appends, for every `sense` child of
/// `ent`, the trimmed text of its first `gloss` element. Glosses are
/// separated by `;` with no trailing separator.
///
/// A `gloss` element without text is skipped instead of aborting the run.
///
/// # Panics
///
/// Panics under the same conditions as `dict_str_short`.
fn dict_str<'a>(qkeb: &str, qreb: Option<&str>, ent: &roxmltree::Node<'a, 'a>) -> Option<String> {
    let mut ret = dict_str_short(qkeb, qreb, ent)?;

    for sense in ent.children().filter(|x| x.has_tag_name("sense")) {
        let gloss = sense
            .children()
            .find(|x| x.has_tag_name("gloss"))
            .and_then(|g| g.text());
        if let Some(text) = gloss {
            // Append in place rather than formatting a temporary string.
            ret.push(' ');
            ret.push_str(text.trim());
            ret.push(';');
        }
    }

    // Drop the separator left behind by the last gloss.
    if ret.ends_with(';') {
        ret.pop();
    }
    Some(ret)
}

/// Writes `public/index.html`, the overview page of all batches.
///
/// The page holds a link to `about.html` followed by two tables:
/// - one row per batch with a link to its page, its level, `chars`, the
///   number of examples, and the `chars_p1`, `chars_p2` and `chars_bad`
///   charsets;
/// - one row per `(level, characters)` pair of `kanji_levels` showing the
///   character count, the characters themselves and those that appear in no
///   batch's `chars`. Levels named `"N0+"` or ending in `"-10"` are skipped.
///
/// # Errors
///
/// Returns an error if the output file cannot be created or if any write
/// (including the final flush) fails.
pub fn format_index(batches: &[Batch], kanji_levels: &[(String, String)]) -> Result<()> {
    let mut f = io::BufWriter::new(fs::File::create("public/index.html")?);
    // Raw strings take quotes verbatim: writing `\"` here would emit literal
    // backslashes into the HTML and break the charset declaration.
    write!(
        f,
        r#"<!DOCTYPE html>
        <html>
            <head>
                <meta charset="UTF-8" />
                <title>List of batches</title>
                <link rel="stylesheet" type="text/css" href="style.css" />
            </head>
            <body><div class="index_page">"#
    )?;

    writeln!(f, r#"<p><a href="about.html">About / How-to</a></p><hr />"#)?;

    // Table 1: one row per batch.
    writeln!(f, "<table>")?;
    writeln!(f, "<tr><th>Num</th><th>Level</th><th>Kanji</th><th>Examples</th><th>Lesson-1</th><th>Lesson-2</th><th>Ignore</th></tr>")?;
    for (i, batch) in batches.iter().enumerate() {
        writeln!(
            f,
            r#"<tr><td><a href="{:03}.html">{:03}</a></td><td>{}</td><td class="font_ja">{}</td><td>&nbsp;&nbsp;{}</td><td class="font_ja">{}</td><td class="font_ja">{}</td><td class="font_ja">{}</td></tr>"#,
            i,
            i,
            batch.level,
            batch.chars.to_string(),
            batch.examples.len(),
            batch.chars_p1.to_string(),
            batch.chars_p2.to_string(),
            batch.chars_bad.to_string()
        )?;
    }
    writeln!(f, r#"</table>"#)?;

    writeln!(f, "<hr />")?;

    // Every character found in some batch's `chars`, used below to compute
    // what is still missing for each level.
    let all_chars = Charset::from_iter(
        batches
            .iter()
            .flat_map(|x| x.chars.chars().iter().copied()),
    );

    // Table 2: one row per kanji level.
    writeln!(f, "<table>")?;
    writeln!(
        f,
        r#"<tr><th>Level</th><th>Count</th><th width="60%">Kanji</th><th>Missing kanji</th></tr>"#
    )?;
    for (lvl, chars) in kanji_levels.iter() {
        if lvl == "N0+" || lvl.ends_with("-10") {
            continue;
        }
        let chars = Charset::new(chars);
        let missing = chars.diff(&all_chars);
        writeln!(
            f,
            r#"<tr><td>{}</td><td>{}</td><td class="font_ja">{}</td><td><span class="font_ja">{}</span> ({})</td></tr>"#,
            lvl,
            chars.len(),
            chars.to_string(),
            missing.to_string(),
            missing.len()
        )?;
    }
    writeln!(f, "</table>")?;

    write!(f, "</div></body></html>")?;
    // Flush explicitly: errors raised while flushing on drop would be lost.
    f.flush()?;
    Ok(())
}

/// Writes `public/about.html` from the contents of `README.md`.
///
/// The README is read from the current working directory, converted with
/// `markdown::to_html` and wrapped in the page skeleton together with a link
/// back to `index.html`.
///
/// # Errors
///
/// Returns an error if the output file cannot be created, if `README.md`
/// cannot be read, or if any write (including the final flush) fails.
pub fn format_about() -> Result<()> {
    let mut f = io::BufWriter::new(fs::File::create("public/about.html")?);
    // Raw strings take quotes verbatim: writing `\"` here would emit literal
    // backslashes into the HTML and break the charset declaration.
    write!(
        f,
        r#"<!DOCTYPE html>
        <html>
            <head>
                <meta charset="UTF-8" />
                <title>Datagengo README</title>
                <link rel="stylesheet" type="text/css" href="style.css" />
            </head>
            <body>"#
    )?;

    writeln!(f, r#"<div class="about_page">"#)?;
    writeln!(
        f,
        r#"<p><a href="index.html">Back to lessons</a></p><hr />"#
    )?;

    writeln!(
        f,
        "{}",
        markdown::to_html(&fs::read_to_string("README.md")?)
    )?;

    writeln!(f, r#"</div></body></html>"#)?;

    // Flush explicitly: errors raised while flushing on drop would be lost.
    f.flush()?;
    Ok(())
}