aboutsummaryrefslogblamecommitdiff
path: root/src/main.rs
blob: 837b7ff004644cbbf060448190db0ca8ffd8125b (plain) (tree)
1
2
3
4
5
6
7
8


                              
                                    
 
                             

                                    











                                                                           



                                          
           







                                                          

                                                             
             
         
                              
                                                                               



                                                            


                                                       






                                                                           
                               
                                                                                        


                                              




                                                                                                                                 










                                                                 



                                                                            



                                                                                      
                          
                            
                                                                           


                                                                               



         
















                                                                                  
                                                       



















                                                         


                                                                                


                                                                                 


                 
                                    
                                                                                     


         























                                                            















                                                               
                                                                   













                                                                  
                                                                     






                                                
                                                                     




                           

                                                       





           









                                                                                                             

                                                                
 

                                                                
 
                                            

                                                                               

                                                



                                                                                     


                                        



                                      

                             
                                             
 





                                                          

                       
                            
                                                                     
                                                 

                                                                              

                                    

                                                                                           
                                                      
                                                                  






                                                                                        
                                                              
                                                                  
                            
         
                                      


                                                                        


                                                                        

                                                                       







                                                                            










                                                                                                                         






                                                                          

                                                                            
                                                    
                                            
                                                       
                            
                




                                                                                                  


         



                                                    


             

                                                                                          

 
                                                                                                        
                                                                                       








                                                                          
                                                         








                                                                     

                                                                                 

















                                                                     




                                                                   


                                                             


                                              
                                                   
                                        





                                                              






                                             




                                                                                             



                                             






                                 
                                                     





                                                     



                                             

 
                                                                                                  

                                                                          
                                         
 




                                                









                                                                             
             

 
                                                                                     
                                                                           





























                                                                                                                                             


                                                                  













                                                                                 
                                               







                       
                                               


                   


                       


                           
                                                                             








                                                               





                                                            











                                                                   
                                                





                                                             



















                                                                   
                                              



















                                                                   





























































                                                                   


                                





                                         















                                              


















                                                                        

     
use std::collections::HashMap;
use std::fs;
use std::cmp::Ordering;
use std::io::{self, BufRead, Write};

use anyhow::{anyhow, Result};
use rayon::prelude::*;
use serde::{Serialize, Deserialize};
use structopt::StructOpt;

#[derive(Debug, StructOpt)]
#[structopt(name = "datagengo", about = "Japanese example practice maker")]
struct Opt {
    #[structopt(subcommand)]
    cmd: Cmd,
}

#[derive(Debug, StructOpt)]
enum Cmd {
	ParseKanjidic,
    New {
        #[structopt(default_value = "10")]
        count: usize,
    },
    Format,
}

fn main() {
    let opt = Opt::from_args();

    match opt.cmd {
        Cmd::ParseKanjidic => {
            let levels = parse_kanjidic().expect("error");
            for (level, chars) in levels.iter() {
                println!("{}: {}", level, chars.to_string());
            }
        }
        Cmd::New{ count } => {
            let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
            let all_kanji = Charset::new(kanji_levels.iter()
                .map(|(_, x)| x.to_string())
                .collect::<Vec<_>>()
                .join(""));
            let kanji_levels = kanji_levels.into_iter()
                .map(|(l, x)| (l, Charset::new(x)))
                .collect::<Vec<_>>();
            let mut ex = read_examples(&all_kanji).expect("read_examples");
            ex.retain(|e| (5..=25).contains(&e.ja.chars().count()));
            let mut batches: Vec<Batch> = fs::read("data/batches.json")
                .map_err(anyhow::Error::from)
                .and_then(|x| Ok(serde_json::from_slice(&x)?))
                .unwrap_or_default();
            println!("---- starting after {} batches ----", batches.len());
            for _ in 0..count {
                let batch = gen_batch(&batches, &kanji_levels, &ex).expect("gen_batch");
                if batch.examples.is_empty() {
                    break;
                }
                batches.push(batch);
            }
            fs::write("data/batches.json", serde_json::to_string_pretty(&batches).expect("serialize").as_bytes()).expect("save");
        }
        Cmd::Format => {
            let jmdict = fs::read_to_string("data/JMdict_e.xml")
                .expect("read_jmdict");
            let jmdict = roxmltree::Document::parse_with_options(
                &jmdict,
                roxmltree::ParsingOptions {
                    allow_dtd: true,
                    ..Default::default()
                })
                .expect("parse_jmdict");
            let jmdict_idx = index_jmdict(&jmdict);

            let batches = fs::read("data/batches.json")
                .map_err(anyhow::Error::from)
                .and_then(|x| Ok(serde_json::from_slice::<Vec<Batch>>(&x)?))
                .expect("read/parse");

            fs::create_dir_all("public").expect("mkdir public");
            fs::copy("static/style.css", "public/style.css").expect("copy style.css");

            batches.iter()
                .enumerate()
                .for_each(|x| format_batch(&jmdict_idx, batches.len(), x));

            let kanji_levels = read_kanji_levels().expect("read_kanji_levels");
            format_index(&batches, &kanji_levels).expect("format_index");
        }
    }
}

type DictIndex<'a> = HashMap<&'a str, Vec<roxmltree::Node<'a, 'a>>>;
fn index_jmdict<'a>(dict: &'a roxmltree::Document) -> DictIndex<'a> {
    let dict = dict.root().children().find(|x| x.has_tag_name("JMdict")).unwrap();

    let mut ret: DictIndex<'a> = HashMap::new();
    for x in dict.children().filter(|x| x.has_tag_name("entry")) {
        for r in x.children().filter(|x| x.has_tag_name("k_ele")) {
            if let Some(keb) = r.children().find(|x| x.has_tag_name("keb")) {
                let txt = keb.text().unwrap().trim();
                ret.entry(txt).or_default().push(x);
            }
        }
    }

    ret
}

fn parse_kanjidic() -> Result<Vec<(String, Charset)>> {
    let file = fs::read_to_string("data/kanjidic2.xml")?;
    let xml = roxmltree::Document::parse(&file)?;
    let kanjidic = xml.root().first_child().unwrap();
    assert!(kanjidic.has_tag_name("kanjidic2"));

    let mut levels = HashMap::new();

    for x in kanjidic.children() {
        if !x.has_tag_name("character") {
            continue;
        }
        let mut literal = None;
        let mut jlpt = None;
        let mut grade = None;
        for y in x.children() {
            if y.has_tag_name("literal") {
                literal = y.text();
            }
            if y.has_tag_name("misc") {
                for z in y.children() {
                    if z.has_tag_name("jlpt") {
                        jlpt = z.text().and_then(|x| str::parse::<i32>(x).ok());
                    }
                    if z.has_tag_name("grade") {
                        grade = z.text().and_then(|x| str::parse::<i32>(x).ok());
                    }
                }
            }
        }
        if let Some(lit) = literal {
            levels.entry((jlpt, grade)).or_insert(String::new()).extend(lit.chars());
        }
    }

    let mut levels = levels.into_iter().collect::<Vec<_>>();
    levels.sort_by_key(|((j, g), _)| match (j, g) {
            (Some(j), Some(g)) => (10-*j)*20+*g,
            (Some(j), None) => (10-*j)*20+15,
            (None, Some(g)) => 1000+*g,
            (None, None) => 1015,

    });

    let mut ret = Vec::new();
    let mut pc = Charset::default();
    for ((j, g), chars) in levels.into_iter() {
        let name = match (j, g) {
            (Some(j), Some(g)) => format!("N{}-{}", j, g),
            (Some(j), None) => format!("N{}+", j),
            (None, Some(g)) => format!("N0-{}", g),
            (None, None) => format!("N0+"),
        };
        let chars = Charset::new(chars).diff(&pc);
        pc = pc.union(&chars);
        ret.push((name, chars));
    }

    Ok(ret)
}

fn read_kanji_levels() -> Result<Vec<(String, String)>> {
    Ok(fs::read_to_string("data/kanji_levels.txt")?
        .lines()
        .filter_map(|l| l.split_once(": "))
        .map(|(l, k)| (l.to_string(), k.to_string()))
        .collect::<Vec<_>>())
}

fn read_examples(all_kanji: &Charset) -> Result<Vec<Example>> {
    let file = fs::File::open("data/examples.utf")?;

    let mut ret = Vec::new();
    let mut a = "".to_string();

    for (i, line) in io::BufReader::new(file).lines().enumerate() {
        let line = line?;
        if line.starts_with("A:") {
            a = line;
        } else if line.starts_with("B:") {
            let s = a.strip_prefix("A: ");
            let t = line.strip_prefix("B: ");
            if let (Some(a), Some(b)) = (s, t) {
                if let Some((ja, eng)) = a.split_once("\t") {
                    if let Some((eng, id)) = eng.split_once("#") {
                        ret.push(Example {
                            ja: ja.to_string(),
                            en: eng.to_string(),
                            expl: b.to_string(),
                            id: Some(id.to_string()),
                            chars: Charset::new(ja).inter(all_kanji),
                        });
                    } else {
                        ret.push(Example {
                            ja: ja.to_string(),
                            en: eng.to_string(),
                            expl: b.to_string(),
                            id: None,
                            chars: Charset::new(ja).inter(all_kanji),
                        });
                    }
                }
            }
        }
        if i % 10000 == 0 {
            eprintln!("read examples: {}/300", i/1000);
        }
    }

    Ok(ret)
}

fn gen_batch(previous: &[Batch], kanji_levels: &[(String, Charset)], examples: &[Example]) -> Result<Batch> {
    let prev_chars = Charset::from_iter(previous.iter()
        .map(|x| x.chars.chars().iter().copied())
        .flatten());

    let (mut target_i, target_level, mut target_chars) = kanji_levels.iter().enumerate()
        .map(|(i, (l, c))| (i, l, c.diff(&prev_chars)))
        .find(|(_, _, c)| !c.is_empty())
        .ok_or(anyhow!("no more batches to make!"))?;

    let chars_p1 = previous.iter().rev().next()
        .map(|b| b.chars.clone()).unwrap_or(Charset::default());

    let chars_p2 = previous.iter().rev().skip(1).next()
        .map(|b| b.chars.clone()).unwrap_or(Charset::default());

    let mut chars_late = Charset::default();

    let mut chars_bad = Charset::from_iter(kanji_levels.iter().skip(target_i+1)
        .map(|(_, c)| c.chars().iter().copied())
        .flatten());
    let mut chars_bad_avoid = Charset::from_iter(kanji_levels.iter().skip(target_i+1)
        .filter(|(l, _)| !l.ends_with("-9") && !l.ends_with("-10"))
        .map(|(_, c)| c.chars().iter().copied())
        .flatten());

    let mut batch = Batch {
        level: target_level.to_string(),
        chars: Charset::default(),
        chars_p1: Charset::default(),
        chars_p2: Charset::default(),
        chars_bad: Charset::default(),
        examples: Vec::new(),
    };
    let mut batch_chars = Charset::default();

    eprintln!("----");
    eprintln!("Level  :  {}", batch.level);
    eprintln!("Target :  {}", target_chars.to_string());
    eprintln!("Prev1  :  {}", chars_p1.to_string());
    eprintln!("Prev2  :  {}", chars_p2.to_string());
    eprintln!("Bad    :  {} characters", chars_bad.len());

    let batch_len = 20;
    let mut stalled = false;
    while batch.chars.len() < batch_len && !target_chars.is_empty() {
        let need = batch_len - batch.chars.len();
        let should_add = need > target_chars.len() && target_chars.len() <= 3;
        if target_i + 1 < kanji_levels.len() && (should_add || stalled) {
            // upgrade to next level
            target_i += 1;
            chars_late = chars_late.union(&target_chars);
            target_chars = target_chars.union(&kanji_levels[target_i].1.diff(&prev_chars));
            chars_bad = chars_bad.diff(&target_chars);
            chars_bad_avoid = chars_bad_avoid.diff(&target_chars);
            if batch.examples.is_empty() {
                batch.level = kanji_levels[target_i].0.to_string();
            } else {
                batch.level = format!("{} + {}", batch.level, kanji_levels[target_i].0);
            }
            eprintln!("Level  :  {}", batch.level);
            eprintln!("Target:  {}", target_chars.to_string());
            eprintln!("Late   :  {}", chars_late.to_string());
            eprintln!("Bad    :  {} characters", chars_bad.len());
            stalled = false;
        }
        /*  this one works well enough
        let cost = |ex: &Example, ex_tgt_inter: usize| {
            20i32 * ex_tgt_inter as i32
                        + 30i32 * ex.chars.inter_len(&chars_late) as i32
                        + 6i32 * ex.chars.inter_len(&batch.chars) as i32
                        + 4i32 * ex.chars.inter_len(&chars_p1) as i32
                        + 3i32 * ex.chars.inter_len(&chars_p2) as i32
                        - 40i32 * ex.chars.inter_len(&chars_bad) as i32
        };
        */
        let cost = |ex: &Example, ex_tgt_inter: usize| { (
            - (ex.chars.inter_len(&chars_bad_avoid) as i32),
            ex_tgt_inter,
            ex.chars.inter_len(&chars_late),
            2*ex.chars.inter_len(&chars_p1) + ex.chars.inter_len(&chars_p2),
            - (ex.ja.chars().count() as i32),
        ) };
        let cand_1 = examples.par_iter()
            .map(|ex| (ex, ex.chars.inter_len(&target_chars)))
            .filter(|(_, ex_tgt_inter)| (1..=4).contains(ex_tgt_inter) && *ex_tgt_inter + batch.chars.len() <= batch_len)
            .max_by_key(|(ex, ex_tgt_inter)| cost(ex, *ex_tgt_inter));
        let cand  = cand_1.or_else(|| {
            examples.par_iter()
            .map(|ex| (ex, ex.chars.inter_len(&target_chars)))
            .filter(|(_, ex_tgt_inter)| *ex_tgt_inter > 0)
            .max_by_key(|(ex, ex_tgt_inter)| cost(ex, *ex_tgt_inter))
        });
        if let Some((ex, _)) = cand {
            eprintln!("* add {} (rep: {}, p1: {}, p2: {}, bad: {})    {}",
                ex.chars.inter(&target_chars).to_string(),
                ex.chars.inter(&batch.chars).to_string(),
                ex.chars.inter(&chars_p1).to_string(),
                ex.chars.inter(&chars_p2).to_string(),
                ex.chars.inter(&chars_bad).to_string(),
                ex.ja);
            batch.chars = batch.chars.union(&ex.chars.inter(&target_chars));
            target_chars = target_chars.diff(&ex.chars);
            chars_late = chars_late.diff(&ex.chars);
            batch.examples.push(ex.clone());
            batch_chars = batch_chars.union(&ex.chars);
            stalled = false;
        } else {
            if stalled {
                eprintln!("could not find suitable sentence, stopping batch now (need {})", need);
                break;
            }
            stalled = true;
        }
    }

    batch.chars_p1 = chars_p1.inter(&batch_chars);
    batch.chars_p2 = chars_p2.inter(&batch_chars);
    batch.chars_bad = chars_bad.inter(&batch_chars);

    Ok(batch)
}

fn format_batch<'a>(dict_idx: &DictIndex<'a>, count: usize, (i, batch): (usize, &Batch)) {
    format_batch_aux(dict_idx, count, i, batch).expect("format batch");
}

fn format_batch_aux<'a>(dict_idx: &DictIndex<'a>, count: usize, i: usize, batch: &Batch) -> Result<()> {
    let mut f = io::BufWriter::new(fs::File::create(format!("public/{:03}.html", i))?);
    write!(f, r#"<!DOCTYPE html>
        <html>
            <head>
                <meta charset=\"UTF-8\" />
                <title>Batch #{:03}</title>
                <link rel="stylesheet" type="text/css" href="style.css" />
            </head>
            <body>"#, i)?;

    writeln!(f, r#"<p><a href="index.html">index</a>"#)?;
    for j in 0..count {
        if j != i {
            writeln!(f, r#" <a href="{:03}.html">{:03}</a>"#, j, j)?;
        } else {
            writeln!(f, " {:03}", j)?;
        }
    }
    writeln!(f, r#"</p>"#)?;
    writeln!(f, "<p>Level: {}</p>", batch.level)?;

    writeln!(f, r#"<p class="ja chars">【{}】</p>"#, batch.chars.to_string())?;

    for ex in batch.examples.iter() {
        writeln!(f, "<hr />")?;
        write!(f, r#"<p class="ja">"#)?;
        for c in ex.ja.chars() {
            if batch.chars.contains(c) {
                write!(f, r#"<span class="char_cur">{}</span>"#, c)?;
            } else if batch.chars_p1.contains(c) {
                write!(f, r#"<span class="char_p1">{}</span>"#, c)?;
            } else if batch.chars_p2.contains(c) {
                write!(f, r#"<span class="char_p2">{}</span>"#, c)?;
            } else if batch.chars_bad.contains(c) {
                write!(f, r#"<span class="char_bad">{}</span>"#, c)?;
            } else {
                write!(f, "{}", c)?;
            }
        }
        writeln!(f, "</p>")?;
        writeln!(f, r#"<p class="en">{}</p>"#, ex.en)?;

        writeln!(f, r#"<details><summary>Explanation</summary>"#)?;
        let mut expl_batch = Vec::new();
        let mut expl_all = Vec::new();
        for word in ex.expl.split(|c| c == ' ' || c == '~') {
            let (keb, reb) = expl_clean_word(word);
            let wchars = Charset::new(keb);
            if !wchars.intersects(&ex.chars) {
                continue;
            }
            if let Some(ents) = dict_idx.get(keb) {
                for ent in ents.iter() {
                    if let Some(s) = dict_str(keb, reb, ent) {
                        if wchars.intersects(&batch.chars) {
                            expl_batch.push(s);
                        } else {
                            expl_all.push(s);
                        }
                    }
                }
            }
        }
        for be in expl_batch {
            writeln!(f, r#"<p>{}</p>"#, be)?;
        }
        writeln!(f, r#"<p class="chars">"#)?;
        for c in ex.chars.inter(&batch.chars).chars().iter() {
            writeln!(f, r#"<a href="https://jisho.org/search/{}%20%23kanji">{}</a>"#, c, c)?;
        }
        writeln!(f, r#"</p>"#)?;
        for be in expl_all {
            writeln!(f, r#"<p>{}</p>"#, be)?;
        }
        writeln!(f, r#"</details>"#)?;
    }

    write!(f, "</body></html>")?;
    f.flush()?;
    Ok(())
}

fn expl_clean_word(w: &str) -> (&str, Option<&str>) {
    let mut ret = w;
    for delim in ['(', '{', '['] {
        if let Some((s, _)) = ret.split_once(delim) {
            ret = s;
        }
    }
    let p = w.split_once('(')
        .and_then(|(_, r)| r.split_once(')'))
        .map(|(p, _)| p);
    (ret, p)
}

fn dict_str<'a>(qkeb: &str, qreb: Option<&str>, ent: &roxmltree::Node<'a, 'a>) -> Option<String> {
    let r_ele = ent.children().find(|x| x.has_tag_name("r_ele")).unwrap();
    let reb = r_ele.children().find(|x| x.has_tag_name("reb")).unwrap();
    let reb = reb.text().unwrap().trim();

    if qreb.map(|x| x != reb).unwrap_or(false) {
        return None;
    }

    let mut ret = format!("{} [{}]", qkeb, reb);

    for sense in ent.children().filter(|x| x.has_tag_name("sense")) {
        if let Some(s) = sense.children().find(|x| x.has_tag_name("gloss")) {
            ret.extend(format!(" {};", s.text().unwrap().trim()).chars());
        }
    }

    if ret.chars().rev().next() == Some(';') {
        ret.pop();
    }
    Some(ret)
}

fn format_index(batches: &[Batch], kanji_levels: &[(String, String)]) -> Result<()> {
    let mut f = io::BufWriter::new(fs::File::create("public/index.html")?);
    write!(f, r#"<!DOCTYPE html>
        <html>
            <head>
                <meta charset=\"UTF-8\" />
                <title>List of batches</title>
                <link rel="stylesheet" type="text/css" href="style.css" />
            </head>
            <body>"#)?;

    writeln!(f, "<table>")?;
    writeln!(f, "<tr><th>Num</th><th>Level</th><th>Chars</th><th>Examples</th><th>Review</th><th>Ignore</th></tr>")?;
    for (i, batch) in batches.iter().enumerate() {
        writeln!(f, r#"<tr><td><a href="{:03}.html">{:03}</a></td><td>{}</td><td>{}</td><td>&nbsp;&nbsp;{}</td><td>{}</td><td>{}</td></tr>"#,
                 i, i,
                 batch.level,
                 batch.chars.to_string(),
                 batch.examples.len(),
                 batch.chars_p1.union(&batch.chars_p2).to_string(),
                 batch.chars_bad.to_string())?;
    }
    writeln!(f, r#"</table>"#)?;

    writeln!(f, "<hr />")?;

    let all_chars = Charset::from_iter(batches.iter()
        .map(|x| x.chars.chars().iter().copied())
        .flatten());
    writeln!(f, "<table>")?;
    writeln!(f, "<tr><th>Level</th><th>Count</th><th>Chars</th><th>Missing chars</th></tr>")?;
    for (lvl, chars) in kanji_levels.iter() {
        if lvl == "N0+" || lvl == "N0-9" || lvl.ends_with("-10") {
            continue;
        }
        let chars = Charset::new(chars);
        let missing = chars.diff(&all_chars);
        writeln!(f, "<tr><td>{}</td><td>{}</td><td>{}</td><td>{} ({})</td></tr>",
            lvl,
            chars.len(), chars.to_string(),
            missing.to_string(), missing.len())?;
    }
    writeln!(f, "</table>")?;

    write!(f, "</body></html>")?;
    f.flush()?;
    Ok(())
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct Example {
    ja: String,
    en: String,
    expl: String,
    id: Option<String>,
    chars: Charset,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct Batch {
    level: String,
    chars: Charset,
    chars_p1: Charset,
    chars_p2: Charset,
    chars_bad: Charset,
    examples: Vec<Example>,
}

#[derive(Debug, Eq, PartialEq, Hash, Clone, Serialize, Deserialize, Default)]
struct Charset(Vec<char>);

impl Charset {
    fn new<S: AsRef<str>>(s: S) -> Self {
        let mut chars = s.as_ref().chars().collect::<Vec<_>>();
        chars.sort();
        chars.dedup();
        Self(chars)
    }
    fn from_iter<S: IntoIterator<Item=char>>(s: S) -> Self {
        let mut chars = s.into_iter().collect::<Vec<_>>();
        chars.sort();
        chars.dedup();
        Self(chars)
    }
    fn intersects(&self, other: &Self) -> bool {
        let mut it1 = self.0.iter().peekable();
        let mut it2 = other.0.iter().peekable();
        while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
            match c1.cmp(c2) {
                Ordering::Equal => return true,
                Ordering::Less => it1.next(),
                Ordering::Greater => it2.next(),
            };
        }
        false
    }
    fn inter_len(&self, other: &Self) -> usize {
        if other.len() > 20*self.len() {
            // alternative path
            return self.0.iter()
                .filter(|x| other.0.binary_search(x).is_ok())
                .count();
        }
        let mut it1 = self.0.iter().peekable();
        let mut it2 = other.0.iter().peekable();
        let mut ret = 0;
        while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
            match c1.cmp(c2) {
                Ordering::Equal => {
                    ret += 1;
                    it1.next();
                    it2.next();
                }
                Ordering::Less => {
                    it1.next();
                }
                Ordering::Greater => {
                    it2.next();
                }
            };
        }
        ret
    }
    fn inter(&self, other: &Self) -> Charset {
        let mut it1 = self.0.iter().peekable();
        let mut it2 = other.0.iter().peekable();
        let mut ret = Vec::new();
        while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
            match c1.cmp(c2) {
                Ordering::Equal => {
                    ret.push(**c1);
                    it1.next();
                    it2.next();
                }
                Ordering::Less => {
                    it1.next();
                }
                Ordering::Greater => {
                    it2.next();
                }
            };
        }
        Self(ret)
    }
    fn union(&self, other: &Self) -> Charset {
        let mut it1 = self.0.iter().peekable();
        let mut it2 = other.0.iter().peekable();
        let mut ret = Vec::new();
        while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
            match c1.cmp(c2) {
                Ordering::Equal => {
                    ret.push(**c1);
                    it1.next();
                    it2.next();
                }
                Ordering::Less => {
                    ret.push(**c1);
                    it1.next();
                }
                Ordering::Greater => {
                    ret.push(**c2);
                    it2.next();
                }
            };
        }
        while let Some(c) = it1.peek() {
            ret.push(**c);
            it1.next();
        }
        while let Some(c) = it2.peek() {
            ret.push(**c);
            it2.next();
        }
        Self(ret)
    }
    fn diff(&self, other: &Self) -> Charset {
        let mut it1 = self.0.iter().peekable();
        let mut it2 = other.0.iter().peekable();
        let mut ret = Vec::new();
        while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
            match c1.cmp(c2) {
                Ordering::Equal => {
                    it1.next();
                    it2.next();
                }
                Ordering::Less => {
                    ret.push(**c1);
                    it1.next();
                }
                Ordering::Greater => {
                    it2.next();
                }
            };
        }
        while let Some(c) = it1.peek() {
            ret.push(**c);
            it1.next();
        }
        Self(ret)
    }
    fn len(&self) -> usize {
        self.0.len()
    }
    fn is_empty(&self) -> bool {
        self.0.is_empty()
    }
    fn chars(&self) -> &[char] {
        &self.0
    }
    fn contains(&self, c: char) -> bool {
        self.0.binary_search(&c).is_ok()
    }
    fn to_string(&self) -> String {
        self.0.iter().collect::<String>()
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_charset() {
        let c1 = Charset::new("azerty");
        let c2 = Charset::new("uiopqsqdf");
        let c3 = Charset::new("hello, world");

        assert!(!c1.intersects(&c2));
        assert!(c1.intersects(&c3));
        assert!(c2.intersects(&c3));

        assert_eq!(c1.inter_len(&c2), 0);
        assert_eq!(c1.inter_len(&c3), 2);
        assert_eq!(c2.inter_len(&c3), 2);

        assert_eq!(c1.inter(&c2), Charset::new(""));
        assert_eq!(c1.inter(&c3), Charset::new("er"));
        assert_eq!(c2.inter(&c3), Charset::new("od"));

        assert_eq!(c1.union(&c2), Charset::new("azertyuiopqsdf"));
        assert_eq!(c1.union(&c3), Charset::new("azertyhello, world"));
        assert_eq!(c2.union(&c3), Charset::new("uiopqsdfhello, world"));

        assert_eq!(c1.diff(&c2), Charset::new("azerty"));
        assert_eq!(c1.diff(&c3), Charset::new("azty"));
        assert_eq!(c2.diff(&c3), Charset::new("uipqsf"));

        assert_eq!(c2.diff(&c1), Charset::new("uiopqsdf"));
        assert_eq!(c3.diff(&c1), Charset::new("hllo, wold"));
        assert_eq!(c3.diff(&c2), Charset::new("hell, wrl"));
    }
}