aboutsummaryrefslogblamecommitdiff
path: root/src/charset.rs
blob: b322324622180abed627ca94606fbc979683b5c0 (plain) (tree)
1
2
3
4
5
6



                                    

                                                                             














                                                                  


                                                           






































































































































                                                                   





















                                                                                 


































                                                                        
use std::cmp::Ordering;

use serde::{Deserialize, Serialize};

#[derive(Debug, Eq, PartialEq, Hash, Clone, Default, Serialize, Deserialize)]
#[serde(into = "String", from = "CharsetSerializedEnum")]
pub struct Charset(pub Vec<char>);

impl Charset {
    pub fn new<S: AsRef<str>>(s: S) -> Self {
        let mut chars = s.as_ref().chars().collect::<Vec<_>>();
        chars.sort();
        chars.dedup();
        Self(chars)
    }
    pub fn from_iter<S: IntoIterator<Item = char>>(s: S) -> Self {
        let mut chars = s.into_iter().collect::<Vec<_>>();
        chars.sort();
        chars.dedup();
        Self(chars)
    }
    pub fn iter(&self) -> impl Iterator<Item = char> + '_ {
        self.0.iter().copied()
    }
    pub fn intersects(&self, other: &Self) -> bool {
        let mut it1 = self.0.iter().peekable();
        let mut it2 = other.0.iter().peekable();
        while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
            match c1.cmp(c2) {
                Ordering::Equal => return true,
                Ordering::Less => it1.next(),
                Ordering::Greater => it2.next(),
            };
        }
        false
    }
    pub fn inter_len(&self, other: &Self) -> usize {
        if other.len() > 20 * self.len() {
            // alternative path
            return self
                .0
                .iter()
                .filter(|x| other.0.binary_search(x).is_ok())
                .count();
        }
        let mut it1 = self.0.iter().peekable();
        let mut it2 = other.0.iter().peekable();
        let mut ret = 0;
        while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
            match c1.cmp(c2) {
                Ordering::Equal => {
                    ret += 1;
                    it1.next();
                    it2.next();
                }
                Ordering::Less => {
                    it1.next();
                }
                Ordering::Greater => {
                    it2.next();
                }
            };
        }
        ret
    }
    pub fn inter(&self, other: &Self) -> Charset {
        let mut it1 = self.0.iter().peekable();
        let mut it2 = other.0.iter().peekable();
        let mut ret = Vec::new();
        while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
            match c1.cmp(c2) {
                Ordering::Equal => {
                    ret.push(**c1);
                    it1.next();
                    it2.next();
                }
                Ordering::Less => {
                    it1.next();
                }
                Ordering::Greater => {
                    it2.next();
                }
            };
        }
        Self(ret)
    }
    pub fn union(&self, other: &Self) -> Charset {
        let mut it1 = self.0.iter().peekable();
        let mut it2 = other.0.iter().peekable();
        let mut ret = Vec::new();
        while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
            match c1.cmp(c2) {
                Ordering::Equal => {
                    ret.push(**c1);
                    it1.next();
                    it2.next();
                }
                Ordering::Less => {
                    ret.push(**c1);
                    it1.next();
                }
                Ordering::Greater => {
                    ret.push(**c2);
                    it2.next();
                }
            };
        }
        while let Some(c) = it1.peek() {
            ret.push(**c);
            it1.next();
        }
        while let Some(c) = it2.peek() {
            ret.push(**c);
            it2.next();
        }
        Self(ret)
    }
    pub fn diff(&self, other: &Self) -> Charset {
        let mut it1 = self.0.iter().peekable();
        let mut it2 = other.0.iter().peekable();
        let mut ret = Vec::new();
        while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
            match c1.cmp(c2) {
                Ordering::Equal => {
                    it1.next();
                    it2.next();
                }
                Ordering::Less => {
                    ret.push(**c1);
                    it1.next();
                }
                Ordering::Greater => {
                    it2.next();
                }
            };
        }
        while let Some(c) = it1.peek() {
            ret.push(**c);
            it1.next();
        }
        Self(ret)
    }
    pub fn len(&self) -> usize {
        self.0.len()
    }
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }
    pub fn chars(&self) -> &[char] {
        &self.0
    }
    pub fn contains(&self, c: char) -> bool {
        self.0.binary_search(&c).is_ok()
    }
    pub fn to_string(&self) -> String {
        self.0.iter().collect::<String>()
    }
}

impl Into<String> for Charset {
    fn into(self) -> String {
        self.to_string()
    }
}

#[derive(Deserialize)]
#[serde(untagged)]
enum CharsetSerializedEnum {
    Array(Vec<char>),
    Str(String),
}

impl From<CharsetSerializedEnum> for Charset {
    fn from(s: CharsetSerializedEnum) -> Charset {
        match s {
            CharsetSerializedEnum::Array(a) => Charset::from_iter(a.into_iter()),
            CharsetSerializedEnum::Str(s) => Charset::new(&s),
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_charset() {
        let c1 = Charset::new("azerty");
        let c2 = Charset::new("uiopqsqdf");
        let c3 = Charset::new("hello, world");

        assert!(!c1.intersects(&c2));
        assert!(c1.intersects(&c3));
        assert!(c2.intersects(&c3));

        assert_eq!(c1.inter_len(&c2), 0);
        assert_eq!(c1.inter_len(&c3), 2);
        assert_eq!(c2.inter_len(&c3), 2);

        assert_eq!(c1.inter(&c2), Charset::new(""));
        assert_eq!(c1.inter(&c3), Charset::new("er"));
        assert_eq!(c2.inter(&c3), Charset::new("od"));

        assert_eq!(c1.union(&c2), Charset::new("azertyuiopqsdf"));
        assert_eq!(c1.union(&c3), Charset::new("azertyhello, world"));
        assert_eq!(c2.union(&c3), Charset::new("uiopqsdfhello, world"));

        assert_eq!(c1.diff(&c2), Charset::new("azerty"));
        assert_eq!(c1.diff(&c3), Charset::new("azty"));
        assert_eq!(c2.diff(&c3), Charset::new("uipqsf"));

        assert_eq!(c2.diff(&c1), Charset::new("uiopqsdf"));
        assert_eq!(c3.diff(&c1), Charset::new("hllo, wold"));
        assert_eq!(c3.diff(&c2), Charset::new("hell, wrl"));
    }
}