diff options
Diffstat (limited to 'src/charset.rs')
-rw-r--r-- | src/charset.rs | 190 |
1 files changed, 190 insertions, 0 deletions
diff --git a/src/charset.rs b/src/charset.rs new file mode 100644 index 0000000..a79eddb --- /dev/null +++ b/src/charset.rs @@ -0,0 +1,190 @@ +use std::cmp::Ordering; + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Eq, PartialEq, Hash, Clone, Serialize, Deserialize, Default)] +pub struct Charset(pub Vec<char>); + +impl Charset { + pub fn new<S: AsRef<str>>(s: S) -> Self { + let mut chars = s.as_ref().chars().collect::<Vec<_>>(); + chars.sort(); + chars.dedup(); + Self(chars) + } + pub fn from_iter<S: IntoIterator<Item = char>>(s: S) -> Self { + let mut chars = s.into_iter().collect::<Vec<_>>(); + chars.sort(); + chars.dedup(); + Self(chars) + } + pub fn intersects(&self, other: &Self) -> bool { + let mut it1 = self.0.iter().peekable(); + let mut it2 = other.0.iter().peekable(); + while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) { + match c1.cmp(c2) { + Ordering::Equal => return true, + Ordering::Less => it1.next(), + Ordering::Greater => it2.next(), + }; + } + false + } + pub fn inter_len(&self, other: &Self) -> usize { + if other.len() > 20 * self.len() { + // alternative path + return self + .0 + .iter() + .filter(|x| other.0.binary_search(x).is_ok()) + .count(); + } + let mut it1 = self.0.iter().peekable(); + let mut it2 = other.0.iter().peekable(); + let mut ret = 0; + while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) { + match c1.cmp(c2) { + Ordering::Equal => { + ret += 1; + it1.next(); + it2.next(); + } + Ordering::Less => { + it1.next(); + } + Ordering::Greater => { + it2.next(); + } + }; + } + ret + } + pub fn inter(&self, other: &Self) -> Charset { + let mut it1 = self.0.iter().peekable(); + let mut it2 = other.0.iter().peekable(); + let mut ret = Vec::new(); + while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) { + match c1.cmp(c2) { + Ordering::Equal => { + ret.push(**c1); + it1.next(); + it2.next(); + } + Ordering::Less => { + it1.next(); + } + Ordering::Greater => { + it2.next(); + } + }; + } + Self(ret) + } + pub fn union(&self, other: &Self) -> Charset { + let mut it1 = self.0.iter().peekable(); + let mut it2 = other.0.iter().peekable(); + let mut ret = Vec::new(); + while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) { + match c1.cmp(c2) { + Ordering::Equal => { + ret.push(**c1); + it1.next(); + it2.next(); + } + Ordering::Less => { + ret.push(**c1); + it1.next(); + } + Ordering::Greater => { + ret.push(**c2); + it2.next(); + } + }; + } + while let Some(c) = it1.peek() { + ret.push(**c); + it1.next(); + } + while let Some(c) = it2.peek() { + ret.push(**c); + it2.next(); + } + Self(ret) + } + pub fn diff(&self, other: &Self) -> Charset { + let mut it1 = self.0.iter().peekable(); + let mut it2 = other.0.iter().peekable(); + let mut ret = Vec::new(); + while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) { + match c1.cmp(c2) { + Ordering::Equal => { + it1.next(); + it2.next(); + } + Ordering::Less => { + ret.push(**c1); + it1.next(); + } + Ordering::Greater => { + it2.next(); + } + }; + } + while let Some(c) = it1.peek() { + ret.push(**c); + it1.next(); + } + Self(ret) + } + pub fn len(&self) -> usize { + self.0.len() + } + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + pub fn chars(&self) -> &[char] { + &self.0 + } + pub fn contains(&self, c: char) -> bool { + self.0.binary_search(&c).is_ok() + } + pub fn to_string(&self) -> String { + self.0.iter().collect::<String>() + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_charset() { + let c1 = Charset::new("azerty"); + let c2 = Charset::new("uiopqsqdf"); + let c3 = Charset::new("hello, world"); + + assert!(!c1.intersects(&c2)); + assert!(c1.intersects(&c3)); + assert!(c2.intersects(&c3)); + + assert_eq!(c1.inter_len(&c2), 0); + assert_eq!(c1.inter_len(&c3), 2); + assert_eq!(c2.inter_len(&c3), 2); + + assert_eq!(c1.inter(&c2), Charset::new("")); + assert_eq!(c1.inter(&c3), Charset::new("er")); + assert_eq!(c2.inter(&c3), Charset::new("od")); + + assert_eq!(c1.union(&c2), Charset::new("azertyuiopqsdf")); + assert_eq!(c1.union(&c3), Charset::new("azertyhello, world")); + assert_eq!(c2.union(&c3), Charset::new("uiopqsdfhello, world")); + + assert_eq!(c1.diff(&c2), Charset::new("azerty")); + assert_eq!(c1.diff(&c3), Charset::new("azty")); + assert_eq!(c2.diff(&c3), Charset::new("uipqsf")); + + assert_eq!(c2.diff(&c1), Charset::new("uiopqsdf")); + assert_eq!(c3.diff(&c1), Charset::new("hllo, wold")); + assert_eq!(c3.diff(&c2), Charset::new("hell, wrl")); + } +} |