use std::cmp::Ordering; use serde::{Deserialize, Serialize}; #[derive(Debug, Eq, PartialEq, Hash, Clone, Default, Serialize, Deserialize)] #[serde(into = "String", from = "CharsetSerializedEnum")] pub struct Charset(pub Vec); impl Charset { pub fn new>(s: S) -> Self { let mut chars = s.as_ref().chars().collect::>(); chars.sort(); chars.dedup(); Self(chars) } pub fn from_iter>(s: S) -> Self { let mut chars = s.into_iter().collect::>(); chars.sort(); chars.dedup(); Self(chars) } pub fn iter(&self) -> impl Iterator + '_ { self.0.iter().copied() } pub fn intersects(&self, other: &Self) -> bool { let mut it1 = self.0.iter().peekable(); let mut it2 = other.0.iter().peekable(); while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) { match c1.cmp(c2) { Ordering::Equal => return true, Ordering::Less => it1.next(), Ordering::Greater => it2.next(), }; } false } pub fn inter_len(&self, other: &Self) -> usize { if other.len() > 20 * self.len() { // alternative path return self .0 .iter() .filter(|x| other.0.binary_search(x).is_ok()) .count(); } let mut it1 = self.0.iter().peekable(); let mut it2 = other.0.iter().peekable(); let mut ret = 0; while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) { match c1.cmp(c2) { Ordering::Equal => { ret += 1; it1.next(); it2.next(); } Ordering::Less => { it1.next(); } Ordering::Greater => { it2.next(); } }; } ret } pub fn inter(&self, other: &Self) -> Charset { let mut it1 = self.0.iter().peekable(); let mut it2 = other.0.iter().peekable(); let mut ret = Vec::new(); while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) { match c1.cmp(c2) { Ordering::Equal => { ret.push(**c1); it1.next(); it2.next(); } Ordering::Less => { it1.next(); } Ordering::Greater => { it2.next(); } }; } Self(ret) } pub fn union(&self, other: &Self) -> Charset { let mut it1 = self.0.iter().peekable(); let mut it2 = other.0.iter().peekable(); let mut ret = Vec::new(); while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) { match c1.cmp(c2) { Ordering::Equal => { ret.push(**c1); it1.next(); it2.next(); } Ordering::Less => { ret.push(**c1); it1.next(); } Ordering::Greater => { ret.push(**c2); it2.next(); } }; } while let Some(c) = it1.peek() { ret.push(**c); it1.next(); } while let Some(c) = it2.peek() { ret.push(**c); it2.next(); } Self(ret) } pub fn diff(&self, other: &Self) -> Charset { let mut it1 = self.0.iter().peekable(); let mut it2 = other.0.iter().peekable(); let mut ret = Vec::new(); while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) { match c1.cmp(c2) { Ordering::Equal => { it1.next(); it2.next(); } Ordering::Less => { ret.push(**c1); it1.next(); } Ordering::Greater => { it2.next(); } }; } while let Some(c) = it1.peek() { ret.push(**c); it1.next(); } Self(ret) } pub fn len(&self) -> usize { self.0.len() } pub fn is_empty(&self) -> bool { self.0.is_empty() } pub fn chars(&self) -> &[char] { &self.0 } pub fn contains(&self, c: char) -> bool { self.0.binary_search(&c).is_ok() } pub fn to_string(&self) -> String { self.0.iter().collect::() } } impl Into for Charset { fn into(self) -> String { self.to_string() } } #[derive(Deserialize)] #[serde(untagged)] enum CharsetSerializedEnum { Array(Vec), Str(String), } impl From for Charset { fn from(s: CharsetSerializedEnum) -> Charset { match s { CharsetSerializedEnum::Array(a) => Charset::from_iter(a.into_iter()), CharsetSerializedEnum::Str(s) => Charset::new(&s), } } } #[cfg(test)] mod test { use super::*; #[test] fn test_charset() { let c1 = Charset::new("azerty"); let c2 = Charset::new("uiopqsqdf"); let c3 = Charset::new("hello, world"); assert!(!c1.intersects(&c2)); assert!(c1.intersects(&c3)); assert!(c2.intersects(&c3)); assert_eq!(c1.inter_len(&c2), 0); assert_eq!(c1.inter_len(&c3), 2); assert_eq!(c2.inter_len(&c3), 2); assert_eq!(c1.inter(&c2), Charset::new("")); assert_eq!(c1.inter(&c3), Charset::new("er")); assert_eq!(c2.inter(&c3), Charset::new("od")); assert_eq!(c1.union(&c2), Charset::new("azertyuiopqsdf")); assert_eq!(c1.union(&c3), Charset::new("azertyhello, world")); assert_eq!(c2.union(&c3), Charset::new("uiopqsdfhello, world")); assert_eq!(c1.diff(&c2), Charset::new("azerty")); assert_eq!(c1.diff(&c3), Charset::new("azty")); assert_eq!(c2.diff(&c3), Charset::new("uipqsf")); assert_eq!(c2.diff(&c1), Charset::new("uiopqsdf")); assert_eq!(c3.diff(&c1), Charset::new("hllo, wold")); assert_eq!(c3.diff(&c2), Charset::new("hell, wrl")); } }