aboutsummaryrefslogtreecommitdiff
path: root/src/charset.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/charset.rs')
-rw-r--r--src/charset.rs190
1 files changed, 190 insertions, 0 deletions
diff --git a/src/charset.rs b/src/charset.rs
new file mode 100644
index 0000000..a79eddb
--- /dev/null
+++ b/src/charset.rs
@@ -0,0 +1,190 @@
+use std::cmp::Ordering;
+
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Eq, PartialEq, Hash, Clone, Serialize, Deserialize, Default)]
+pub struct Charset(pub Vec<char>);
+
+impl Charset {
+ pub fn new<S: AsRef<str>>(s: S) -> Self {
+ let mut chars = s.as_ref().chars().collect::<Vec<_>>();
+ chars.sort();
+ chars.dedup();
+ Self(chars)
+ }
+ pub fn from_iter<S: IntoIterator<Item = char>>(s: S) -> Self {
+ let mut chars = s.into_iter().collect::<Vec<_>>();
+ chars.sort();
+ chars.dedup();
+ Self(chars)
+ }
+ pub fn intersects(&self, other: &Self) -> bool {
+ let mut it1 = self.0.iter().peekable();
+ let mut it2 = other.0.iter().peekable();
+ while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
+ match c1.cmp(c2) {
+ Ordering::Equal => return true,
+ Ordering::Less => it1.next(),
+ Ordering::Greater => it2.next(),
+ };
+ }
+ false
+ }
+ pub fn inter_len(&self, other: &Self) -> usize {
+ if other.len() > 20 * self.len() {
+ // alternative path
+ return self
+ .0
+ .iter()
+ .filter(|x| other.0.binary_search(x).is_ok())
+ .count();
+ }
+ let mut it1 = self.0.iter().peekable();
+ let mut it2 = other.0.iter().peekable();
+ let mut ret = 0;
+ while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
+ match c1.cmp(c2) {
+ Ordering::Equal => {
+ ret += 1;
+ it1.next();
+ it2.next();
+ }
+ Ordering::Less => {
+ it1.next();
+ }
+ Ordering::Greater => {
+ it2.next();
+ }
+ };
+ }
+ ret
+ }
+ pub fn inter(&self, other: &Self) -> Charset {
+ let mut it1 = self.0.iter().peekable();
+ let mut it2 = other.0.iter().peekable();
+ let mut ret = Vec::new();
+ while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
+ match c1.cmp(c2) {
+ Ordering::Equal => {
+ ret.push(**c1);
+ it1.next();
+ it2.next();
+ }
+ Ordering::Less => {
+ it1.next();
+ }
+ Ordering::Greater => {
+ it2.next();
+ }
+ };
+ }
+ Self(ret)
+ }
+ pub fn union(&self, other: &Self) -> Charset {
+ let mut it1 = self.0.iter().peekable();
+ let mut it2 = other.0.iter().peekable();
+ let mut ret = Vec::new();
+ while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
+ match c1.cmp(c2) {
+ Ordering::Equal => {
+ ret.push(**c1);
+ it1.next();
+ it2.next();
+ }
+ Ordering::Less => {
+ ret.push(**c1);
+ it1.next();
+ }
+ Ordering::Greater => {
+ ret.push(**c2);
+ it2.next();
+ }
+ };
+ }
+ while let Some(c) = it1.peek() {
+ ret.push(**c);
+ it1.next();
+ }
+ while let Some(c) = it2.peek() {
+ ret.push(**c);
+ it2.next();
+ }
+ Self(ret)
+ }
+ pub fn diff(&self, other: &Self) -> Charset {
+ let mut it1 = self.0.iter().peekable();
+ let mut it2 = other.0.iter().peekable();
+ let mut ret = Vec::new();
+ while let (Some(c1), Some(c2)) = (it1.peek(), it2.peek()) {
+ match c1.cmp(c2) {
+ Ordering::Equal => {
+ it1.next();
+ it2.next();
+ }
+ Ordering::Less => {
+ ret.push(**c1);
+ it1.next();
+ }
+ Ordering::Greater => {
+ it2.next();
+ }
+ };
+ }
+ while let Some(c) = it1.peek() {
+ ret.push(**c);
+ it1.next();
+ }
+ Self(ret)
+ }
+ pub fn len(&self) -> usize {
+ self.0.len()
+ }
+ pub fn is_empty(&self) -> bool {
+ self.0.is_empty()
+ }
+ pub fn chars(&self) -> &[char] {
+ &self.0
+ }
+ pub fn contains(&self, c: char) -> bool {
+ self.0.binary_search(&c).is_ok()
+ }
+ pub fn to_string(&self) -> String {
+ self.0.iter().collect::<String>()
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn test_charset() {
+ let c1 = Charset::new("azerty");
+ let c2 = Charset::new("uiopqsqdf");
+ let c3 = Charset::new("hello, world");
+
+ assert!(!c1.intersects(&c2));
+ assert!(c1.intersects(&c3));
+ assert!(c2.intersects(&c3));
+
+ assert_eq!(c1.inter_len(&c2), 0);
+ assert_eq!(c1.inter_len(&c3), 2);
+ assert_eq!(c2.inter_len(&c3), 2);
+
+ assert_eq!(c1.inter(&c2), Charset::new(""));
+ assert_eq!(c1.inter(&c3), Charset::new("er"));
+ assert_eq!(c2.inter(&c3), Charset::new("od"));
+
+ assert_eq!(c1.union(&c2), Charset::new("azertyuiopqsdf"));
+ assert_eq!(c1.union(&c3), Charset::new("azertyhello, world"));
+ assert_eq!(c2.union(&c3), Charset::new("uiopqsdfhello, world"));
+
+ assert_eq!(c1.diff(&c2), Charset::new("azerty"));
+ assert_eq!(c1.diff(&c3), Charset::new("azty"));
+ assert_eq!(c2.diff(&c3), Charset::new("uipqsf"));
+
+ assert_eq!(c2.diff(&c1), Charset::new("uiopqsdf"));
+ assert_eq!(c3.diff(&c1), Charset::new("hllo, wold"));
+ assert_eq!(c3.diff(&c2), Charset::new("hell, wrl"));
+ }
+}