From f58904f5bb3dbd429555c406c867f850654843a6 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 6 Jan 2024 18:01:44 +0100 Subject: Search can now filter on index data --- src/imap/search.rs | 166 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 152 insertions(+), 14 deletions(-) (limited to 'src/imap/search.rs') diff --git a/src/imap/search.rs b/src/imap/search.rs index b3c6b05..2a1119c 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -3,6 +3,9 @@ use imap_codec::imap_types::search::SearchKey; use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet}; use std::num::NonZeroU32; +use crate::mail::query::{QueryScope, QueryResult}; +use crate::imap::index::MailIndex; + pub enum SeqType { Undefined, NonUid, @@ -54,6 +57,10 @@ impl<'a> Criteria<'a> { tracing::debug!( "using AND in a search request is slow: no intersection is performed" ); + // As we perform no intersection, we don't care if we mix uid or id. + // We only keep the smallest range, being it ID or UID, depending of + // which one has the less items. This is an approximation as UID ranges + // can have holes while ID ones can't. search_list .as_ref() .iter() @@ -72,31 +79,119 @@ impl<'a> Criteria<'a> { /// Not really clever as we can have cases where we filter out /// the email before needing to inspect its meta. /// But for now we are seeking the most basic/stupid algorithm. - pub fn need_meta(&self) -> bool { + pub fn query_scope(&self) -> QueryScope { use SearchKey::*; match self.0 { // IMF Headers Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) - | Subject(_) | To(_) => true, + | Subject(_) | To(_) => QueryScope::Partial, // Internal Date is also stored in MailMeta - Before(_) | On(_) | Since(_) => true, + Before(_) | On(_) | Since(_) => QueryScope::Partial, // Message size is also stored in MailMeta - Larger(_) | Smaller(_) => true, - And(and_list) => and_list.as_ref().iter().any(|sk| Criteria(sk).need_meta()), - Not(inner) => Criteria(inner).need_meta(), - Or(left, right) => Criteria(left).need_meta() || Criteria(right).need_meta(), - _ => false, + Larger(_) | Smaller(_) => QueryScope::Partial, + // Text and Body require that we fetch the full content! + Text(_) | Body(_) => QueryScope::Full, + And(and_list) => and_list.as_ref().iter().fold(QueryScope::Index, |prev, sk| { + prev.union(&Criteria(sk).query_scope()) + }), + Not(inner) => Criteria(inner).query_scope(), + Or(left, right) => Criteria(left).query_scope().union(&Criteria(right).query_scope()), + _ => QueryScope::Index, } } - pub fn need_body(&self) -> bool { + pub fn filter_on_idx<'b>(&self, midx_list: &[MailIndex<'b>]) -> Vec> { + midx_list + .iter() + .filter(|x| self.is_keep_on_idx(x).is_keep()) + .map(|x| (*x).clone()) + .collect::>() + } + + pub fn filter_on_query(&self, midx_list: &[MailIndex], query_result: &Vec>) -> Vec { + unimplemented!(); + } + + // ---- + + /// Here we are doing a partial filtering: we do not have access + /// to the headers or to the body, so every time we encounter a rule + /// based on them, we need to keep it. + /// + /// @TODO Could be optimized on a per-email basis by also returning the QueryScope + /// when more information is needed! + fn is_keep_on_idx(&self, midx: &MailIndex) -> PartialDecision { use SearchKey::*; match self.0 { - Text(_) | Body(_) => true, - And(and_list) => and_list.as_ref().iter().any(|sk| Criteria(sk).need_body()), - Not(inner) => Criteria(inner).need_body(), - Or(left, right) => Criteria(left).need_body() || Criteria(right).need_body(), - _ => false, + // Combinator logic + And(expr_list) => expr_list + .as_ref() + .iter() + .fold(PartialDecision::Keep, |acc, cur| acc.and(&Criteria(cur).is_keep_on_idx(midx))), + Or(left, right) => { + let left_decision = Criteria(left).is_keep_on_idx(midx); + let right_decision = Criteria(right).is_keep_on_idx(midx); + left_decision.or(&right_decision) + } + Not(expr) => Criteria(expr).is_keep_on_idx(midx).not(), + All => PartialDecision::Keep, + + // Sequence logic + SequenceSet(seq_set) => seq_set.0.as_ref().iter().fold(PartialDecision::Discard, |acc, seq| { + let local_decision: PartialDecision = midx.is_in_sequence_i(seq).into(); + acc.or(&local_decision) + }), + Uid(seq_set) => seq_set.0.as_ref().iter().fold(PartialDecision::Discard, |acc, seq| { + let local_decision: PartialDecision = midx.is_in_sequence_uid(seq).into(); + acc.or(&local_decision) + }), + + // Flag logic + Answered => midx.is_flag_set("\\Answered").into(), + Deleted => midx.is_flag_set("\\Deleted").into(), + Draft => midx.is_flag_set("\\Draft").into(), + Flagged => midx.is_flag_set("\\Flagged").into(), + Keyword(kw) => midx.is_flag_set(kw.inner()).into(), + New => { + let is_recent: PartialDecision = midx.is_flag_set("\\Recent").into(); + let is_seen: PartialDecision = midx.is_flag_set("\\Seen").into(); + is_recent.and(&is_seen.not()) + }, + Old => { + let is_recent: PartialDecision = midx.is_flag_set("\\Recent").into(); + is_recent.not() + }, + Recent => midx.is_flag_set("\\Recent").into(), + Seen => midx.is_flag_set("\\Seen").into(), + Unanswered => { + let is_answered: PartialDecision = midx.is_flag_set("\\Recent").into(); + is_answered.not() + }, + Undeleted => { + let is_deleted: PartialDecision = midx.is_flag_set("\\Deleted").into(); + is_deleted.not() + }, + Undraft => { + let is_draft: PartialDecision = midx.is_flag_set("\\Draft").into(); + is_draft.not() + }, + Unflagged => { + let is_flagged: PartialDecision = midx.is_flag_set("\\Flagged").into(); + is_flagged.not() + }, + Unkeyword(kw) => { + let is_keyword_set: PartialDecision = midx.is_flag_set(kw.inner()).into(); + is_keyword_set.not() + }, + Unseen => { + let is_seen: PartialDecision = midx.is_flag_set("\\Seen").into(); + is_seen.not() + }, + + // All the stuff we can't evaluate yet + Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) + | Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_) + | Text(_) | Body(_) => PartialDecision::Postpone, } } } @@ -128,3 +223,46 @@ fn approx_sequence_size(seq: &Sequence) -> u64 { } } } + +enum PartialDecision { + Keep, + Discard, + Postpone, +} +impl From for PartialDecision { + fn from(x: bool) -> Self { + match x { + true => PartialDecision::Keep, + _ => PartialDecision::Discard, + } + } +} +impl PartialDecision { + fn not(&self) -> Self { + match self { + Self::Keep => Self::Discard, + Self::Discard => Self::Keep, + Self::Postpone => Self::Postpone, + } + } + + fn or(&self, other: &Self) -> Self { + match (self, other) { + (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, + (Self::Keep, _) | (_, Self::Keep) => Self::Keep, + (Self::Discard, Self::Discard) => Self::Discard, + } + } + + fn and(&self, other: &Self) -> Self { + match (self, other) { + (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, + (Self::Discard, _) | (_, Self::Discard) => Self::Discard, + (Self::Keep, Self::Keep) => Self::Keep, + } + } + + fn is_keep(&self) -> bool { + !matches!(self, Self::Discard) + } +} -- cgit v1.2.3 From 870de493c84c6c3134d14ee8a234f124360354a7 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 6 Jan 2024 18:51:21 +0100 Subject: Search is made more clear --- src/imap/search.rs | 192 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 130 insertions(+), 62 deletions(-) (limited to 'src/imap/search.rs') diff --git a/src/imap/search.rs b/src/imap/search.rs index 2a1119c..0ab0300 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -17,6 +17,7 @@ impl SeqType { } } + pub struct Criteria<'a>(pub &'a SearchKey<'a>); impl<'a> Criteria<'a> { /// Returns a set of email identifiers that is greater or equal @@ -82,6 +83,14 @@ impl<'a> Criteria<'a> { pub fn query_scope(&self) -> QueryScope { use SearchKey::*; match self.0 { + // Combinators + And(and_list) => and_list.as_ref().iter().fold(QueryScope::Index, |prev, sk| { + prev.union(&Criteria(sk).query_scope()) + }), + Not(inner) => Criteria(inner).query_scope(), + Or(left, right) => Criteria(left).query_scope().union(&Criteria(right).query_scope()), + All => QueryScope::Index, + // IMF Headers Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) | Subject(_) | To(_) => QueryScope::Partial, @@ -91,25 +100,34 @@ impl<'a> Criteria<'a> { Larger(_) | Smaller(_) => QueryScope::Partial, // Text and Body require that we fetch the full content! Text(_) | Body(_) => QueryScope::Full, - And(and_list) => and_list.as_ref().iter().fold(QueryScope::Index, |prev, sk| { - prev.union(&Criteria(sk).query_scope()) - }), - Not(inner) => Criteria(inner).query_scope(), - Or(left, right) => Criteria(left).query_scope().union(&Criteria(right).query_scope()), + _ => QueryScope::Index, } } - pub fn filter_on_idx<'b>(&self, midx_list: &[MailIndex<'b>]) -> Vec> { - midx_list + /// Returns emails that we now for sure we want to keep + /// but also a second list of emails we need to investigate further by + /// fetching some remote data + pub fn filter_on_idx<'b>(&self, midx_list: &[MailIndex<'b>]) -> (Vec>, Vec>) { + let (p1, p2): (Vec<_>, Vec<_>) = midx_list .iter() - .filter(|x| self.is_keep_on_idx(x).is_keep()) - .map(|x| (*x).clone()) - .collect::>() + .map(|x| (x, self.is_keep_on_idx(x))) + .filter(|(_midx, decision)| decision.is_keep()) + .map(|(midx, decision)| ((*midx).clone(), decision)) + .partition(|(_midx, decision)| matches!(decision, PartialDecision::Keep)); + + let to_keep = p1.into_iter().map(|(v, _)| v).collect(); + let to_fetch = p2.into_iter().map(|(v, _)| v).collect(); + (to_keep, to_fetch) } - pub fn filter_on_query(&self, midx_list: &[MailIndex], query_result: &Vec>) -> Vec { - unimplemented!(); + pub fn filter_on_query<'b>(&self, midx_list: &[MailIndex<'b>], query_result: &Vec>) -> Vec> { + midx_list + .iter() + .zip(query_result.iter()) + .filter(|(midx, qr)| self.is_keep_on_query(midx, qr)) + .map(|(midx, _qr)| midx.clone()) + .collect() } // ---- @@ -137,65 +155,37 @@ impl<'a> Criteria<'a> { All => PartialDecision::Keep, // Sequence logic - SequenceSet(seq_set) => seq_set.0.as_ref().iter().fold(PartialDecision::Discard, |acc, seq| { - let local_decision: PartialDecision = midx.is_in_sequence_i(seq).into(); - acc.or(&local_decision) - }), - Uid(seq_set) => seq_set.0.as_ref().iter().fold(PartialDecision::Discard, |acc, seq| { - let local_decision: PartialDecision = midx.is_in_sequence_uid(seq).into(); - acc.or(&local_decision) - }), - - // Flag logic - Answered => midx.is_flag_set("\\Answered").into(), - Deleted => midx.is_flag_set("\\Deleted").into(), - Draft => midx.is_flag_set("\\Draft").into(), - Flagged => midx.is_flag_set("\\Flagged").into(), - Keyword(kw) => midx.is_flag_set(kw.inner()).into(), - New => { - let is_recent: PartialDecision = midx.is_flag_set("\\Recent").into(); - let is_seen: PartialDecision = midx.is_flag_set("\\Seen").into(); - is_recent.and(&is_seen.not()) - }, - Old => { - let is_recent: PartialDecision = midx.is_flag_set("\\Recent").into(); - is_recent.not() - }, - Recent => midx.is_flag_set("\\Recent").into(), - Seen => midx.is_flag_set("\\Seen").into(), - Unanswered => { - let is_answered: PartialDecision = midx.is_flag_set("\\Recent").into(); - is_answered.not() - }, - Undeleted => { - let is_deleted: PartialDecision = midx.is_flag_set("\\Deleted").into(); - is_deleted.not() - }, - Undraft => { - let is_draft: PartialDecision = midx.is_flag_set("\\Draft").into(); - is_draft.not() - }, - Unflagged => { - let is_flagged: PartialDecision = midx.is_flag_set("\\Flagged").into(); - is_flagged.not() - }, - Unkeyword(kw) => { - let is_keyword_set: PartialDecision = midx.is_flag_set(kw.inner()).into(); - is_keyword_set.not() - }, - Unseen => { - let is_seen: PartialDecision = midx.is_flag_set("\\Seen").into(); - is_seen.not() - }, + maybe_seq if is_sk_seq(maybe_seq) => is_keep_seq(maybe_seq, midx).into(), + maybe_flag if is_sk_flag(maybe_flag) => is_keep_flag(maybe_flag, midx).into(), // All the stuff we can't evaluate yet Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) | Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_) | Text(_) | Body(_) => PartialDecision::Postpone, + + _ => unreachable!(), + } + } + + fn is_keep_on_query(&self, midx: &MailIndex, qr: &QueryResult) -> bool { + use SearchKey::*; + match self.0 { + // Combinator logic + And(expr_list) => expr_list + .as_ref() + .iter() + .any(|cur| Criteria(cur).is_keep_on_query(midx, qr)), + Or(left, right) => { + Criteria(left).is_keep_on_query(midx, qr) || Criteria(right).is_keep_on_query(midx, qr) + } + Not(expr) => !Criteria(expr).is_keep_on_query(midx, qr), + All => true, + _ => unimplemented!(), } } } +// ---- Sequence things ---- fn sequence_set_all() -> SequenceSet { SequenceSet::from(Sequence::Range( SeqOrUid::Value(NonZeroU32::MIN), @@ -224,6 +214,8 @@ fn approx_sequence_size(seq: &Sequence) -> u64 { } } +// --- Partial decision things ---- + enum PartialDecision { Keep, Discard, @@ -266,3 +258,79 @@ impl PartialDecision { !matches!(self, Self::Discard) } } + +// ----- Search Key things --- +fn is_sk_flag(sk: &SearchKey) -> bool { + use SearchKey::*; + match sk { + Answered | Deleted | Draft | Flagged | Keyword(..) | New | Old + | Recent | Seen | Unanswered | Undeleted | Undraft + | Unflagged | Unkeyword(..) | Unseen => true, + _ => false, + } +} + +fn is_keep_flag(sk: &SearchKey, midx: &MailIndex) -> bool { + use SearchKey::*; + match sk { + Answered => midx.is_flag_set("\\Answered"), + Deleted => midx.is_flag_set("\\Deleted"), + Draft => midx.is_flag_set("\\Draft"), + Flagged => midx.is_flag_set("\\Flagged"), + Keyword(kw) => midx.is_flag_set(kw.inner()), + New => { + let is_recent = midx.is_flag_set("\\Recent"); + let is_seen = midx.is_flag_set("\\Seen"); + is_recent && !is_seen + }, + Old => { + let is_recent = midx.is_flag_set("\\Recent"); + !is_recent + }, + Recent => midx.is_flag_set("\\Recent"), + Seen => midx.is_flag_set("\\Seen"), + Unanswered => { + let is_answered = midx.is_flag_set("\\Recent"); + !is_answered + }, + Undeleted => { + let is_deleted = midx.is_flag_set("\\Deleted"); + !is_deleted + }, + Undraft => { + let is_draft = midx.is_flag_set("\\Draft"); + !is_draft + }, + Unflagged => { + let is_flagged = midx.is_flag_set("\\Flagged"); + !is_flagged + }, + Unkeyword(kw) => { + let is_keyword_set = midx.is_flag_set(kw.inner()); + !is_keyword_set + }, + Unseen => { + let is_seen = midx.is_flag_set("\\Seen"); + !is_seen + }, + + // Not flag logic + _ => unreachable!(), + } +} + +fn is_sk_seq(sk: &SearchKey) -> bool { + use SearchKey::*; + match sk { + SequenceSet(..) | Uid(..) => true, + _ => false, + } +} +fn is_keep_seq(sk: &SearchKey, midx: &MailIndex) -> bool { + use SearchKey::*; + match sk { + SequenceSet(seq_set) => seq_set.0.as_ref().iter().any(|seq| midx.is_in_sequence_i(seq)), + Uid(seq_set) => seq_set.0.as_ref().iter().any(|seq| midx.is_in_sequence_uid(seq)), + _ => unreachable!(), + } +} -- cgit v1.2.3 From ea1772df425cb7faa4628b1c6c398ae3f77fca34 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 6 Jan 2024 20:40:18 +0100 Subject: Searching on storage date is now possible --- src/imap/search.rs | 90 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 76 insertions(+), 14 deletions(-) (limited to 'src/imap/search.rs') diff --git a/src/imap/search.rs b/src/imap/search.rs index 0ab0300..0e00025 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -1,10 +1,13 @@ +use std::num::NonZeroU32; + +use anyhow::Result; use imap_codec::imap_types::core::NonEmptyVec; use imap_codec::imap_types::search::SearchKey; use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet}; -use std::num::NonZeroU32; use crate::mail::query::{QueryScope, QueryResult}; use crate::imap::index::MailIndex; +use crate::imap::mail_view::MailView; pub enum SeqType { Undefined, @@ -121,13 +124,16 @@ impl<'a> Criteria<'a> { (to_keep, to_fetch) } - pub fn filter_on_query<'b>(&self, midx_list: &[MailIndex<'b>], query_result: &Vec>) -> Vec> { - midx_list + pub fn filter_on_query<'b>(&self, midx_list: &[MailIndex<'b>], query_result: &'b Vec>) -> Result>> { + Ok(midx_list .iter() .zip(query_result.iter()) - .filter(|(midx, qr)| self.is_keep_on_query(midx, qr)) - .map(|(midx, _qr)| midx.clone()) - .collect() + .map(|(midx, qr)| MailView::new(qr, midx.clone())) + .collect::, _>>()? + .into_iter() + .filter(|mail_view| self.is_keep_on_query(mail_view)) + .map(|mail_view| mail_view.in_idx) + .collect()) } // ---- @@ -163,24 +169,80 @@ impl<'a> Criteria<'a> { | Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_) | Text(_) | Body(_) => PartialDecision::Postpone, - _ => unreachable!(), + unknown => { + tracing::error!("Unknown filter {:?}", unknown); + PartialDecision::Discard + }, } } - fn is_keep_on_query(&self, midx: &MailIndex, qr: &QueryResult) -> bool { + + /// @TODO we re-eveluate twice the same logic. The correct way would be, on each pass, + /// to simplify the searck query, by removing the elements that were already checked. + /// For example if we have AND(OR(seqid(X), body(Y)), body(X)), we can't keep for sure + /// the email, as body(x) might be false. So we need to check it. But as seqid(x) is true, + /// we could simplify the request to just body(x) and truncate the first OR. Today, we are + /// not doing that, and thus we reevaluate everything. + fn is_keep_on_query(&self, mail_view: &MailView) -> bool { use SearchKey::*; match self.0 { // Combinator logic And(expr_list) => expr_list .as_ref() .iter() - .any(|cur| Criteria(cur).is_keep_on_query(midx, qr)), + .any(|cur| Criteria(cur).is_keep_on_query(mail_view)), Or(left, right) => { - Criteria(left).is_keep_on_query(midx, qr) || Criteria(right).is_keep_on_query(midx, qr) + Criteria(left).is_keep_on_query(mail_view) || Criteria(right).is_keep_on_query(mail_view) } - Not(expr) => !Criteria(expr).is_keep_on_query(midx, qr), + Not(expr) => !Criteria(expr).is_keep_on_query(mail_view), All => true, - _ => unimplemented!(), + + // Reevaluating our previous logic... + maybe_seq if is_sk_seq(maybe_seq) => is_keep_seq(maybe_seq, &mail_view.in_idx), + maybe_flag if is_sk_flag(maybe_flag) => is_keep_flag(maybe_flag, &mail_view.in_idx), + + // Filter on mail meta + Before(search_naive) => match mail_view.stored_naive_date() { + Ok(msg_naive) => &msg_naive < search_naive.as_ref(), + _ => false, + }, + On(search_naive) => match mail_view.stored_naive_date() { + Ok(msg_naive) => &msg_naive == search_naive.as_ref(), + _ => false, + }, + Since(search_naive) => match mail_view.stored_naive_date() { + Ok(msg_naive) => &msg_naive > search_naive.as_ref(), + _ => false, + }, + + // Message size is also stored in MailMeta + Larger(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size > *size_ref as usize, + Smaller(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size < *size_ref as usize, + + // Filter on well-known headers + Bcc(_) => unimplemented!(), + Cc(_) => unimplemented!(), + From(_) => unimplemented!(), + Subject(_)=> unimplemented!(), + To(_) => unimplemented!(), + + // Filter on arbitrary header + Header(..) => unimplemented!(), + + // Filter on Date header + SentBefore(_) => unimplemented!(), + SentOn(_) => unimplemented!(), + SentSince(_) => unimplemented!(), + + + // Filter on the full content of the email + Text(_) => unimplemented!(), + Body(_) => unimplemented!(), + + unknown => { + tracing::error!("Unknown filter {:?}", unknown); + false + }, } } } @@ -240,16 +302,16 @@ impl PartialDecision { fn or(&self, other: &Self) -> Self { match (self, other) { - (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, (Self::Keep, _) | (_, Self::Keep) => Self::Keep, + (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, (Self::Discard, Self::Discard) => Self::Discard, } } fn and(&self, other: &Self) -> Self { match (self, other) { - (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, (Self::Discard, _) | (_, Self::Discard) => Self::Discard, + (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, (Self::Keep, Self::Keep) => Self::Keep, } } -- cgit v1.2.3 From 5622a71cd163e4b18a3eabe8a28a5aedb23ee25d Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 6 Jan 2024 22:53:41 +0100 Subject: Search MIME headers --- src/imap/search.rs | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'src/imap/search.rs') diff --git a/src/imap/search.rs b/src/imap/search.rs index 0e00025..2fbfdcc 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -220,19 +220,17 @@ impl<'a> Criteria<'a> { Smaller(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size < *size_ref as usize, // Filter on well-known headers - Bcc(_) => unimplemented!(), - Cc(_) => unimplemented!(), - From(_) => unimplemented!(), - Subject(_)=> unimplemented!(), - To(_) => unimplemented!(), - - // Filter on arbitrary header - Header(..) => unimplemented!(), + Bcc(txt) => mail_view.is_header_contains_pattern(&b"bcc"[..], txt.as_ref()), + Cc(txt) => mail_view.is_header_contains_pattern(&b"cc"[..], txt.as_ref()), + From(txt) => mail_view.is_header_contains_pattern(&b"from"[..], txt.as_ref()), + Subject(txt)=> mail_view.is_header_contains_pattern(&b"subject"[..], txt.as_ref()), + To(txt) => mail_view.is_header_contains_pattern(&b"to"[..], txt.as_ref()), + Header(hdr, txt) => mail_view.is_header_contains_pattern(hdr.as_ref(), txt.as_ref()), // Filter on Date header - SentBefore(_) => unimplemented!(), - SentOn(_) => unimplemented!(), - SentSince(_) => unimplemented!(), + SentBefore(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive < search_naive.as_ref()).unwrap_or(false), + SentOn(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive == search_naive.as_ref()).unwrap_or(false), + SentSince(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive > search_naive.as_ref()).unwrap_or(false), // Filter on the full content of the email -- cgit v1.2.3 From 4e3cbf79d03c84028733b0ad5f9bd06a8a13757b Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 6 Jan 2024 23:24:44 +0100 Subject: implemented text search --- src/imap/search.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/imap/search.rs') diff --git a/src/imap/search.rs b/src/imap/search.rs index 2fbfdcc..fb889d7 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -190,7 +190,7 @@ impl<'a> Criteria<'a> { And(expr_list) => expr_list .as_ref() .iter() - .any(|cur| Criteria(cur).is_keep_on_query(mail_view)), + .all(|cur| Criteria(cur).is_keep_on_query(mail_view)), Or(left, right) => { Criteria(left).is_keep_on_query(mail_view) || Criteria(right).is_keep_on_query(mail_view) } @@ -234,8 +234,8 @@ impl<'a> Criteria<'a> { // Filter on the full content of the email - Text(_) => unimplemented!(), - Body(_) => unimplemented!(), + Text(txt) => mail_view.content.as_msg().map(|msg| msg.raw_part.windows(txt.as_ref().len()).any(|win| win == txt.as_ref())).unwrap_or(false), + Body(txt) => mail_view.content.as_msg().map(|msg| msg.raw_body.windows(txt.as_ref().len()).any(|win| win == txt.as_ref())).unwrap_or(false), unknown => { tracing::error!("Unknown filter {:?}", unknown); -- cgit v1.2.3 From 1d84b0ffd006b895aeb15c621fcd0ced826a0599 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 6 Jan 2024 23:35:23 +0100 Subject: Format code --- src/imap/search.rs | 160 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 112 insertions(+), 48 deletions(-) (limited to 'src/imap/search.rs') diff --git a/src/imap/search.rs b/src/imap/search.rs index fb889d7..2b0b34b 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -5,9 +5,9 @@ use imap_codec::imap_types::core::NonEmptyVec; use imap_codec::imap_types::search::SearchKey; use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet}; -use crate::mail::query::{QueryScope, QueryResult}; use crate::imap::index::MailIndex; use crate::imap::mail_view::MailView; +use crate::mail::query::{QueryResult, QueryScope}; pub enum SeqType { Undefined, @@ -20,7 +20,6 @@ impl SeqType { } } - pub struct Criteria<'a>(pub &'a SearchKey<'a>); impl<'a> Criteria<'a> { /// Returns a set of email identifiers that is greater or equal @@ -87,11 +86,16 @@ impl<'a> Criteria<'a> { use SearchKey::*; match self.0 { // Combinators - And(and_list) => and_list.as_ref().iter().fold(QueryScope::Index, |prev, sk| { - prev.union(&Criteria(sk).query_scope()) - }), + And(and_list) => and_list + .as_ref() + .iter() + .fold(QueryScope::Index, |prev, sk| { + prev.union(&Criteria(sk).query_scope()) + }), Not(inner) => Criteria(inner).query_scope(), - Or(left, right) => Criteria(left).query_scope().union(&Criteria(right).query_scope()), + Or(left, right) => Criteria(left) + .query_scope() + .union(&Criteria(right).query_scope()), All => QueryScope::Index, // IMF Headers @@ -109,9 +113,12 @@ impl<'a> Criteria<'a> { } /// Returns emails that we now for sure we want to keep - /// but also a second list of emails we need to investigate further by + /// but also a second list of emails we need to investigate further by /// fetching some remote data - pub fn filter_on_idx<'b>(&self, midx_list: &[MailIndex<'b>]) -> (Vec>, Vec>) { + pub fn filter_on_idx<'b>( + &self, + midx_list: &[MailIndex<'b>], + ) -> (Vec>, Vec>) { let (p1, p2): (Vec<_>, Vec<_>) = midx_list .iter() .map(|x| (x, self.is_keep_on_idx(x))) @@ -124,7 +131,11 @@ impl<'a> Criteria<'a> { (to_keep, to_fetch) } - pub fn filter_on_query<'b>(&self, midx_list: &[MailIndex<'b>], query_result: &'b Vec>) -> Result>> { + pub fn filter_on_query<'b>( + &self, + midx_list: &[MailIndex<'b>], + query_result: &'b Vec>, + ) -> Result>> { Ok(midx_list .iter() .zip(query_result.iter()) @@ -137,8 +148,8 @@ impl<'a> Criteria<'a> { } // ---- - - /// Here we are doing a partial filtering: we do not have access + + /// Here we are doing a partial filtering: we do not have access /// to the headers or to the body, so every time we encounter a rule /// based on them, we need to keep it. /// @@ -151,7 +162,9 @@ impl<'a> Criteria<'a> { And(expr_list) => expr_list .as_ref() .iter() - .fold(PartialDecision::Keep, |acc, cur| acc.and(&Criteria(cur).is_keep_on_idx(midx))), + .fold(PartialDecision::Keep, |acc, cur| { + acc.and(&Criteria(cur).is_keep_on_idx(midx)) + }), Or(left, right) => { let left_decision = Criteria(left).is_keep_on_idx(midx); let right_decision = Criteria(right).is_keep_on_idx(midx); @@ -163,7 +176,7 @@ impl<'a> Criteria<'a> { // Sequence logic maybe_seq if is_sk_seq(maybe_seq) => is_keep_seq(maybe_seq, midx).into(), maybe_flag if is_sk_flag(maybe_flag) => is_keep_flag(maybe_flag, midx).into(), - + // All the stuff we can't evaluate yet Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) | Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_) @@ -172,16 +185,15 @@ impl<'a> Criteria<'a> { unknown => { tracing::error!("Unknown filter {:?}", unknown); PartialDecision::Discard - }, + } } } - /// @TODO we re-eveluate twice the same logic. The correct way would be, on each pass, /// to simplify the searck query, by removing the elements that were already checked. /// For example if we have AND(OR(seqid(X), body(Y)), body(X)), we can't keep for sure /// the email, as body(x) might be false. So we need to check it. But as seqid(x) is true, - /// we could simplify the request to just body(x) and truncate the first OR. Today, we are + /// we could simplify the request to just body(x) and truncate the first OR. Today, we are /// not doing that, and thus we reevaluate everything. fn is_keep_on_query(&self, mail_view: &MailView) -> bool { use SearchKey::*; @@ -192,7 +204,8 @@ impl<'a> Criteria<'a> { .iter() .all(|cur| Criteria(cur).is_keep_on_query(mail_view)), Or(left, right) => { - Criteria(left).is_keep_on_query(mail_view) || Criteria(right).is_keep_on_query(mail_view) + Criteria(left).is_keep_on_query(mail_view) + || Criteria(right).is_keep_on_query(mail_view) } Not(expr) => !Criteria(expr).is_keep_on_query(mail_view), All => true, @@ -209,38 +222,82 @@ impl<'a> Criteria<'a> { On(search_naive) => match mail_view.stored_naive_date() { Ok(msg_naive) => &msg_naive == search_naive.as_ref(), _ => false, - }, + }, Since(search_naive) => match mail_view.stored_naive_date() { Ok(msg_naive) => &msg_naive > search_naive.as_ref(), _ => false, }, // Message size is also stored in MailMeta - Larger(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size > *size_ref as usize, - Smaller(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size < *size_ref as usize, + Larger(size_ref) => { + mail_view + .query_result + .metadata() + .expect("metadata were fetched") + .rfc822_size + > *size_ref as usize + } + Smaller(size_ref) => { + mail_view + .query_result + .metadata() + .expect("metadata were fetched") + .rfc822_size + < *size_ref as usize + } // Filter on well-known headers - Bcc(txt) => mail_view.is_header_contains_pattern(&b"bcc"[..], txt.as_ref()), - Cc(txt) => mail_view.is_header_contains_pattern(&b"cc"[..], txt.as_ref()), + Bcc(txt) => mail_view.is_header_contains_pattern(&b"bcc"[..], txt.as_ref()), + Cc(txt) => mail_view.is_header_contains_pattern(&b"cc"[..], txt.as_ref()), From(txt) => mail_view.is_header_contains_pattern(&b"from"[..], txt.as_ref()), - Subject(txt)=> mail_view.is_header_contains_pattern(&b"subject"[..], txt.as_ref()), + Subject(txt) => mail_view.is_header_contains_pattern(&b"subject"[..], txt.as_ref()), To(txt) => mail_view.is_header_contains_pattern(&b"to"[..], txt.as_ref()), - Header(hdr, txt) => mail_view.is_header_contains_pattern(hdr.as_ref(), txt.as_ref()), + Header(hdr, txt) => mail_view.is_header_contains_pattern(hdr.as_ref(), txt.as_ref()), // Filter on Date header - SentBefore(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive < search_naive.as_ref()).unwrap_or(false), - SentOn(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive == search_naive.as_ref()).unwrap_or(false), - SentSince(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive > search_naive.as_ref()).unwrap_or(false), - + SentBefore(search_naive) => mail_view + .imf() + .map(|imf| imf.naive_date().ok()) + .flatten() + .map(|msg_naive| &msg_naive < search_naive.as_ref()) + .unwrap_or(false), + SentOn(search_naive) => mail_view + .imf() + .map(|imf| imf.naive_date().ok()) + .flatten() + .map(|msg_naive| &msg_naive == search_naive.as_ref()) + .unwrap_or(false), + SentSince(search_naive) => mail_view + .imf() + .map(|imf| imf.naive_date().ok()) + .flatten() + .map(|msg_naive| &msg_naive > search_naive.as_ref()) + .unwrap_or(false), // Filter on the full content of the email - Text(txt) => mail_view.content.as_msg().map(|msg| msg.raw_part.windows(txt.as_ref().len()).any(|win| win == txt.as_ref())).unwrap_or(false), - Body(txt) => mail_view.content.as_msg().map(|msg| msg.raw_body.windows(txt.as_ref().len()).any(|win| win == txt.as_ref())).unwrap_or(false), + Text(txt) => mail_view + .content + .as_msg() + .map(|msg| { + msg.raw_part + .windows(txt.as_ref().len()) + .any(|win| win == txt.as_ref()) + }) + .unwrap_or(false), + Body(txt) => mail_view + .content + .as_msg() + .map(|msg| { + msg.raw_body + .windows(txt.as_ref().len()) + .any(|win| win == txt.as_ref()) + }) + .unwrap_or(false), unknown => { tracing::error!("Unknown filter {:?}", unknown); false - }, + } } } } @@ -281,7 +338,7 @@ enum PartialDecision { Discard, Postpone, } -impl From for PartialDecision { +impl From for PartialDecision { fn from(x: bool) -> Self { match x { true => PartialDecision::Keep, @@ -323,9 +380,8 @@ impl PartialDecision { fn is_sk_flag(sk: &SearchKey) -> bool { use SearchKey::*; match sk { - Answered | Deleted | Draft | Flagged | Keyword(..) | New | Old - | Recent | Seen | Unanswered | Undeleted | Undraft - | Unflagged | Unkeyword(..) | Unseen => true, + Answered | Deleted | Draft | Flagged | Keyword(..) | New | Old | Recent | Seen + | Unanswered | Undeleted | Undraft | Unflagged | Unkeyword(..) | Unseen => true, _ => false, } } @@ -342,37 +398,37 @@ fn is_keep_flag(sk: &SearchKey, midx: &MailIndex) -> bool { let is_recent = midx.is_flag_set("\\Recent"); let is_seen = midx.is_flag_set("\\Seen"); is_recent && !is_seen - }, + } Old => { let is_recent = midx.is_flag_set("\\Recent"); !is_recent - }, - Recent => midx.is_flag_set("\\Recent"), - Seen => midx.is_flag_set("\\Seen"), - Unanswered => { + } + Recent => midx.is_flag_set("\\Recent"), + Seen => midx.is_flag_set("\\Seen"), + Unanswered => { let is_answered = midx.is_flag_set("\\Recent"); !is_answered - }, + } Undeleted => { let is_deleted = midx.is_flag_set("\\Deleted"); !is_deleted - }, + } Undraft => { let is_draft = midx.is_flag_set("\\Draft"); !is_draft - }, + } Unflagged => { let is_flagged = midx.is_flag_set("\\Flagged"); !is_flagged - }, + } Unkeyword(kw) => { let is_keyword_set = midx.is_flag_set(kw.inner()); !is_keyword_set - }, + } Unseen => { let is_seen = midx.is_flag_set("\\Seen"); !is_seen - }, + } // Not flag logic _ => unreachable!(), @@ -389,8 +445,16 @@ fn is_sk_seq(sk: &SearchKey) -> bool { fn is_keep_seq(sk: &SearchKey, midx: &MailIndex) -> bool { use SearchKey::*; match sk { - SequenceSet(seq_set) => seq_set.0.as_ref().iter().any(|seq| midx.is_in_sequence_i(seq)), - Uid(seq_set) => seq_set.0.as_ref().iter().any(|seq| midx.is_in_sequence_uid(seq)), + SequenceSet(seq_set) => seq_set + .0 + .as_ref() + .iter() + .any(|seq| midx.is_in_sequence_i(seq)), + Uid(seq_set) => seq_set + .0 + .as_ref() + .iter() + .any(|seq| midx.is_in_sequence_uid(seq)), _ => unreachable!(), } } -- cgit v1.2.3 From 558e32fbd27be9a81144571b4baf318293be1344 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Mon, 8 Jan 2024 11:13:13 +0100 Subject: UID sequence are now correctly fetched --- src/imap/search.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/imap/search.rs') diff --git a/src/imap/search.rs b/src/imap/search.rs index 2b0b34b..22afd0c 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -117,13 +117,13 @@ impl<'a> Criteria<'a> { /// fetching some remote data pub fn filter_on_idx<'b>( &self, - midx_list: &[MailIndex<'b>], - ) -> (Vec>, Vec>) { + midx_list: &[&'b MailIndex<'b>], + ) -> (Vec<&'b MailIndex<'b>>, Vec<&'b MailIndex<'b>>) { let (p1, p2): (Vec<_>, Vec<_>) = midx_list .iter() .map(|x| (x, self.is_keep_on_idx(x))) .filter(|(_midx, decision)| decision.is_keep()) - .map(|(midx, decision)| ((*midx).clone(), decision)) + .map(|(midx, decision)| (*midx, decision)) .partition(|(_midx, decision)| matches!(decision, PartialDecision::Keep)); let to_keep = p1.into_iter().map(|(v, _)| v).collect(); @@ -133,13 +133,13 @@ impl<'a> Criteria<'a> { pub fn filter_on_query<'b>( &self, - midx_list: &[MailIndex<'b>], + midx_list: &[&'b MailIndex<'b>], query_result: &'b Vec>, - ) -> Result>> { + ) -> Result>> { Ok(midx_list .iter() .zip(query_result.iter()) - .map(|(midx, qr)| MailView::new(qr, midx.clone())) + .map(|(midx, qr)| MailView::new(qr, midx)) .collect::, _>>()? .into_iter() .filter(|mail_view| self.is_keep_on_query(mail_view)) -- cgit v1.2.3