From f58904f5bb3dbd429555c406c867f850654843a6 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 6 Jan 2024 18:01:44 +0100 Subject: Search can now filter on index data --- src/imap/command/examined.rs | 12 ++-- src/imap/index.rs | 40 ++++++++++- src/imap/mail_view.rs | 74 +++++++++---------- src/imap/mailbox_view.rs | 12 +++- src/imap/search.rs | 166 +++++++++++++++++++++++++++++++++++++++---- src/mail/mod.rs | 1 - src/mail/query.rs | 9 +++ 7 files changed, 254 insertions(+), 60 deletions(-) (limited to 'src') diff --git a/src/imap/command/examined.rs b/src/imap/command/examined.rs index ec16973..3dd11e2 100644 --- a/src/imap/command/examined.rs +++ b/src/imap/command/examined.rs @@ -111,15 +111,17 @@ impl<'a> ExaminedContext<'a> { pub async fn search( self, - _charset: &Option>, - _criteria: &SearchKey<'a>, - _uid: &bool, + charset: &Option>, + criteria: &SearchKey<'a>, + uid: &bool, ) -> Result<(Response<'static>, flow::Transition)> { + let found = self.mailbox.search(charset, criteria, *uid).await?; Ok(( Response::build() .to_req(self.req) - .message("Not implemented") - .bad()?, + .set_body(found) + .message("SEARCH completed") + .ok()?, flow::Transition::None, )) } diff --git a/src/imap/index.rs b/src/imap/index.rs index 01dd2ef..48d5ebd 100644 --- a/src/imap/index.rs +++ b/src/imap/index.rs @@ -1,7 +1,7 @@ use std::num::NonZeroU32; use anyhow::{anyhow, bail, Result}; -use imap_codec::imap_types::sequence::{self, SequenceSet}; +use imap_codec::imap_types::sequence::{self, Sequence, SequenceSet, SeqOrUid}; use crate::mail::uidindex::{ImapUid, UidIndex}; use crate::mail::unique_ident::UniqueIdent; @@ -87,9 +87,47 @@ impl<'a> Index<'a> { } } +#[derive(Clone)] pub struct MailIndex<'a> { pub i: NonZeroU32, pub uid: ImapUid, pub uuid: UniqueIdent, pub flags: &'a Vec, } + +impl<'a> MailIndex<'a> { + // The following functions are used to implement the SEARCH command + pub fn is_in_sequence_i(&self, seq: &Sequence) -> bool { + match seq { + Sequence::Single(SeqOrUid::Asterisk) => true, + Sequence::Single(SeqOrUid::Value(target)) => target == &self.i, + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x)) + | Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.i, + Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => if x1 < x2 { + x1 <= &self.i && &self.i <= x2 + } else { + x1 >= &self.i && &self.i >= x2 + }, + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true, + } + } + + pub fn is_in_sequence_uid(&self, seq: &Sequence) -> bool { + match seq { + Sequence::Single(SeqOrUid::Asterisk) => true, + Sequence::Single(SeqOrUid::Value(target)) => target == &self.uid, + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x)) + | Sequence::Range(SeqOrUid::Value(x),SeqOrUid::Asterisk) => x <= &self.uid, + Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => if x1 < x2 { + x1 <= &self.uid && &self.uid <= x2 + } else { + x1 >= &self.uid && &self.uid >= x2 + }, + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true, + } + } + + pub fn is_flag_set(&self, flag: &str) -> bool { + self.flags.iter().any(|candidate| candidate.as_str() == flag) + } +} diff --git a/src/imap/mail_view.rs b/src/imap/mail_view.rs index de9bfe3..fc36e21 100644 --- a/src/imap/mail_view.rs +++ b/src/imap/mail_view.rs @@ -52,6 +52,44 @@ impl<'a> MailView<'a> { }) } + pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> { + let mut seen = SeenFlag::DoNothing; + let res_attrs = ap + .attrs + .iter() + .map(|attr| match attr { + MessageDataItemName::Uid => Ok(self.uid()), + MessageDataItemName::Flags => Ok(self.flags()), + MessageDataItemName::Rfc822Size => self.rfc_822_size(), + MessageDataItemName::Rfc822Header => self.rfc_822_header(), + MessageDataItemName::Rfc822Text => self.rfc_822_text(), + MessageDataItemName::Rfc822 => self.rfc822(), + MessageDataItemName::Envelope => Ok(self.envelope()), + MessageDataItemName::Body => self.body(), + MessageDataItemName::BodyStructure => self.body_structure(), + MessageDataItemName::BodyExt { + section, + partial, + peek, + } => { + let (body, has_seen) = self.body_ext(section, partial, peek)?; + seen = has_seen; + Ok(body) + } + MessageDataItemName::InternalDate => self.internal_date(), + }) + .collect::, _>>()?; + + Ok(( + Body::Data(Data::Fetch { + seq: self.in_idx.i, + items: res_attrs.try_into()?, + }), + seen, + )) + } + + // Private function, mainly for filter! fn uid(&self) -> MessageDataItem<'static> { MessageDataItem::Uid(self.in_idx.uid.clone()) } @@ -168,42 +206,6 @@ impl<'a> MailView<'a> { Ok(MessageDataItem::InternalDate(DateTime::unvalidated(dt))) } - pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> { - let mut seen = SeenFlag::DoNothing; - let res_attrs = ap - .attrs - .iter() - .map(|attr| match attr { - MessageDataItemName::Uid => Ok(self.uid()), - MessageDataItemName::Flags => Ok(self.flags()), - MessageDataItemName::Rfc822Size => self.rfc_822_size(), - MessageDataItemName::Rfc822Header => self.rfc_822_header(), - MessageDataItemName::Rfc822Text => self.rfc_822_text(), - MessageDataItemName::Rfc822 => self.rfc822(), - MessageDataItemName::Envelope => Ok(self.envelope()), - MessageDataItemName::Body => self.body(), - MessageDataItemName::BodyStructure => self.body_structure(), - MessageDataItemName::BodyExt { - section, - partial, - peek, - } => { - let (body, has_seen) = self.body_ext(section, partial, peek)?; - seen = has_seen; - Ok(body) - } - MessageDataItemName::InternalDate => self.internal_date(), - }) - .collect::, _>>()?; - - Ok(( - Body::Data(Data::Fetch { - seq: self.in_idx.i, - items: res_attrs.try_into()?, - }), - seen, - )) - } } pub enum SeenFlag { diff --git a/src/imap/mailbox_view.rs b/src/imap/mailbox_view.rs index e4ffdcd..a07f6a4 100644 --- a/src/imap/mailbox_view.rs +++ b/src/imap/mailbox_view.rs @@ -319,12 +319,18 @@ impl MailboxView { let selection = self.index().fetch(&seq_set, seq_type.is_uid())?; // 3. Filter the selection based on the ID / UID / Flags + let selection = crit.filter_on_idx(&selection); - // 4. If needed, filter the selection based on the metadata - let _need_meta = crit.need_meta(); + // 4. Fetch additional info about the emails + let query_scope = crit.query_scope(); + let uuids = selection + .iter() + .map(|midx| midx.uuid) + .collect::>(); + let query_result = self.0.query(&uuids, query_scope).fetch().await?; // 5. If needed, filter the selection based on the body - let _need_body = crit.need_body(); + let selection = crit.filter_on_query(&selection, &query_result); // 6. Format the result according to the client's taste: // either return UID or ID. diff --git a/src/imap/search.rs b/src/imap/search.rs index b3c6b05..2a1119c 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -3,6 +3,9 @@ use imap_codec::imap_types::search::SearchKey; use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet}; use std::num::NonZeroU32; +use crate::mail::query::{QueryScope, QueryResult}; +use crate::imap::index::MailIndex; + pub enum SeqType { Undefined, NonUid, @@ -54,6 +57,10 @@ impl<'a> Criteria<'a> { tracing::debug!( "using AND in a search request is slow: no intersection is performed" ); + // As we perform no intersection, we don't care if we mix uid or id. + // We only keep the smallest range, being it ID or UID, depending of + // which one has the less items. This is an approximation as UID ranges + // can have holes while ID ones can't. search_list .as_ref() .iter() @@ -72,31 +79,119 @@ impl<'a> Criteria<'a> { /// Not really clever as we can have cases where we filter out /// the email before needing to inspect its meta. /// But for now we are seeking the most basic/stupid algorithm. - pub fn need_meta(&self) -> bool { + pub fn query_scope(&self) -> QueryScope { use SearchKey::*; match self.0 { // IMF Headers Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) - | Subject(_) | To(_) => true, + | Subject(_) | To(_) => QueryScope::Partial, // Internal Date is also stored in MailMeta - Before(_) | On(_) | Since(_) => true, + Before(_) | On(_) | Since(_) => QueryScope::Partial, // Message size is also stored in MailMeta - Larger(_) | Smaller(_) => true, - And(and_list) => and_list.as_ref().iter().any(|sk| Criteria(sk).need_meta()), - Not(inner) => Criteria(inner).need_meta(), - Or(left, right) => Criteria(left).need_meta() || Criteria(right).need_meta(), - _ => false, + Larger(_) | Smaller(_) => QueryScope::Partial, + // Text and Body require that we fetch the full content! + Text(_) | Body(_) => QueryScope::Full, + And(and_list) => and_list.as_ref().iter().fold(QueryScope::Index, |prev, sk| { + prev.union(&Criteria(sk).query_scope()) + }), + Not(inner) => Criteria(inner).query_scope(), + Or(left, right) => Criteria(left).query_scope().union(&Criteria(right).query_scope()), + _ => QueryScope::Index, } } - pub fn need_body(&self) -> bool { + pub fn filter_on_idx<'b>(&self, midx_list: &[MailIndex<'b>]) -> Vec> { + midx_list + .iter() + .filter(|x| self.is_keep_on_idx(x).is_keep()) + .map(|x| (*x).clone()) + .collect::>() + } + + pub fn filter_on_query(&self, midx_list: &[MailIndex], query_result: &Vec>) -> Vec { + unimplemented!(); + } + + // ---- + + /// Here we are doing a partial filtering: we do not have access + /// to the headers or to the body, so every time we encounter a rule + /// based on them, we need to keep it. + /// + /// @TODO Could be optimized on a per-email basis by also returning the QueryScope + /// when more information is needed! + fn is_keep_on_idx(&self, midx: &MailIndex) -> PartialDecision { use SearchKey::*; match self.0 { - Text(_) | Body(_) => true, - And(and_list) => and_list.as_ref().iter().any(|sk| Criteria(sk).need_body()), - Not(inner) => Criteria(inner).need_body(), - Or(left, right) => Criteria(left).need_body() || Criteria(right).need_body(), - _ => false, + // Combinator logic + And(expr_list) => expr_list + .as_ref() + .iter() + .fold(PartialDecision::Keep, |acc, cur| acc.and(&Criteria(cur).is_keep_on_idx(midx))), + Or(left, right) => { + let left_decision = Criteria(left).is_keep_on_idx(midx); + let right_decision = Criteria(right).is_keep_on_idx(midx); + left_decision.or(&right_decision) + } + Not(expr) => Criteria(expr).is_keep_on_idx(midx).not(), + All => PartialDecision::Keep, + + // Sequence logic + SequenceSet(seq_set) => seq_set.0.as_ref().iter().fold(PartialDecision::Discard, |acc, seq| { + let local_decision: PartialDecision = midx.is_in_sequence_i(seq).into(); + acc.or(&local_decision) + }), + Uid(seq_set) => seq_set.0.as_ref().iter().fold(PartialDecision::Discard, |acc, seq| { + let local_decision: PartialDecision = midx.is_in_sequence_uid(seq).into(); + acc.or(&local_decision) + }), + + // Flag logic + Answered => midx.is_flag_set("\\Answered").into(), + Deleted => midx.is_flag_set("\\Deleted").into(), + Draft => midx.is_flag_set("\\Draft").into(), + Flagged => midx.is_flag_set("\\Flagged").into(), + Keyword(kw) => midx.is_flag_set(kw.inner()).into(), + New => { + let is_recent: PartialDecision = midx.is_flag_set("\\Recent").into(); + let is_seen: PartialDecision = midx.is_flag_set("\\Seen").into(); + is_recent.and(&is_seen.not()) + }, + Old => { + let is_recent: PartialDecision = midx.is_flag_set("\\Recent").into(); + is_recent.not() + }, + Recent => midx.is_flag_set("\\Recent").into(), + Seen => midx.is_flag_set("\\Seen").into(), + Unanswered => { + let is_answered: PartialDecision = midx.is_flag_set("\\Recent").into(); + is_answered.not() + }, + Undeleted => { + let is_deleted: PartialDecision = midx.is_flag_set("\\Deleted").into(); + is_deleted.not() + }, + Undraft => { + let is_draft: PartialDecision = midx.is_flag_set("\\Draft").into(); + is_draft.not() + }, + Unflagged => { + let is_flagged: PartialDecision = midx.is_flag_set("\\Flagged").into(); + is_flagged.not() + }, + Unkeyword(kw) => { + let is_keyword_set: PartialDecision = midx.is_flag_set(kw.inner()).into(); + is_keyword_set.not() + }, + Unseen => { + let is_seen: PartialDecision = midx.is_flag_set("\\Seen").into(); + is_seen.not() + }, + + // All the stuff we can't evaluate yet + Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) + | Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_) + | Text(_) | Body(_) => PartialDecision::Postpone, } } } @@ -128,3 +223,46 @@ fn approx_sequence_size(seq: &Sequence) -> u64 { } } } + +enum PartialDecision { + Keep, + Discard, + Postpone, +} +impl From for PartialDecision { + fn from(x: bool) -> Self { + match x { + true => PartialDecision::Keep, + _ => PartialDecision::Discard, + } + } +} +impl PartialDecision { + fn not(&self) -> Self { + match self { + Self::Keep => Self::Discard, + Self::Discard => Self::Keep, + Self::Postpone => Self::Postpone, + } + } + + fn or(&self, other: &Self) -> Self { + match (self, other) { + (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, + (Self::Keep, _) | (_, Self::Keep) => Self::Keep, + (Self::Discard, Self::Discard) => Self::Discard, + } + } + + fn and(&self, other: &Self) -> Self { + match (self, other) { + (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, + (Self::Discard, _) | (_, Self::Discard) => Self::Discard, + (Self::Keep, Self::Keep) => Self::Keep, + } + } + + fn is_keep(&self) -> bool { + !matches!(self, Self::Discard) + } +} diff --git a/src/mail/mod.rs b/src/mail/mod.rs index bb2f130..37578b8 100644 --- a/src/mail/mod.rs +++ b/src/mail/mod.rs @@ -1,5 +1,4 @@ use std::convert::TryFrom; -use std::io::Write; pub mod incoming; pub mod mailbox; diff --git a/src/mail/query.rs b/src/mail/query.rs index 8de73e6..91bd6c1 100644 --- a/src/mail/query.rs +++ b/src/mail/query.rs @@ -19,6 +19,15 @@ pub enum QueryScope { Partial, Full, } +impl QueryScope { + pub fn union(&self, other: &QueryScope) -> QueryScope { + match (self, other) { + (QueryScope::Full, _) | (_, QueryScope::Full) => QueryScope::Full, + (QueryScope::Partial, _) | (_, QueryScope::Partial) => QueryScope::Partial, + (QueryScope::Index, QueryScope::Index) => QueryScope::Index, + } + } +} impl<'a, 'b> Query<'a, 'b> { pub async fn fetch(&self) -> Result>> { -- cgit v1.2.3