From f58904f5bb3dbd429555c406c867f850654843a6 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 6 Jan 2024 18:01:44 +0100 Subject: Search can now filter on index data --- src/imap/command/examined.rs | 12 ++-- src/imap/index.rs | 40 ++++++++++- src/imap/mail_view.rs | 74 +++++++++---------- src/imap/mailbox_view.rs | 12 +++- src/imap/search.rs | 166 +++++++++++++++++++++++++++++++++++++++---- 5 files changed, 245 insertions(+), 59 deletions(-) (limited to 'src/imap') diff --git a/src/imap/command/examined.rs b/src/imap/command/examined.rs index ec16973..3dd11e2 100644 --- a/src/imap/command/examined.rs +++ b/src/imap/command/examined.rs @@ -111,15 +111,17 @@ impl<'a> ExaminedContext<'a> { pub async fn search( self, - _charset: &Option>, - _criteria: &SearchKey<'a>, - _uid: &bool, + charset: &Option>, + criteria: &SearchKey<'a>, + uid: &bool, ) -> Result<(Response<'static>, flow::Transition)> { + let found = self.mailbox.search(charset, criteria, *uid).await?; Ok(( Response::build() .to_req(self.req) - .message("Not implemented") - .bad()?, + .set_body(found) + .message("SEARCH completed") + .ok()?, flow::Transition::None, )) } diff --git a/src/imap/index.rs b/src/imap/index.rs index 01dd2ef..48d5ebd 100644 --- a/src/imap/index.rs +++ b/src/imap/index.rs @@ -1,7 +1,7 @@ use std::num::NonZeroU32; use anyhow::{anyhow, bail, Result}; -use imap_codec::imap_types::sequence::{self, SequenceSet}; +use imap_codec::imap_types::sequence::{self, Sequence, SequenceSet, SeqOrUid}; use crate::mail::uidindex::{ImapUid, UidIndex}; use crate::mail::unique_ident::UniqueIdent; @@ -87,9 +87,47 @@ impl<'a> Index<'a> { } } +#[derive(Clone)] pub struct MailIndex<'a> { pub i: NonZeroU32, pub uid: ImapUid, pub uuid: UniqueIdent, pub flags: &'a Vec, } + +impl<'a> MailIndex<'a> { + // The following functions are used to implement the SEARCH command + pub fn is_in_sequence_i(&self, seq: &Sequence) -> bool { + match seq { + Sequence::Single(SeqOrUid::Asterisk) => true, + Sequence::Single(SeqOrUid::Value(target)) => target == &self.i, + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x)) + | Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.i, + Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => if x1 < x2 { + x1 <= &self.i && &self.i <= x2 + } else { + x1 >= &self.i && &self.i >= x2 + }, + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true, + } + } + + pub fn is_in_sequence_uid(&self, seq: &Sequence) -> bool { + match seq { + Sequence::Single(SeqOrUid::Asterisk) => true, + Sequence::Single(SeqOrUid::Value(target)) => target == &self.uid, + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x)) + | Sequence::Range(SeqOrUid::Value(x),SeqOrUid::Asterisk) => x <= &self.uid, + Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => if x1 < x2 { + x1 <= &self.uid && &self.uid <= x2 + } else { + x1 >= &self.uid && &self.uid >= x2 + }, + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true, + } + } + + pub fn is_flag_set(&self, flag: &str) -> bool { + self.flags.iter().any(|candidate| candidate.as_str() == flag) + } +} diff --git a/src/imap/mail_view.rs b/src/imap/mail_view.rs index de9bfe3..fc36e21 100644 --- a/src/imap/mail_view.rs +++ b/src/imap/mail_view.rs @@ -52,6 +52,44 @@ impl<'a> MailView<'a> { }) } + pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> { + let mut seen = SeenFlag::DoNothing; + let res_attrs = ap + .attrs + .iter() + .map(|attr| match attr { + MessageDataItemName::Uid => Ok(self.uid()), + MessageDataItemName::Flags => Ok(self.flags()), + MessageDataItemName::Rfc822Size => self.rfc_822_size(), + MessageDataItemName::Rfc822Header => self.rfc_822_header(), + MessageDataItemName::Rfc822Text => self.rfc_822_text(), + MessageDataItemName::Rfc822 => self.rfc822(), + MessageDataItemName::Envelope => Ok(self.envelope()), + MessageDataItemName::Body => self.body(), + MessageDataItemName::BodyStructure => self.body_structure(), + MessageDataItemName::BodyExt { + section, + partial, + peek, + } => { + let (body, has_seen) = self.body_ext(section, partial, peek)?; + seen = has_seen; + Ok(body) + } + MessageDataItemName::InternalDate => self.internal_date(), + }) + .collect::, _>>()?; + + Ok(( + Body::Data(Data::Fetch { + seq: self.in_idx.i, + items: res_attrs.try_into()?, + }), + seen, + )) + } + + // Private function, mainly for filter! fn uid(&self) -> MessageDataItem<'static> { MessageDataItem::Uid(self.in_idx.uid.clone()) } @@ -168,42 +206,6 @@ impl<'a> MailView<'a> { Ok(MessageDataItem::InternalDate(DateTime::unvalidated(dt))) } - pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> { - let mut seen = SeenFlag::DoNothing; - let res_attrs = ap - .attrs - .iter() - .map(|attr| match attr { - MessageDataItemName::Uid => Ok(self.uid()), - MessageDataItemName::Flags => Ok(self.flags()), - MessageDataItemName::Rfc822Size => self.rfc_822_size(), - MessageDataItemName::Rfc822Header => self.rfc_822_header(), - MessageDataItemName::Rfc822Text => self.rfc_822_text(), - MessageDataItemName::Rfc822 => self.rfc822(), - MessageDataItemName::Envelope => Ok(self.envelope()), - MessageDataItemName::Body => self.body(), - MessageDataItemName::BodyStructure => self.body_structure(), - MessageDataItemName::BodyExt { - section, - partial, - peek, - } => { - let (body, has_seen) = self.body_ext(section, partial, peek)?; - seen = has_seen; - Ok(body) - } - MessageDataItemName::InternalDate => self.internal_date(), - }) - .collect::, _>>()?; - - Ok(( - Body::Data(Data::Fetch { - seq: self.in_idx.i, - items: res_attrs.try_into()?, - }), - seen, - )) - } } pub enum SeenFlag { diff --git a/src/imap/mailbox_view.rs b/src/imap/mailbox_view.rs index e4ffdcd..a07f6a4 100644 --- a/src/imap/mailbox_view.rs +++ b/src/imap/mailbox_view.rs @@ -319,12 +319,18 @@ impl MailboxView { let selection = self.index().fetch(&seq_set, seq_type.is_uid())?; // 3. Filter the selection based on the ID / UID / Flags + let selection = crit.filter_on_idx(&selection); - // 4. If needed, filter the selection based on the metadata - let _need_meta = crit.need_meta(); + // 4. Fetch additional info about the emails + let query_scope = crit.query_scope(); + let uuids = selection + .iter() + .map(|midx| midx.uuid) + .collect::>(); + let query_result = self.0.query(&uuids, query_scope).fetch().await?; // 5. If needed, filter the selection based on the body - let _need_body = crit.need_body(); + let selection = crit.filter_on_query(&selection, &query_result); // 6. Format the result according to the client's taste: // either return UID or ID. diff --git a/src/imap/search.rs b/src/imap/search.rs index b3c6b05..2a1119c 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -3,6 +3,9 @@ use imap_codec::imap_types::search::SearchKey; use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet}; use std::num::NonZeroU32; +use crate::mail::query::{QueryScope, QueryResult}; +use crate::imap::index::MailIndex; + pub enum SeqType { Undefined, NonUid, @@ -54,6 +57,10 @@ impl<'a> Criteria<'a> { tracing::debug!( "using AND in a search request is slow: no intersection is performed" ); + // As we perform no intersection, we don't care if we mix uid or id. + // We only keep the smallest range, being it ID or UID, depending of + // which one has the less items. This is an approximation as UID ranges + // can have holes while ID ones can't. search_list .as_ref() .iter() @@ -72,31 +79,119 @@ impl<'a> Criteria<'a> { /// Not really clever as we can have cases where we filter out /// the email before needing to inspect its meta. /// But for now we are seeking the most basic/stupid algorithm. - pub fn need_meta(&self) -> bool { + pub fn query_scope(&self) -> QueryScope { use SearchKey::*; match self.0 { // IMF Headers Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) - | Subject(_) | To(_) => true, + | Subject(_) | To(_) => QueryScope::Partial, // Internal Date is also stored in MailMeta - Before(_) | On(_) | Since(_) => true, + Before(_) | On(_) | Since(_) => QueryScope::Partial, // Message size is also stored in MailMeta - Larger(_) | Smaller(_) => true, - And(and_list) => and_list.as_ref().iter().any(|sk| Criteria(sk).need_meta()), - Not(inner) => Criteria(inner).need_meta(), - Or(left, right) => Criteria(left).need_meta() || Criteria(right).need_meta(), - _ => false, + Larger(_) | Smaller(_) => QueryScope::Partial, + // Text and Body require that we fetch the full content! + Text(_) | Body(_) => QueryScope::Full, + And(and_list) => and_list.as_ref().iter().fold(QueryScope::Index, |prev, sk| { + prev.union(&Criteria(sk).query_scope()) + }), + Not(inner) => Criteria(inner).query_scope(), + Or(left, right) => Criteria(left).query_scope().union(&Criteria(right).query_scope()), + _ => QueryScope::Index, } } - pub fn need_body(&self) -> bool { + pub fn filter_on_idx<'b>(&self, midx_list: &[MailIndex<'b>]) -> Vec> { + midx_list + .iter() + .filter(|x| self.is_keep_on_idx(x).is_keep()) + .map(|x| (*x).clone()) + .collect::>() + } + + pub fn filter_on_query(&self, midx_list: &[MailIndex], query_result: &Vec>) -> Vec { + unimplemented!(); + } + + // ---- + + /// Here we are doing a partial filtering: we do not have access + /// to the headers or to the body, so every time we encounter a rule + /// based on them, we need to keep it. + /// + /// @TODO Could be optimized on a per-email basis by also returning the QueryScope + /// when more information is needed! + fn is_keep_on_idx(&self, midx: &MailIndex) -> PartialDecision { use SearchKey::*; match self.0 { - Text(_) | Body(_) => true, - And(and_list) => and_list.as_ref().iter().any(|sk| Criteria(sk).need_body()), - Not(inner) => Criteria(inner).need_body(), - Or(left, right) => Criteria(left).need_body() || Criteria(right).need_body(), - _ => false, + // Combinator logic + And(expr_list) => expr_list + .as_ref() + .iter() + .fold(PartialDecision::Keep, |acc, cur| acc.and(&Criteria(cur).is_keep_on_idx(midx))), + Or(left, right) => { + let left_decision = Criteria(left).is_keep_on_idx(midx); + let right_decision = Criteria(right).is_keep_on_idx(midx); + left_decision.or(&right_decision) + } + Not(expr) => Criteria(expr).is_keep_on_idx(midx).not(), + All => PartialDecision::Keep, + + // Sequence logic + SequenceSet(seq_set) => seq_set.0.as_ref().iter().fold(PartialDecision::Discard, |acc, seq| { + let local_decision: PartialDecision = midx.is_in_sequence_i(seq).into(); + acc.or(&local_decision) + }), + Uid(seq_set) => seq_set.0.as_ref().iter().fold(PartialDecision::Discard, |acc, seq| { + let local_decision: PartialDecision = midx.is_in_sequence_uid(seq).into(); + acc.or(&local_decision) + }), + + // Flag logic + Answered => midx.is_flag_set("\\Answered").into(), + Deleted => midx.is_flag_set("\\Deleted").into(), + Draft => midx.is_flag_set("\\Draft").into(), + Flagged => midx.is_flag_set("\\Flagged").into(), + Keyword(kw) => midx.is_flag_set(kw.inner()).into(), + New => { + let is_recent: PartialDecision = midx.is_flag_set("\\Recent").into(); + let is_seen: PartialDecision = midx.is_flag_set("\\Seen").into(); + is_recent.and(&is_seen.not()) + }, + Old => { + let is_recent: PartialDecision = midx.is_flag_set("\\Recent").into(); + is_recent.not() + }, + Recent => midx.is_flag_set("\\Recent").into(), + Seen => midx.is_flag_set("\\Seen").into(), + Unanswered => { + let is_answered: PartialDecision = midx.is_flag_set("\\Recent").into(); + is_answered.not() + }, + Undeleted => { + let is_deleted: PartialDecision = midx.is_flag_set("\\Deleted").into(); + is_deleted.not() + }, + Undraft => { + let is_draft: PartialDecision = midx.is_flag_set("\\Draft").into(); + is_draft.not() + }, + Unflagged => { + let is_flagged: PartialDecision = midx.is_flag_set("\\Flagged").into(); + is_flagged.not() + }, + Unkeyword(kw) => { + let is_keyword_set: PartialDecision = midx.is_flag_set(kw.inner()).into(); + is_keyword_set.not() + }, + Unseen => { + let is_seen: PartialDecision = midx.is_flag_set("\\Seen").into(); + is_seen.not() + }, + + // All the stuff we can't evaluate yet + Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) + | Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_) + | Text(_) | Body(_) => PartialDecision::Postpone, } } } @@ -128,3 +223,46 @@ fn approx_sequence_size(seq: &Sequence) -> u64 { } } } + +enum PartialDecision { + Keep, + Discard, + Postpone, +} +impl From for PartialDecision { + fn from(x: bool) -> Self { + match x { + true => PartialDecision::Keep, + _ => PartialDecision::Discard, + } + } +} +impl PartialDecision { + fn not(&self) -> Self { + match self { + Self::Keep => Self::Discard, + Self::Discard => Self::Keep, + Self::Postpone => Self::Postpone, + } + } + + fn or(&self, other: &Self) -> Self { + match (self, other) { + (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, + (Self::Keep, _) | (_, Self::Keep) => Self::Keep, + (Self::Discard, Self::Discard) => Self::Discard, + } + } + + fn and(&self, other: &Self) -> Self { + match (self, other) { + (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, + (Self::Discard, _) | (_, Self::Discard) => Self::Discard, + (Self::Keep, Self::Keep) => Self::Keep, + } + } + + fn is_keep(&self) -> bool { + !matches!(self, Self::Discard) + } +} -- cgit v1.2.3 From 870de493c84c6c3134d14ee8a234f124360354a7 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 6 Jan 2024 18:51:21 +0100 Subject: Search is made more clear --- src/imap/mailbox_view.rs | 11 +-- src/imap/search.rs | 192 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 136 insertions(+), 67 deletions(-) (limited to 'src/imap') diff --git a/src/imap/mailbox_view.rs b/src/imap/mailbox_view.rs index a07f6a4..3c43be8 100644 --- a/src/imap/mailbox_view.rs +++ b/src/imap/mailbox_view.rs @@ -319,24 +319,25 @@ impl MailboxView { let selection = self.index().fetch(&seq_set, seq_type.is_uid())?; // 3. Filter the selection based on the ID / UID / Flags - let selection = crit.filter_on_idx(&selection); + let (kept_idx, to_fetch) = crit.filter_on_idx(&selection); // 4. Fetch additional info about the emails let query_scope = crit.query_scope(); - let uuids = selection + let uuids = to_fetch .iter() .map(|midx| midx.uuid) .collect::>(); let query_result = self.0.query(&uuids, query_scope).fetch().await?; // 5. If needed, filter the selection based on the body - let selection = crit.filter_on_query(&selection, &query_result); + let kept_query = crit.filter_on_query(&to_fetch, &query_result); // 6. Format the result according to the client's taste: // either return UID or ID. + let final_selection = kept_idx.into_iter().chain(kept_query.into_iter()); let selection_fmt = match uid { - true => selection.into_iter().map(|in_idx| in_idx.uid).collect(), - _ => selection.into_iter().map(|in_idx| in_idx.i).collect(), + true => final_selection.map(|in_idx| in_idx.uid).collect(), + _ => final_selection.map(|in_idx| in_idx.i).collect(), }; Ok(vec![Body::Data(Data::Search(selection_fmt))]) diff --git a/src/imap/search.rs b/src/imap/search.rs index 2a1119c..0ab0300 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -17,6 +17,7 @@ impl SeqType { } } + pub struct Criteria<'a>(pub &'a SearchKey<'a>); impl<'a> Criteria<'a> { /// Returns a set of email identifiers that is greater or equal @@ -82,6 +83,14 @@ impl<'a> Criteria<'a> { pub fn query_scope(&self) -> QueryScope { use SearchKey::*; match self.0 { + // Combinators + And(and_list) => and_list.as_ref().iter().fold(QueryScope::Index, |prev, sk| { + prev.union(&Criteria(sk).query_scope()) + }), + Not(inner) => Criteria(inner).query_scope(), + Or(left, right) => Criteria(left).query_scope().union(&Criteria(right).query_scope()), + All => QueryScope::Index, + // IMF Headers Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) | Subject(_) | To(_) => QueryScope::Partial, @@ -91,25 +100,34 @@ impl<'a> Criteria<'a> { Larger(_) | Smaller(_) => QueryScope::Partial, // Text and Body require that we fetch the full content! Text(_) | Body(_) => QueryScope::Full, - And(and_list) => and_list.as_ref().iter().fold(QueryScope::Index, |prev, sk| { - prev.union(&Criteria(sk).query_scope()) - }), - Not(inner) => Criteria(inner).query_scope(), - Or(left, right) => Criteria(left).query_scope().union(&Criteria(right).query_scope()), + _ => QueryScope::Index, } } - pub fn filter_on_idx<'b>(&self, midx_list: &[MailIndex<'b>]) -> Vec> { - midx_list + /// Returns emails that we now for sure we want to keep + /// but also a second list of emails we need to investigate further by + /// fetching some remote data + pub fn filter_on_idx<'b>(&self, midx_list: &[MailIndex<'b>]) -> (Vec>, Vec>) { + let (p1, p2): (Vec<_>, Vec<_>) = midx_list .iter() - .filter(|x| self.is_keep_on_idx(x).is_keep()) - .map(|x| (*x).clone()) - .collect::>() + .map(|x| (x, self.is_keep_on_idx(x))) + .filter(|(_midx, decision)| decision.is_keep()) + .map(|(midx, decision)| ((*midx).clone(), decision)) + .partition(|(_midx, decision)| matches!(decision, PartialDecision::Keep)); + + let to_keep = p1.into_iter().map(|(v, _)| v).collect(); + let to_fetch = p2.into_iter().map(|(v, _)| v).collect(); + (to_keep, to_fetch) } - pub fn filter_on_query(&self, midx_list: &[MailIndex], query_result: &Vec>) -> Vec { - unimplemented!(); + pub fn filter_on_query<'b>(&self, midx_list: &[MailIndex<'b>], query_result: &Vec>) -> Vec> { + midx_list + .iter() + .zip(query_result.iter()) + .filter(|(midx, qr)| self.is_keep_on_query(midx, qr)) + .map(|(midx, _qr)| midx.clone()) + .collect() } // ---- @@ -137,65 +155,37 @@ impl<'a> Criteria<'a> { All => PartialDecision::Keep, // Sequence logic - SequenceSet(seq_set) => seq_set.0.as_ref().iter().fold(PartialDecision::Discard, |acc, seq| { - let local_decision: PartialDecision = midx.is_in_sequence_i(seq).into(); - acc.or(&local_decision) - }), - Uid(seq_set) => seq_set.0.as_ref().iter().fold(PartialDecision::Discard, |acc, seq| { - let local_decision: PartialDecision = midx.is_in_sequence_uid(seq).into(); - acc.or(&local_decision) - }), - - // Flag logic - Answered => midx.is_flag_set("\\Answered").into(), - Deleted => midx.is_flag_set("\\Deleted").into(), - Draft => midx.is_flag_set("\\Draft").into(), - Flagged => midx.is_flag_set("\\Flagged").into(), - Keyword(kw) => midx.is_flag_set(kw.inner()).into(), - New => { - let is_recent: PartialDecision = midx.is_flag_set("\\Recent").into(); - let is_seen: PartialDecision = midx.is_flag_set("\\Seen").into(); - is_recent.and(&is_seen.not()) - }, - Old => { - let is_recent: PartialDecision = midx.is_flag_set("\\Recent").into(); - is_recent.not() - }, - Recent => midx.is_flag_set("\\Recent").into(), - Seen => midx.is_flag_set("\\Seen").into(), - Unanswered => { - let is_answered: PartialDecision = midx.is_flag_set("\\Recent").into(); - is_answered.not() - }, - Undeleted => { - let is_deleted: PartialDecision = midx.is_flag_set("\\Deleted").into(); - is_deleted.not() - }, - Undraft => { - let is_draft: PartialDecision = midx.is_flag_set("\\Draft").into(); - is_draft.not() - }, - Unflagged => { - let is_flagged: PartialDecision = midx.is_flag_set("\\Flagged").into(); - is_flagged.not() - }, - Unkeyword(kw) => { - let is_keyword_set: PartialDecision = midx.is_flag_set(kw.inner()).into(); - is_keyword_set.not() - }, - Unseen => { - let is_seen: PartialDecision = midx.is_flag_set("\\Seen").into(); - is_seen.not() - }, + maybe_seq if is_sk_seq(maybe_seq) => is_keep_seq(maybe_seq, midx).into(), + maybe_flag if is_sk_flag(maybe_flag) => is_keep_flag(maybe_flag, midx).into(), // All the stuff we can't evaluate yet Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) | Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_) | Text(_) | Body(_) => PartialDecision::Postpone, + + _ => unreachable!(), + } + } + + fn is_keep_on_query(&self, midx: &MailIndex, qr: &QueryResult) -> bool { + use SearchKey::*; + match self.0 { + // Combinator logic + And(expr_list) => expr_list + .as_ref() + .iter() + .any(|cur| Criteria(cur).is_keep_on_query(midx, qr)), + Or(left, right) => { + Criteria(left).is_keep_on_query(midx, qr) || Criteria(right).is_keep_on_query(midx, qr) + } + Not(expr) => !Criteria(expr).is_keep_on_query(midx, qr), + All => true, + _ => unimplemented!(), } } } +// ---- Sequence things ---- fn sequence_set_all() -> SequenceSet { SequenceSet::from(Sequence::Range( SeqOrUid::Value(NonZeroU32::MIN), @@ -224,6 +214,8 @@ fn approx_sequence_size(seq: &Sequence) -> u64 { } } +// --- Partial decision things ---- + enum PartialDecision { Keep, Discard, @@ -266,3 +258,79 @@ impl PartialDecision { !matches!(self, Self::Discard) } } + +// ----- Search Key things --- +fn is_sk_flag(sk: &SearchKey) -> bool { + use SearchKey::*; + match sk { + Answered | Deleted | Draft | Flagged | Keyword(..) | New | Old + | Recent | Seen | Unanswered | Undeleted | Undraft + | Unflagged | Unkeyword(..) | Unseen => true, + _ => false, + } +} + +fn is_keep_flag(sk: &SearchKey, midx: &MailIndex) -> bool { + use SearchKey::*; + match sk { + Answered => midx.is_flag_set("\\Answered"), + Deleted => midx.is_flag_set("\\Deleted"), + Draft => midx.is_flag_set("\\Draft"), + Flagged => midx.is_flag_set("\\Flagged"), + Keyword(kw) => midx.is_flag_set(kw.inner()), + New => { + let is_recent = midx.is_flag_set("\\Recent"); + let is_seen = midx.is_flag_set("\\Seen"); + is_recent && !is_seen + }, + Old => { + let is_recent = midx.is_flag_set("\\Recent"); + !is_recent + }, + Recent => midx.is_flag_set("\\Recent"), + Seen => midx.is_flag_set("\\Seen"), + Unanswered => { + let is_answered = midx.is_flag_set("\\Recent"); + !is_answered + }, + Undeleted => { + let is_deleted = midx.is_flag_set("\\Deleted"); + !is_deleted + }, + Undraft => { + let is_draft = midx.is_flag_set("\\Draft"); + !is_draft + }, + Unflagged => { + let is_flagged = midx.is_flag_set("\\Flagged"); + !is_flagged + }, + Unkeyword(kw) => { + let is_keyword_set = midx.is_flag_set(kw.inner()); + !is_keyword_set + }, + Unseen => { + let is_seen = midx.is_flag_set("\\Seen"); + !is_seen + }, + + // Not flag logic + _ => unreachable!(), + } +} + +fn is_sk_seq(sk: &SearchKey) -> bool { + use SearchKey::*; + match sk { + SequenceSet(..) | Uid(..) => true, + _ => false, + } +} +fn is_keep_seq(sk: &SearchKey, midx: &MailIndex) -> bool { + use SearchKey::*; + match sk { + SequenceSet(seq_set) => seq_set.0.as_ref().iter().any(|seq| midx.is_in_sequence_i(seq)), + Uid(seq_set) => seq_set.0.as_ref().iter().any(|seq| midx.is_in_sequence_uid(seq)), + _ => unreachable!(), + } +} -- cgit v1.2.3 From ea1772df425cb7faa4628b1c6c398ae3f77fca34 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 6 Jan 2024 20:40:18 +0100 Subject: Searching on storage date is now possible --- src/imap/imf_view.rs | 121 ++++++++++++++++++++++++----------------------- src/imap/mail_view.rs | 28 ++++++++--- src/imap/mailbox_view.rs | 2 +- src/imap/mime_view.rs | 4 +- src/imap/search.rs | 90 +++++++++++++++++++++++++++++------ 5 files changed, 163 insertions(+), 82 deletions(-) (limited to 'src/imap') diff --git a/src/imap/imf_view.rs b/src/imap/imf_view.rs index 4297769..8b52b9e 100644 --- a/src/imap/imf_view.rs +++ b/src/imap/imf_view.rs @@ -3,66 +3,71 @@ use imap_codec::imap_types::envelope::{Address, Envelope}; use eml_codec::imf; -/// Envelope rules are defined in RFC 3501, section 7.4.2 -/// https://datatracker.ietf.org/doc/html/rfc3501#section-7.4.2 -/// -/// Some important notes: -/// -/// If the Sender or Reply-To lines are absent in the [RFC-2822] -/// header, or are present but empty, the server sets the -/// corresponding member of the envelope to be the same value as -/// the from member (the client is not expected to know to do -/// this). Note: [RFC-2822] requires that all messages have a valid -/// From header. Therefore, the from, sender, and reply-to -/// members in the envelope can not be NIL. -/// -/// If the Date, Subject, In-Reply-To, and Message-ID header lines -/// are absent in the [RFC-2822] header, the corresponding member -/// of the envelope is NIL; if these header lines are present but -/// empty the corresponding member of the envelope is the empty -/// string. +pub struct ImfView<'a>(pub &'a imf::Imf<'a>); -//@FIXME return an error if the envelope is invalid instead of panicking -//@FIXME some fields must be defaulted if there are not set. -pub fn message_envelope(msg: &imf::Imf) -> Envelope<'static> { - let from = msg.from.iter().map(convert_mbx).collect::>(); +impl<'a> ImfView<'a> { + /// Envelope rules are defined in RFC 3501, section 7.4.2 + /// https://datatracker.ietf.org/doc/html/rfc3501#section-7.4.2 + /// + /// Some important notes: + /// + /// If the Sender or Reply-To lines are absent in the [RFC-2822] + /// header, or are present but empty, the server sets the + /// corresponding member of the envelope to be the same value as + /// the from member (the client is not expected to know to do + /// this). Note: [RFC-2822] requires that all messages have a valid + /// From header. Therefore, the from, sender, and reply-to + /// members in the envelope can not be NIL. + /// + /// If the Date, Subject, In-Reply-To, and Message-ID header lines + /// are absent in the [RFC-2822] header, the corresponding member + /// of the envelope is NIL; if these header lines are present but + /// empty the corresponding member of the envelope is the empty + /// string. - Envelope { - date: NString( - msg.date - .as_ref() - .map(|d| IString::try_from(d.to_rfc3339()).unwrap()), - ), - subject: NString( - msg.subject - .as_ref() - .map(|d| IString::try_from(d.to_string()).unwrap()), - ), - sender: msg - .sender - .as_ref() - .map(|v| vec![convert_mbx(v)]) - .unwrap_or(from.clone()), - reply_to: if msg.reply_to.is_empty() { - from.clone() - } else { - convert_addresses(&msg.reply_to) - }, - from, - to: convert_addresses(&msg.to), - cc: convert_addresses(&msg.cc), - bcc: convert_addresses(&msg.bcc), - in_reply_to: NString( - msg.in_reply_to - .iter() - .next() - .map(|d| IString::try_from(d.to_string()).unwrap()), - ), - message_id: NString( - msg.msg_id - .as_ref() - .map(|d| IString::try_from(d.to_string()).unwrap()), - ), + //@FIXME return an error if the envelope is invalid instead of panicking + //@FIXME some fields must be defaulted if there are not set. + pub fn message_envelope(&self) -> Envelope<'static> { + let msg = self.0; + let from = msg.from.iter().map(convert_mbx).collect::>(); + + Envelope { + date: NString( + msg.date + .as_ref() + .map(|d| IString::try_from(d.to_rfc3339()).unwrap()), + ), + subject: NString( + msg.subject + .as_ref() + .map(|d| IString::try_from(d.to_string()).unwrap()), + ), + sender: msg + .sender + .as_ref() + .map(|v| vec![convert_mbx(v)]) + .unwrap_or(from.clone()), + reply_to: if msg.reply_to.is_empty() { + from.clone() + } else { + convert_addresses(&msg.reply_to) + }, + from, + to: convert_addresses(&msg.to), + cc: convert_addresses(&msg.cc), + bcc: convert_addresses(&msg.bcc), + in_reply_to: NString( + msg.in_reply_to + .iter() + .next() + .map(|d| IString::try_from(d.to_string()).unwrap()), + ), + message_id: NString( + msg.msg_id + .as_ref() + .map(|d| IString::try_from(d.to_string()).unwrap()), + ), + } } } diff --git a/src/imap/mail_view.rs b/src/imap/mail_view.rs index fc36e21..3fef145 100644 --- a/src/imap/mail_view.rs +++ b/src/imap/mail_view.rs @@ -1,7 +1,7 @@ use std::num::NonZeroU32; use anyhow::{anyhow, bail, Result}; -use chrono::{Offset, TimeZone, Utc}; +use chrono::{Offset, TimeZone, Utc, DateTime as ChronoDateTime, Local, naive::NaiveDate}; use imap_codec::imap_types::core::NString; use imap_codec::imap_types::datetime::DateTime; @@ -20,7 +20,7 @@ use crate::mail::query::QueryResult; use crate::imap::attributes::AttributesProxy; use crate::imap::flags; -use crate::imap::imf_view::message_envelope; +use crate::imap::imf_view::ImfView; use crate::imap::index::MailIndex; use crate::imap::mime_view; use crate::imap::response::Body; @@ -52,6 +52,10 @@ impl<'a> MailView<'a> { }) } + pub fn imf(&self) -> Option { + self.content.imf().map(ImfView) + } + pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> { let mut seen = SeenFlag::DoNothing; let res_attrs = ap @@ -89,6 +93,16 @@ impl<'a> MailView<'a> { )) } + pub fn stored_naive_date(&self) -> Result { + let mail_meta = self.query_result.metadata().expect("metadata were fetched"); + let mail_ts: i64 = mail_meta.internaldate.try_into()?; + let msg_date: ChronoDateTime = ChronoDateTime::from_timestamp(mail_ts, 0) + .ok_or(anyhow!("unable to parse timestamp"))? + .with_timezone(&Local); + + Ok(msg_date.date_naive()) + } + // Private function, mainly for filter! fn uid(&self) -> MessageDataItem<'static> { MessageDataItem::Uid(self.in_idx.uid.clone()) @@ -135,7 +149,7 @@ impl<'a> MailView<'a> { } fn envelope(&self) -> MessageDataItem<'static> { - MessageDataItem::Envelope(message_envelope(self.content.imf().clone())) + MessageDataItem::Envelope(self.imf().expect("an imf object is derivable from fetchedmail").message_envelope()) } fn body(&self) -> Result> { @@ -239,11 +253,11 @@ impl<'a> FetchedMail<'a> { } } - fn imf(&self) -> &imf::Imf<'a> { + fn imf(&self) -> Option<&imf::Imf<'a>> { match self { - FetchedMail::Full(AnyPart::Msg(x)) => &x.imf, - FetchedMail::Partial(x) => &x, - _ => panic!("Can't contain AnyPart that is not a message"), + FetchedMail::Full(AnyPart::Msg(x)) => Some(&x.imf), + FetchedMail::Partial(x) => Some(&x), + _ => None, } } } diff --git a/src/imap/mailbox_view.rs b/src/imap/mailbox_view.rs index 3c43be8..362e2e2 100644 --- a/src/imap/mailbox_view.rs +++ b/src/imap/mailbox_view.rs @@ -330,7 +330,7 @@ impl MailboxView { let query_result = self.0.query(&uuids, query_scope).fetch().await?; // 5. If needed, filter the selection based on the body - let kept_query = crit.filter_on_query(&to_fetch, &query_result); + let kept_query = crit.filter_on_query(&to_fetch, &query_result)?; // 6. Format the result according to the client's taste: // either return UID or ID. diff --git a/src/imap/mime_view.rs b/src/imap/mime_view.rs index 1f36c47..5175c76 100644 --- a/src/imap/mime_view.rs +++ b/src/imap/mime_view.rs @@ -12,7 +12,7 @@ use eml_codec::{ header, mime, mime::r#type::Deductible, part::composite, part::discrete, part::AnyPart, }; -use crate::imap::imf_view::message_envelope; +use crate::imap::imf_view::ImfView; pub enum BodySection<'a> { Full(Cow<'a, [u8]>), @@ -347,7 +347,7 @@ impl<'a> NodeMsg<'a> { body: FetchBody { basic, specific: SpecificFields::Message { - envelope: Box::new(message_envelope(&self.1.imf)), + envelope: Box::new(ImfView(&self.1.imf).message_envelope()), body_structure: Box::new(NodeMime(&self.1.child).structure()?), number_of_lines: nol(self.1.raw_part), }, diff --git a/src/imap/search.rs b/src/imap/search.rs index 0ab0300..0e00025 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -1,10 +1,13 @@ +use std::num::NonZeroU32; + +use anyhow::Result; use imap_codec::imap_types::core::NonEmptyVec; use imap_codec::imap_types::search::SearchKey; use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet}; -use std::num::NonZeroU32; use crate::mail::query::{QueryScope, QueryResult}; use crate::imap::index::MailIndex; +use crate::imap::mail_view::MailView; pub enum SeqType { Undefined, @@ -121,13 +124,16 @@ impl<'a> Criteria<'a> { (to_keep, to_fetch) } - pub fn filter_on_query<'b>(&self, midx_list: &[MailIndex<'b>], query_result: &Vec>) -> Vec> { - midx_list + pub fn filter_on_query<'b>(&self, midx_list: &[MailIndex<'b>], query_result: &'b Vec>) -> Result>> { + Ok(midx_list .iter() .zip(query_result.iter()) - .filter(|(midx, qr)| self.is_keep_on_query(midx, qr)) - .map(|(midx, _qr)| midx.clone()) - .collect() + .map(|(midx, qr)| MailView::new(qr, midx.clone())) + .collect::, _>>()? + .into_iter() + .filter(|mail_view| self.is_keep_on_query(mail_view)) + .map(|mail_view| mail_view.in_idx) + .collect()) } // ---- @@ -163,24 +169,80 @@ impl<'a> Criteria<'a> { | Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_) | Text(_) | Body(_) => PartialDecision::Postpone, - _ => unreachable!(), + unknown => { + tracing::error!("Unknown filter {:?}", unknown); + PartialDecision::Discard + }, } } - fn is_keep_on_query(&self, midx: &MailIndex, qr: &QueryResult) -> bool { + + /// @TODO we re-eveluate twice the same logic. The correct way would be, on each pass, + /// to simplify the searck query, by removing the elements that were already checked. + /// For example if we have AND(OR(seqid(X), body(Y)), body(X)), we can't keep for sure + /// the email, as body(x) might be false. So we need to check it. But as seqid(x) is true, + /// we could simplify the request to just body(x) and truncate the first OR. Today, we are + /// not doing that, and thus we reevaluate everything. + fn is_keep_on_query(&self, mail_view: &MailView) -> bool { use SearchKey::*; match self.0 { // Combinator logic And(expr_list) => expr_list .as_ref() .iter() - .any(|cur| Criteria(cur).is_keep_on_query(midx, qr)), + .any(|cur| Criteria(cur).is_keep_on_query(mail_view)), Or(left, right) => { - Criteria(left).is_keep_on_query(midx, qr) || Criteria(right).is_keep_on_query(midx, qr) + Criteria(left).is_keep_on_query(mail_view) || Criteria(right).is_keep_on_query(mail_view) } - Not(expr) => !Criteria(expr).is_keep_on_query(midx, qr), + Not(expr) => !Criteria(expr).is_keep_on_query(mail_view), All => true, - _ => unimplemented!(), + + // Reevaluating our previous logic... + maybe_seq if is_sk_seq(maybe_seq) => is_keep_seq(maybe_seq, &mail_view.in_idx), + maybe_flag if is_sk_flag(maybe_flag) => is_keep_flag(maybe_flag, &mail_view.in_idx), + + // Filter on mail meta + Before(search_naive) => match mail_view.stored_naive_date() { + Ok(msg_naive) => &msg_naive < search_naive.as_ref(), + _ => false, + }, + On(search_naive) => match mail_view.stored_naive_date() { + Ok(msg_naive) => &msg_naive == search_naive.as_ref(), + _ => false, + }, + Since(search_naive) => match mail_view.stored_naive_date() { + Ok(msg_naive) => &msg_naive > search_naive.as_ref(), + _ => false, + }, + + // Message size is also stored in MailMeta + Larger(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size > *size_ref as usize, + Smaller(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size < *size_ref as usize, + + // Filter on well-known headers + Bcc(_) => unimplemented!(), + Cc(_) => unimplemented!(), + From(_) => unimplemented!(), + Subject(_)=> unimplemented!(), + To(_) => unimplemented!(), + + // Filter on arbitrary header + Header(..) => unimplemented!(), + + // Filter on Date header + SentBefore(_) => unimplemented!(), + SentOn(_) => unimplemented!(), + SentSince(_) => unimplemented!(), + + + // Filter on the full content of the email + Text(_) => unimplemented!(), + Body(_) => unimplemented!(), + + unknown => { + tracing::error!("Unknown filter {:?}", unknown); + false + }, } } } @@ -240,16 +302,16 @@ impl PartialDecision { fn or(&self, other: &Self) -> Self { match (self, other) { - (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, (Self::Keep, _) | (_, Self::Keep) => Self::Keep, + (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, (Self::Discard, Self::Discard) => Self::Discard, } } fn and(&self, other: &Self) -> Self { match (self, other) { - (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, (Self::Discard, _) | (_, Self::Discard) => Self::Discard, + (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, (Self::Keep, Self::Keep) => Self::Keep, } } -- cgit v1.2.3 From 5622a71cd163e4b18a3eabe8a28a5aedb23ee25d Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 6 Jan 2024 22:53:41 +0100 Subject: Search MIME headers --- src/imap/imf_view.rs | 7 +++++++ src/imap/mail_view.rs | 50 +++++++++++++++++++++++++++++++++++++------------- src/imap/mime_view.rs | 29 ++++++++++++++++++++++++----- src/imap/search.rs | 20 +++++++++----------- 4 files changed, 77 insertions(+), 29 deletions(-) (limited to 'src/imap') diff --git a/src/imap/imf_view.rs b/src/imap/imf_view.rs index 8b52b9e..b56e27d 100644 --- a/src/imap/imf_view.rs +++ b/src/imap/imf_view.rs @@ -1,3 +1,6 @@ +use anyhow::{anyhow, Result}; +use chrono::naive::NaiveDate; + use imap_codec::imap_types::core::{IString, NString}; use imap_codec::imap_types::envelope::{Address, Envelope}; @@ -6,6 +9,10 @@ use eml_codec::imf; pub struct ImfView<'a>(pub &'a imf::Imf<'a>); impl<'a> ImfView<'a> { + pub fn naive_date(&self) -> Result { + Ok(self.0.date.ok_or(anyhow!("date is not set"))?.date_naive()) + } + /// Envelope rules are defined in RFC 3501, section 7.4.2 /// https://datatracker.ietf.org/doc/html/rfc3501#section-7.4.2 /// diff --git a/src/imap/mail_view.rs b/src/imap/mail_view.rs index 3fef145..365e535 100644 --- a/src/imap/mail_view.rs +++ b/src/imap/mail_view.rs @@ -40,12 +40,12 @@ impl<'a> MailView<'a> { QueryResult::FullResult { content, .. } => { let (_, parsed) = eml_codec::parse_message(&content).or(Err(anyhow!("Invalid mail body")))?; - FetchedMail::new_from_message(parsed) + FetchedMail::full_from_message(parsed) } QueryResult::PartialResult { metadata, .. } => { - let (_, parsed) = eml_codec::parse_imf(&metadata.headers) + let (_, parsed) = eml_codec::parse_message(&metadata.headers) .or(Err(anyhow!("unable to parse email headers")))?; - FetchedMail::Partial(parsed) + FetchedMail::partial_from_message(parsed) } QueryResult::IndexResult { .. } => FetchedMail::IndexOnly, }, @@ -53,7 +53,11 @@ impl<'a> MailView<'a> { } pub fn imf(&self) -> Option { - self.content.imf().map(ImfView) + self.content.as_imf().map(ImfView) + } + + pub fn selected_mime(&'a self) -> Option> { + self.content.as_anypart().ok().map(mime_view::SelectedMime) } pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> { @@ -103,6 +107,20 @@ impl<'a> MailView<'a> { Ok(msg_date.date_naive()) } + pub fn is_header_contains_pattern(&self, hdr: &[u8], pattern: &[u8]) -> bool { + let mime = match self.selected_mime() { + None => return false, + Some(x) => x, + }; + + let val = match mime.header_value(hdr) { + None => return false, + Some(x) => x, + }; + + val.windows(pattern.len()).any(|win| win == pattern) + } + // Private function, mainly for filter! fn uid(&self) -> MessageDataItem<'static> { MessageDataItem::Uid(self.in_idx.uid.clone()) @@ -139,12 +157,12 @@ impl<'a> MailView<'a> { } fn rfc_822_text(&self) -> Result> { - let txt: NString = self.content.as_full()?.raw_body.to_vec().try_into()?; + let txt: NString = self.content.as_msg()?.raw_body.to_vec().try_into()?; Ok(MessageDataItem::Rfc822Text(txt)) } fn rfc822(&self) -> Result> { - let full: NString = self.content.as_full()?.raw_part.to_vec().try_into()?; + let full: NString = self.content.as_msg()?.raw_part.to_vec().try_into()?; Ok(MessageDataItem::Rfc822(full)) } @@ -154,13 +172,13 @@ impl<'a> MailView<'a> { fn body(&self) -> Result> { Ok(MessageDataItem::Body(mime_view::bodystructure( - self.content.as_full()?.child.as_ref(), + self.content.as_msg()?.child.as_ref(), )?)) } fn body_structure(&self) -> Result> { Ok(MessageDataItem::Body(mime_view::bodystructure( - self.content.as_full()?.child.as_ref(), + self.content.as_msg()?.child.as_ref(), )?)) } @@ -231,32 +249,38 @@ pub enum SeenFlag { pub enum FetchedMail<'a> { IndexOnly, - Partial(imf::Imf<'a>), + Partial(AnyPart<'a>), Full(AnyPart<'a>), } impl<'a> FetchedMail<'a> { - pub fn new_from_message(msg: Message<'a>) -> Self { + pub fn full_from_message(msg: Message<'a>) -> Self { Self::Full(AnyPart::Msg(msg)) } + pub fn partial_from_message(msg: Message<'a>) -> Self { + Self::Partial(AnyPart::Msg(msg)) + } + fn as_anypart(&self) -> Result<&AnyPart<'a>> { match self { FetchedMail::Full(x) => Ok(&x), + FetchedMail::Partial(x) => Ok(&x), _ => bail!("The full message must be fetched, not only its headers"), } } - fn as_full(&self) -> Result<&Message<'a>> { + fn as_msg(&self) -> Result<&Message<'a>> { match self { FetchedMail::Full(AnyPart::Msg(x)) => Ok(&x), + FetchedMail::Partial(AnyPart::Msg(x)) => Ok(&x), _ => bail!("The full message must be fetched, not only its headers AND it must be an AnyPart::Msg."), } } - fn imf(&self) -> Option<&imf::Imf<'a>> { + fn as_imf(&self) -> Option<&imf::Imf<'a>> { match self { FetchedMail::Full(AnyPart::Msg(x)) => Some(&x.imf), - FetchedMail::Partial(x) => Some(&x), + FetchedMail::Partial(AnyPart::Msg(x)) => Some(&x.imf), _ => None, } } diff --git a/src/imap/mime_view.rs b/src/imap/mime_view.rs index 5175c76..cf6c751 100644 --- a/src/imap/mime_view.rs +++ b/src/imap/mime_view.rs @@ -164,8 +164,23 @@ impl<'a> SubsettedSection<'a> { /// Used for current MIME inspection /// /// See NodeMime for recursive logic -struct SelectedMime<'a>(&'a AnyPart<'a>); +pub struct SelectedMime<'a>(pub &'a AnyPart<'a>); impl<'a> SelectedMime<'a> { + pub fn header_value(&'a self, to_match_ext: &[u8]) -> Option<&'a [u8]> { + let to_match = to_match_ext.to_ascii_lowercase(); + + self.eml_mime() + .kv + .iter() + .filter_map(|field| match field { + header::Field::Good(header::Kv2(k, v)) => Some((k, v)), + _ => None, + }) + .find(|(k, _)| k.to_ascii_lowercase() == to_match) + .map(|(_, v)| v) + .copied() + } + /// The subsetted fetch section basically tells us the /// extraction logic to apply on our selected MIME. /// This function acts as a router for these logic. @@ -200,6 +215,13 @@ impl<'a> SelectedMime<'a> { Ok(ExtractedFull(bytes.to_vec().into())) } + fn eml_mime(&self) -> &eml_codec::mime::NaiveMIME<'_> { + match &self.0 { + AnyPart::Msg(msg) => msg.child.mime(), + other => other.mime(), + } + } + /// The [...] HEADER.FIELDS, and HEADER.FIELDS.NOT part /// specifiers refer to the [RFC-2822] header of the message or of /// an encapsulated [MIME-IMT] MESSAGE/RFC822 message. @@ -231,10 +253,7 @@ impl<'a> SelectedMime<'a> { .collect::>(); // Extract MIME headers - let mime = match &self.0 { - AnyPart::Msg(msg) => msg.child.mime(), - other => other.mime(), - }; + let mime = self.eml_mime(); // Filter our MIME headers based on the field index // 1. Keep only the correctly formatted headers diff --git a/src/imap/search.rs b/src/imap/search.rs index 0e00025..2fbfdcc 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -220,19 +220,17 @@ impl<'a> Criteria<'a> { Smaller(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size < *size_ref as usize, // Filter on well-known headers - Bcc(_) => unimplemented!(), - Cc(_) => unimplemented!(), - From(_) => unimplemented!(), - Subject(_)=> unimplemented!(), - To(_) => unimplemented!(), - - // Filter on arbitrary header - Header(..) => unimplemented!(), + Bcc(txt) => mail_view.is_header_contains_pattern(&b"bcc"[..], txt.as_ref()), + Cc(txt) => mail_view.is_header_contains_pattern(&b"cc"[..], txt.as_ref()), + From(txt) => mail_view.is_header_contains_pattern(&b"from"[..], txt.as_ref()), + Subject(txt)=> mail_view.is_header_contains_pattern(&b"subject"[..], txt.as_ref()), + To(txt) => mail_view.is_header_contains_pattern(&b"to"[..], txt.as_ref()), + Header(hdr, txt) => mail_view.is_header_contains_pattern(hdr.as_ref(), txt.as_ref()), // Filter on Date header - SentBefore(_) => unimplemented!(), - SentOn(_) => unimplemented!(), - SentSince(_) => unimplemented!(), + SentBefore(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive < search_naive.as_ref()).unwrap_or(false), + SentOn(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive == search_naive.as_ref()).unwrap_or(false), + SentSince(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive > search_naive.as_ref()).unwrap_or(false), // Filter on the full content of the email -- cgit v1.2.3 From 4e3cbf79d03c84028733b0ad5f9bd06a8a13757b Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 6 Jan 2024 23:24:44 +0100 Subject: implemented text search --- src/imap/mail_view.rs | 6 +++--- src/imap/search.rs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/imap') diff --git a/src/imap/mail_view.rs b/src/imap/mail_view.rs index 365e535..baeb2af 100644 --- a/src/imap/mail_view.rs +++ b/src/imap/mail_view.rs @@ -261,7 +261,7 @@ impl<'a> FetchedMail<'a> { Self::Partial(AnyPart::Msg(msg)) } - fn as_anypart(&self) -> Result<&AnyPart<'a>> { + pub fn as_anypart(&self) -> Result<&AnyPart<'a>> { match self { FetchedMail::Full(x) => Ok(&x), FetchedMail::Partial(x) => Ok(&x), @@ -269,7 +269,7 @@ impl<'a> FetchedMail<'a> { } } - fn as_msg(&self) -> Result<&Message<'a>> { + pub fn as_msg(&self) -> Result<&Message<'a>> { match self { FetchedMail::Full(AnyPart::Msg(x)) => Ok(&x), FetchedMail::Partial(AnyPart::Msg(x)) => Ok(&x), @@ -277,7 +277,7 @@ impl<'a> FetchedMail<'a> { } } - fn as_imf(&self) -> Option<&imf::Imf<'a>> { + pub fn as_imf(&self) -> Option<&imf::Imf<'a>> { match self { FetchedMail::Full(AnyPart::Msg(x)) => Some(&x.imf), FetchedMail::Partial(AnyPart::Msg(x)) => Some(&x.imf), diff --git a/src/imap/search.rs b/src/imap/search.rs index 2fbfdcc..fb889d7 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -190,7 +190,7 @@ impl<'a> Criteria<'a> { And(expr_list) => expr_list .as_ref() .iter() - .any(|cur| Criteria(cur).is_keep_on_query(mail_view)), + .all(|cur| Criteria(cur).is_keep_on_query(mail_view)), Or(left, right) => { Criteria(left).is_keep_on_query(mail_view) || Criteria(right).is_keep_on_query(mail_view) } @@ -234,8 +234,8 @@ impl<'a> Criteria<'a> { // Filter on the full content of the email - Text(_) => unimplemented!(), - Body(_) => unimplemented!(), + Text(txt) => mail_view.content.as_msg().map(|msg| msg.raw_part.windows(txt.as_ref().len()).any(|win| win == txt.as_ref())).unwrap_or(false), + Body(txt) => mail_view.content.as_msg().map(|msg| msg.raw_body.windows(txt.as_ref().len()).any(|win| win == txt.as_ref())).unwrap_or(false), unknown => { tracing::error!("Unknown filter {:?}", unknown); -- cgit v1.2.3 From 1d84b0ffd006b895aeb15c621fcd0ced826a0599 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 6 Jan 2024 23:35:23 +0100 Subject: Format code --- src/imap/imf_view.rs | 68 ++++++++++---------- src/imap/index.rs | 38 ++++++----- src/imap/mail_view.rs | 9 ++- src/imap/mailbox_view.rs | 5 +- src/imap/search.rs | 160 +++++++++++++++++++++++++++++++++-------------- 5 files changed, 175 insertions(+), 105 deletions(-) (limited to 'src/imap') diff --git a/src/imap/imf_view.rs b/src/imap/imf_view.rs index b56e27d..a4ca2e8 100644 --- a/src/imap/imf_view.rs +++ b/src/imap/imf_view.rs @@ -40,40 +40,40 @@ impl<'a> ImfView<'a> { Envelope { date: NString( - msg.date - .as_ref() - .map(|d| IString::try_from(d.to_rfc3339()).unwrap()), - ), - subject: NString( - msg.subject - .as_ref() - .map(|d| IString::try_from(d.to_string()).unwrap()), - ), - sender: msg - .sender - .as_ref() - .map(|v| vec![convert_mbx(v)]) - .unwrap_or(from.clone()), - reply_to: if msg.reply_to.is_empty() { - from.clone() - } else { - convert_addresses(&msg.reply_to) - }, - from, - to: convert_addresses(&msg.to), - cc: convert_addresses(&msg.cc), - bcc: convert_addresses(&msg.bcc), - in_reply_to: NString( - msg.in_reply_to - .iter() - .next() - .map(|d| IString::try_from(d.to_string()).unwrap()), - ), - message_id: NString( - msg.msg_id - .as_ref() - .map(|d| IString::try_from(d.to_string()).unwrap()), - ), + msg.date + .as_ref() + .map(|d| IString::try_from(d.to_rfc3339()).unwrap()), + ), + subject: NString( + msg.subject + .as_ref() + .map(|d| IString::try_from(d.to_string()).unwrap()), + ), + sender: msg + .sender + .as_ref() + .map(|v| vec![convert_mbx(v)]) + .unwrap_or(from.clone()), + reply_to: if msg.reply_to.is_empty() { + from.clone() + } else { + convert_addresses(&msg.reply_to) + }, + from, + to: convert_addresses(&msg.to), + cc: convert_addresses(&msg.cc), + bcc: convert_addresses(&msg.bcc), + in_reply_to: NString( + msg.in_reply_to + .iter() + .next() + .map(|d| IString::try_from(d.to_string()).unwrap()), + ), + message_id: NString( + msg.msg_id + .as_ref() + .map(|d| IString::try_from(d.to_string()).unwrap()), + ), } } } diff --git a/src/imap/index.rs b/src/imap/index.rs index 48d5ebd..8ec3cca 100644 --- a/src/imap/index.rs +++ b/src/imap/index.rs @@ -1,7 +1,7 @@ use std::num::NonZeroU32; use anyhow::{anyhow, bail, Result}; -use imap_codec::imap_types::sequence::{self, Sequence, SequenceSet, SeqOrUid}; +use imap_codec::imap_types::sequence::{self, SeqOrUid, Sequence, SequenceSet}; use crate::mail::uidindex::{ImapUid, UidIndex}; use crate::mail::unique_ident::UniqueIdent; @@ -96,18 +96,20 @@ pub struct MailIndex<'a> { } impl<'a> MailIndex<'a> { - // The following functions are used to implement the SEARCH command + // The following functions are used to implement the SEARCH command pub fn is_in_sequence_i(&self, seq: &Sequence) -> bool { match seq { Sequence::Single(SeqOrUid::Asterisk) => true, Sequence::Single(SeqOrUid::Value(target)) => target == &self.i, - Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x)) - | Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.i, - Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => if x1 < x2 { - x1 <= &self.i && &self.i <= x2 - } else { - x1 >= &self.i && &self.i >= x2 - }, + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x)) + | Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.i, + Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => { + if x1 < x2 { + x1 <= &self.i && &self.i <= x2 + } else { + x1 >= &self.i && &self.i >= x2 + } + } Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true, } } @@ -117,17 +119,21 @@ impl<'a> MailIndex<'a> { Sequence::Single(SeqOrUid::Asterisk) => true, Sequence::Single(SeqOrUid::Value(target)) => target == &self.uid, Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x)) - | Sequence::Range(SeqOrUid::Value(x),SeqOrUid::Asterisk) => x <= &self.uid, - Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => if x1 < x2 { - x1 <= &self.uid && &self.uid <= x2 - } else { - x1 >= &self.uid && &self.uid >= x2 - }, + | Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.uid, + Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => { + if x1 < x2 { + x1 <= &self.uid && &self.uid <= x2 + } else { + x1 >= &self.uid && &self.uid >= x2 + } + } Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true, } } pub fn is_flag_set(&self, flag: &str) -> bool { - self.flags.iter().any(|candidate| candidate.as_str() == flag) + self.flags + .iter() + .any(|candidate| candidate.as_str() == flag) } } diff --git a/src/imap/mail_view.rs b/src/imap/mail_view.rs index baeb2af..2c8723e 100644 --- a/src/imap/mail_view.rs +++ b/src/imap/mail_view.rs @@ -1,7 +1,7 @@ use std::num::NonZeroU32; use anyhow::{anyhow, bail, Result}; -use chrono::{Offset, TimeZone, Utc, DateTime as ChronoDateTime, Local, naive::NaiveDate}; +use chrono::{naive::NaiveDate, DateTime as ChronoDateTime, Local, Offset, TimeZone, Utc}; use imap_codec::imap_types::core::NString; use imap_codec::imap_types::datetime::DateTime; @@ -167,7 +167,11 @@ impl<'a> MailView<'a> { } fn envelope(&self) -> MessageDataItem<'static> { - MessageDataItem::Envelope(self.imf().expect("an imf object is derivable from fetchedmail").message_envelope()) + MessageDataItem::Envelope( + self.imf() + .expect("an imf object is derivable from fetchedmail") + .message_envelope(), + ) } fn body(&self) -> Result> { @@ -237,7 +241,6 @@ impl<'a> MailView<'a> { .ok_or(anyhow!("Unable to parse internal date"))?; Ok(MessageDataItem::InternalDate(DateTime::unvalidated(dt))) } - } pub enum SeenFlag { diff --git a/src/imap/mailbox_view.rs b/src/imap/mailbox_view.rs index 362e2e2..2841b7b 100644 --- a/src/imap/mailbox_view.rs +++ b/src/imap/mailbox_view.rs @@ -323,10 +323,7 @@ impl MailboxView { // 4. Fetch additional info about the emails let query_scope = crit.query_scope(); - let uuids = to_fetch - .iter() - .map(|midx| midx.uuid) - .collect::>(); + let uuids = to_fetch.iter().map(|midx| midx.uuid).collect::>(); let query_result = self.0.query(&uuids, query_scope).fetch().await?; // 5. If needed, filter the selection based on the body diff --git a/src/imap/search.rs b/src/imap/search.rs index fb889d7..2b0b34b 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -5,9 +5,9 @@ use imap_codec::imap_types::core::NonEmptyVec; use imap_codec::imap_types::search::SearchKey; use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet}; -use crate::mail::query::{QueryScope, QueryResult}; use crate::imap::index::MailIndex; use crate::imap::mail_view::MailView; +use crate::mail::query::{QueryResult, QueryScope}; pub enum SeqType { Undefined, @@ -20,7 +20,6 @@ impl SeqType { } } - pub struct Criteria<'a>(pub &'a SearchKey<'a>); impl<'a> Criteria<'a> { /// Returns a set of email identifiers that is greater or equal @@ -87,11 +86,16 @@ impl<'a> Criteria<'a> { use SearchKey::*; match self.0 { // Combinators - And(and_list) => and_list.as_ref().iter().fold(QueryScope::Index, |prev, sk| { - prev.union(&Criteria(sk).query_scope()) - }), + And(and_list) => and_list + .as_ref() + .iter() + .fold(QueryScope::Index, |prev, sk| { + prev.union(&Criteria(sk).query_scope()) + }), Not(inner) => Criteria(inner).query_scope(), - Or(left, right) => Criteria(left).query_scope().union(&Criteria(right).query_scope()), + Or(left, right) => Criteria(left) + .query_scope() + .union(&Criteria(right).query_scope()), All => QueryScope::Index, // IMF Headers @@ -109,9 +113,12 @@ impl<'a> Criteria<'a> { } /// Returns emails that we now for sure we want to keep - /// but also a second list of emails we need to investigate further by + /// but also a second list of emails we need to investigate further by /// fetching some remote data - pub fn filter_on_idx<'b>(&self, midx_list: &[MailIndex<'b>]) -> (Vec>, Vec>) { + pub fn filter_on_idx<'b>( + &self, + midx_list: &[MailIndex<'b>], + ) -> (Vec>, Vec>) { let (p1, p2): (Vec<_>, Vec<_>) = midx_list .iter() .map(|x| (x, self.is_keep_on_idx(x))) @@ -124,7 +131,11 @@ impl<'a> Criteria<'a> { (to_keep, to_fetch) } - pub fn filter_on_query<'b>(&self, midx_list: &[MailIndex<'b>], query_result: &'b Vec>) -> Result>> { + pub fn filter_on_query<'b>( + &self, + midx_list: &[MailIndex<'b>], + query_result: &'b Vec>, + ) -> Result>> { Ok(midx_list .iter() .zip(query_result.iter()) @@ -137,8 +148,8 @@ impl<'a> Criteria<'a> { } // ---- - - /// Here we are doing a partial filtering: we do not have access + + /// Here we are doing a partial filtering: we do not have access /// to the headers or to the body, so every time we encounter a rule /// based on them, we need to keep it. /// @@ -151,7 +162,9 @@ impl<'a> Criteria<'a> { And(expr_list) => expr_list .as_ref() .iter() - .fold(PartialDecision::Keep, |acc, cur| acc.and(&Criteria(cur).is_keep_on_idx(midx))), + .fold(PartialDecision::Keep, |acc, cur| { + acc.and(&Criteria(cur).is_keep_on_idx(midx)) + }), Or(left, right) => { let left_decision = Criteria(left).is_keep_on_idx(midx); let right_decision = Criteria(right).is_keep_on_idx(midx); @@ -163,7 +176,7 @@ impl<'a> Criteria<'a> { // Sequence logic maybe_seq if is_sk_seq(maybe_seq) => is_keep_seq(maybe_seq, midx).into(), maybe_flag if is_sk_flag(maybe_flag) => is_keep_flag(maybe_flag, midx).into(), - + // All the stuff we can't evaluate yet Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) | Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_) @@ -172,16 +185,15 @@ impl<'a> Criteria<'a> { unknown => { tracing::error!("Unknown filter {:?}", unknown); PartialDecision::Discard - }, + } } } - /// @TODO we re-eveluate twice the same logic. The correct way would be, on each pass, /// to simplify the searck query, by removing the elements that were already checked. /// For example if we have AND(OR(seqid(X), body(Y)), body(X)), we can't keep for sure /// the email, as body(x) might be false. So we need to check it. But as seqid(x) is true, - /// we could simplify the request to just body(x) and truncate the first OR. Today, we are + /// we could simplify the request to just body(x) and truncate the first OR. Today, we are /// not doing that, and thus we reevaluate everything. fn is_keep_on_query(&self, mail_view: &MailView) -> bool { use SearchKey::*; @@ -192,7 +204,8 @@ impl<'a> Criteria<'a> { .iter() .all(|cur| Criteria(cur).is_keep_on_query(mail_view)), Or(left, right) => { - Criteria(left).is_keep_on_query(mail_view) || Criteria(right).is_keep_on_query(mail_view) + Criteria(left).is_keep_on_query(mail_view) + || Criteria(right).is_keep_on_query(mail_view) } Not(expr) => !Criteria(expr).is_keep_on_query(mail_view), All => true, @@ -209,38 +222,82 @@ impl<'a> Criteria<'a> { On(search_naive) => match mail_view.stored_naive_date() { Ok(msg_naive) => &msg_naive == search_naive.as_ref(), _ => false, - }, + }, Since(search_naive) => match mail_view.stored_naive_date() { Ok(msg_naive) => &msg_naive > search_naive.as_ref(), _ => false, }, // Message size is also stored in MailMeta - Larger(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size > *size_ref as usize, - Smaller(size_ref) => mail_view.query_result.metadata().expect("metadata were fetched").rfc822_size < *size_ref as usize, + Larger(size_ref) => { + mail_view + .query_result + .metadata() + .expect("metadata were fetched") + .rfc822_size + > *size_ref as usize + } + Smaller(size_ref) => { + mail_view + .query_result + .metadata() + .expect("metadata were fetched") + .rfc822_size + < *size_ref as usize + } // Filter on well-known headers - Bcc(txt) => mail_view.is_header_contains_pattern(&b"bcc"[..], txt.as_ref()), - Cc(txt) => mail_view.is_header_contains_pattern(&b"cc"[..], txt.as_ref()), + Bcc(txt) => mail_view.is_header_contains_pattern(&b"bcc"[..], txt.as_ref()), + Cc(txt) => mail_view.is_header_contains_pattern(&b"cc"[..], txt.as_ref()), From(txt) => mail_view.is_header_contains_pattern(&b"from"[..], txt.as_ref()), - Subject(txt)=> mail_view.is_header_contains_pattern(&b"subject"[..], txt.as_ref()), + Subject(txt) => mail_view.is_header_contains_pattern(&b"subject"[..], txt.as_ref()), To(txt) => mail_view.is_header_contains_pattern(&b"to"[..], txt.as_ref()), - Header(hdr, txt) => mail_view.is_header_contains_pattern(hdr.as_ref(), txt.as_ref()), + Header(hdr, txt) => mail_view.is_header_contains_pattern(hdr.as_ref(), txt.as_ref()), // Filter on Date header - SentBefore(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive < search_naive.as_ref()).unwrap_or(false), - SentOn(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive == search_naive.as_ref()).unwrap_or(false), - SentSince(search_naive) => mail_view.imf().map(|imf| imf.naive_date().ok()).flatten().map(|msg_naive| &msg_naive > search_naive.as_ref()).unwrap_or(false), - + SentBefore(search_naive) => mail_view + .imf() + .map(|imf| imf.naive_date().ok()) + .flatten() + .map(|msg_naive| &msg_naive < search_naive.as_ref()) + .unwrap_or(false), + SentOn(search_naive) => mail_view + .imf() + .map(|imf| imf.naive_date().ok()) + .flatten() + .map(|msg_naive| &msg_naive == search_naive.as_ref()) + .unwrap_or(false), + SentSince(search_naive) => mail_view + .imf() + .map(|imf| imf.naive_date().ok()) + .flatten() + .map(|msg_naive| &msg_naive > search_naive.as_ref()) + .unwrap_or(false), // Filter on the full content of the email - Text(txt) => mail_view.content.as_msg().map(|msg| msg.raw_part.windows(txt.as_ref().len()).any(|win| win == txt.as_ref())).unwrap_or(false), - Body(txt) => mail_view.content.as_msg().map(|msg| msg.raw_body.windows(txt.as_ref().len()).any(|win| win == txt.as_ref())).unwrap_or(false), + Text(txt) => mail_view + .content + .as_msg() + .map(|msg| { + msg.raw_part + .windows(txt.as_ref().len()) + .any(|win| win == txt.as_ref()) + }) + .unwrap_or(false), + Body(txt) => mail_view + .content + .as_msg() + .map(|msg| { + msg.raw_body + .windows(txt.as_ref().len()) + .any(|win| win == txt.as_ref()) + }) + .unwrap_or(false), unknown => { tracing::error!("Unknown filter {:?}", unknown); false - }, + } } } } @@ -281,7 +338,7 @@ enum PartialDecision { Discard, Postpone, } -impl From for PartialDecision { +impl From for PartialDecision { fn from(x: bool) -> Self { match x { true => PartialDecision::Keep, @@ -323,9 +380,8 @@ impl PartialDecision { fn is_sk_flag(sk: &SearchKey) -> bool { use SearchKey::*; match sk { - Answered | Deleted | Draft | Flagged | Keyword(..) | New | Old - | Recent | Seen | Unanswered | Undeleted | Undraft - | Unflagged | Unkeyword(..) | Unseen => true, + Answered | Deleted | Draft | Flagged | Keyword(..) | New | Old | Recent | Seen + | Unanswered | Undeleted | Undraft | Unflagged | Unkeyword(..) | Unseen => true, _ => false, } } @@ -342,37 +398,37 @@ fn is_keep_flag(sk: &SearchKey, midx: &MailIndex) -> bool { let is_recent = midx.is_flag_set("\\Recent"); let is_seen = midx.is_flag_set("\\Seen"); is_recent && !is_seen - }, + } Old => { let is_recent = midx.is_flag_set("\\Recent"); !is_recent - }, - Recent => midx.is_flag_set("\\Recent"), - Seen => midx.is_flag_set("\\Seen"), - Unanswered => { + } + Recent => midx.is_flag_set("\\Recent"), + Seen => midx.is_flag_set("\\Seen"), + Unanswered => { let is_answered = midx.is_flag_set("\\Recent"); !is_answered - }, + } Undeleted => { let is_deleted = midx.is_flag_set("\\Deleted"); !is_deleted - }, + } Undraft => { let is_draft = midx.is_flag_set("\\Draft"); !is_draft - }, + } Unflagged => { let is_flagged = midx.is_flag_set("\\Flagged"); !is_flagged - }, + } Unkeyword(kw) => { let is_keyword_set = midx.is_flag_set(kw.inner()); !is_keyword_set - }, + } Unseen => { let is_seen = midx.is_flag_set("\\Seen"); !is_seen - }, + } // Not flag logic _ => unreachable!(), @@ -389,8 +445,16 @@ fn is_sk_seq(sk: &SearchKey) -> bool { fn is_keep_seq(sk: &SearchKey, midx: &MailIndex) -> bool { use SearchKey::*; match sk { - SequenceSet(seq_set) => seq_set.0.as_ref().iter().any(|seq| midx.is_in_sequence_i(seq)), - Uid(seq_set) => seq_set.0.as_ref().iter().any(|seq| midx.is_in_sequence_uid(seq)), + SequenceSet(seq_set) => seq_set + .0 + .as_ref() + .iter() + .any(|seq| midx.is_in_sequence_i(seq)), + Uid(seq_set) => seq_set + .0 + .as_ref() + .iter() + .any(|seq| midx.is_in_sequence_uid(seq)), _ => unreachable!(), } } -- cgit v1.2.3 From 35fd24ee46d8162cffe3aebcb32d0db1f35bd220 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Mon, 8 Jan 2024 07:52:45 +0100 Subject: Add the ENABLE capability, reduce wild logging --- src/imap/capability.rs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/imap') diff --git a/src/imap/capability.rs b/src/imap/capability.rs index 631c3e2..feadb6b 100644 --- a/src/imap/capability.rs +++ b/src/imap/capability.rs @@ -22,6 +22,7 @@ impl Default for ServerCapability { fn default() -> Self { Self(HashSet::from([ Capability::Imap4Rev1, + Capability::Enable, Capability::Move, Capability::LiteralPlus, capability_unselect(), -- cgit v1.2.3 From 558e32fbd27be9a81144571b4baf318293be1344 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Mon, 8 Jan 2024 11:13:13 +0100 Subject: UID sequence are now correctly fetched --- src/imap/index.rs | 156 ++++++++++++++++++++++++++--------------------- src/imap/mail_view.rs | 4 +- src/imap/mailbox_view.rs | 23 ++++--- src/imap/search.rs | 12 ++-- 4 files changed, 112 insertions(+), 83 deletions(-) (limited to 'src/imap') diff --git a/src/imap/index.rs b/src/imap/index.rs index 8ec3cca..9adf8b2 100644 --- a/src/imap/index.rs +++ b/src/imap/index.rs @@ -1,93 +1,113 @@ use std::num::NonZeroU32; -use anyhow::{anyhow, bail, Result}; +use anyhow::{anyhow, Context, Result}; use imap_codec::imap_types::sequence::{self, SeqOrUid, Sequence, SequenceSet}; use crate::mail::uidindex::{ImapUid, UidIndex}; use crate::mail::unique_ident::UniqueIdent; -pub struct Index<'a>(pub &'a UidIndex); +pub struct Index<'a> { + pub imap_index: Vec>, + pub internal: &'a UidIndex, +} impl<'a> Index<'a> { - pub fn fetch( - self: &Index<'a>, - sequence_set: &SequenceSet, - by_uid: bool, - ) -> Result>> { - let mail_vec = self - .0 + pub fn new(internal: &'a UidIndex) -> Result { + let imap_index = internal .idx_by_uid .iter() - .map(|(uid, uuid)| (*uid, *uuid)) - .collect::>(); + .enumerate() + .map(|(i_enum, (&uid, &uuid))| { + let flags = internal.table.get(&uuid).ok_or(anyhow!("mail is missing from index"))?.1.as_ref(); + let i_int: u32 = (i_enum + 1).try_into()?; + let i: NonZeroU32 = i_int.try_into()?; - let mut mails = vec![]; + Ok(MailIndex { i, uid, uuid, flags }) + }) + .collect::>>()?; - if by_uid { - if mail_vec.is_empty() { - return Ok(vec![]); - } - let iter_strat = sequence::Strategy::Naive { - largest: mail_vec.last().unwrap().0, - }; + Ok(Self { imap_index, internal }) + } - let mut i = 0; - for uid in sequence_set.iter(iter_strat) { - while mail_vec.get(i).map(|mail| mail.0 < uid).unwrap_or(false) { - i += 1; - } - if let Some(mail) = mail_vec.get(i) { - if mail.0 == uid { - mails.push(MailIndex { - i: NonZeroU32::try_from(i as u32 + 1).unwrap(), - uid: mail.0, - uuid: mail.1, - flags: self - .0 - .table - .get(&mail.1) - .ok_or(anyhow!("mail is missing from index"))? - .1 - .as_ref(), - }); - } - } else { - break; - } - } - } else { - if mail_vec.is_empty() { - bail!("No such message (mailbox is empty)"); - } + pub fn last(&'a self) -> Option<&'a MailIndex<'a>> { + self.imap_index.last() + } - let iter_strat = sequence::Strategy::Naive { - largest: NonZeroU32::try_from((mail_vec.len()) as u32).unwrap(), + /// Fetch mail descriptors based on a sequence of UID + /// + /// Complexity analysis: + /// - Sort is O(n * log n) where n is the number of uid generated by the sequence + /// - Finding the starting point in the index O(log m) where m is the size of the mailbox + /// While n =< m, it's not clear if the difference is big or not. + /// + /// For now, the algorithm tries to be fast for small values of n, + /// as it is what is expected by clients. + /// + /// So we assume for our implementation that : n << m. + /// It's not true for full mailbox searches for example... + pub fn fetch_on_uid(&'a self, sequence_set: &SequenceSet) -> Vec<&'a MailIndex<'a>> { + if self.imap_index.is_empty() { + return vec![]; + } + let iter_strat = sequence::Strategy::Naive { + largest: self.last().expect("imap index is not empty").uid, + }; + let mut unroll_seq = sequence_set.iter(iter_strat).collect::>(); + unroll_seq.sort(); + + let start_seq = match unroll_seq.iter().next() { + Some(elem) => elem, + None => return vec![], + }; + + // Quickly jump to the right point in the mailbox vector O(log m) instead + // of iterating one by one O(m). Works only because both unroll_seq & imap_index are sorted per uid. + let mut imap_idx = { + let start_idx = self.imap_index.partition_point(|mail_idx| &mail_idx.uid < start_seq); + &self.imap_index[start_idx..] + }; + println!("win: {:?}", imap_idx.iter().map(|midx| midx.uid).collect::>()); + + let mut acc = vec![]; + for wanted_uid in unroll_seq.iter() { + // Slide the window forward as long as its first element is lower than our wanted uid. + let start_idx = match imap_idx.iter().position(|midx| &midx.uid >= wanted_uid) { + Some(v) => v, + None => break, }; + imap_idx = &imap_idx[start_idx..]; - for i in sequence_set.iter(iter_strat) { - if let Some(mail) = mail_vec.get(i.get() as usize - 1) { - mails.push(MailIndex { - i, - uid: mail.0, - uuid: mail.1, - flags: self - .0 - .table - .get(&mail.1) - .ok_or(anyhow!("mail is missing from index"))? - .1 - .as_ref(), - }); - } else { - bail!("No such mail: {}", i); - } + // If the beginning of our new window is the uid we want, we collect it + if &imap_idx[0].uid == wanted_uid { + acc.push(&imap_idx[0]); } } - Ok(mails) + acc + } + + pub fn fetch_on_id(&'a self, sequence_set: &SequenceSet) -> Result>> { + let iter_strat = sequence::Strategy::Naive { + largest: self.last().context("The mailbox is empty")?.uid, + }; + sequence_set + .iter(iter_strat) + .map(|wanted_id| self.imap_index.get((wanted_id.get() as usize) - 1).ok_or(anyhow!("Mail not found"))) + .collect::>>() + } + + pub fn fetch( + self: &'a Index<'a>, + sequence_set: &SequenceSet, + by_uid: bool, + ) -> Result>> { + match by_uid { + true => Ok(self.fetch_on_uid(sequence_set)), + _ => self.fetch_on_id(sequence_set), + } } } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct MailIndex<'a> { pub i: NonZeroU32, pub uid: ImapUid, diff --git a/src/imap/mail_view.rs b/src/imap/mail_view.rs index 2c8723e..8dd68b5 100644 --- a/src/imap/mail_view.rs +++ b/src/imap/mail_view.rs @@ -26,13 +26,13 @@ use crate::imap::mime_view; use crate::imap::response::Body; pub struct MailView<'a> { - pub in_idx: MailIndex<'a>, + pub in_idx: &'a MailIndex<'a>, pub query_result: &'a QueryResult<'a>, pub content: FetchedMail<'a>, } impl<'a> MailView<'a> { - pub fn new(query_result: &'a QueryResult<'a>, in_idx: MailIndex<'a>) -> Result> { + pub fn new(query_result: &'a QueryResult<'a>, in_idx: &'a MailIndex<'a>) -> Result> { Ok(Self { in_idx, query_result, diff --git a/src/imap/mailbox_view.rs b/src/imap/mailbox_view.rs index 2841b7b..62ea6d2 100644 --- a/src/imap/mailbox_view.rs +++ b/src/imap/mailbox_view.rs @@ -146,7 +146,8 @@ impl MailboxView { let flags = flags.iter().map(|x| x.to_string()).collect::>(); - let mails = self.index().fetch(sequence_set, *is_uid_store)?; + let idx = self.index()?; + let mails = idx.fetch(sequence_set, *is_uid_store)?; for mi in mails.iter() { match kind { StoreType::Add => { @@ -189,7 +190,8 @@ impl MailboxView { to: Arc, is_uid_copy: &bool, ) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>)> { - let mails = self.index().fetch(sequence_set, *is_uid_copy)?; + let idx = self.index()?; + let mails = idx.fetch(sequence_set, *is_uid_copy)?; let mut new_uuids = vec![]; for mi in mails.iter() { @@ -216,7 +218,8 @@ impl MailboxView { to: Arc, is_uid_copy: &bool, ) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>, Vec>)> { - let mails = self.index().fetch(sequence_set, *is_uid_copy)?; + let idx = self.index()?; + let mails = idx.fetch(sequence_set, *is_uid_copy)?; for mi in mails.iter() { to.move_from(&self.0.mailbox, mi.uuid).await?; @@ -254,7 +257,8 @@ impl MailboxView { true => QueryScope::Full, _ => QueryScope::Partial, }; - let mail_idx_list = self.index().fetch(sequence_set, *is_uid_fetch)?; + let idx = self.index()?; + let mail_idx_list = idx.fetch(sequence_set, *is_uid_fetch)?; // [2/6] Fetch the emails let uuids = mail_idx_list @@ -316,7 +320,8 @@ impl MailboxView { let (seq_set, seq_type) = crit.to_sequence_set(); // 2. Get the selection - let selection = self.index().fetch(&seq_set, seq_type.is_uid())?; + let idx = self.index()?; + let selection = idx.fetch(&seq_set, seq_type.is_uid())?; // 3. Filter the selection based on the ID / UID / Flags let (kept_idx, to_fetch) = crit.filter_on_idx(&selection); @@ -341,8 +346,12 @@ impl MailboxView { } // ---- - fn index<'a>(&'a self) -> Index<'a> { - Index(&self.0.snapshot) + /// @FIXME index should be stored for longer than a single request + /// Instead they should be tied to the FrozenMailbox refresh + /// It's not trivial to refactor the code to do that, so we are doing + /// some useless computation for now... + fn index<'a>(&'a self) -> Result> { + Index::new(&self.0.snapshot) } /// Produce an OK [UIDVALIDITY _] message corresponding to `known_state` diff --git a/src/imap/search.rs b/src/imap/search.rs index 2b0b34b..22afd0c 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -117,13 +117,13 @@ impl<'a> Criteria<'a> { /// fetching some remote data pub fn filter_on_idx<'b>( &self, - midx_list: &[MailIndex<'b>], - ) -> (Vec>, Vec>) { + midx_list: &[&'b MailIndex<'b>], + ) -> (Vec<&'b MailIndex<'b>>, Vec<&'b MailIndex<'b>>) { let (p1, p2): (Vec<_>, Vec<_>) = midx_list .iter() .map(|x| (x, self.is_keep_on_idx(x))) .filter(|(_midx, decision)| decision.is_keep()) - .map(|(midx, decision)| ((*midx).clone(), decision)) + .map(|(midx, decision)| (*midx, decision)) .partition(|(_midx, decision)| matches!(decision, PartialDecision::Keep)); let to_keep = p1.into_iter().map(|(v, _)| v).collect(); @@ -133,13 +133,13 @@ impl<'a> Criteria<'a> { pub fn filter_on_query<'b>( &self, - midx_list: &[MailIndex<'b>], + midx_list: &[&'b MailIndex<'b>], query_result: &'b Vec>, - ) -> Result>> { + ) -> Result>> { Ok(midx_list .iter() .zip(query_result.iter()) - .map(|(midx, qr)| MailView::new(qr, midx.clone())) + .map(|(midx, qr)| MailView::new(qr, midx)) .collect::, _>>()? .into_iter() .filter(|mail_view| self.is_keep_on_query(mail_view)) -- cgit v1.2.3 From 72f9a221ed2318d8ca3452b6574c900be923d3d5 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Mon, 8 Jan 2024 11:14:34 +0100 Subject: Formatting & tests --- src/imap/index.rs | 40 +++++++++++++++++++++++++++++++--------- src/imap/mail_view.rs | 5 ++++- src/imap/mailbox_view.rs | 2 +- 3 files changed, 36 insertions(+), 11 deletions(-) (limited to 'src/imap') diff --git a/src/imap/index.rs b/src/imap/index.rs index 9adf8b2..3ca5562 100644 --- a/src/imap/index.rs +++ b/src/imap/index.rs @@ -6,7 +6,7 @@ use imap_codec::imap_types::sequence::{self, SeqOrUid, Sequence, SequenceSet}; use crate::mail::uidindex::{ImapUid, UidIndex}; use crate::mail::unique_ident::UniqueIdent; -pub struct Index<'a> { +pub struct Index<'a> { pub imap_index: Vec>, pub internal: &'a UidIndex, } @@ -17,19 +17,32 @@ impl<'a> Index<'a> { .iter() .enumerate() .map(|(i_enum, (&uid, &uuid))| { - let flags = internal.table.get(&uuid).ok_or(anyhow!("mail is missing from index"))?.1.as_ref(); + let flags = internal + .table + .get(&uuid) + .ok_or(anyhow!("mail is missing from index"))? + .1 + .as_ref(); let i_int: u32 = (i_enum + 1).try_into()?; let i: NonZeroU32 = i_int.try_into()?; - Ok(MailIndex { i, uid, uuid, flags }) + Ok(MailIndex { + i, + uid, + uuid, + flags, + }) }) .collect::>>()?; - Ok(Self { imap_index, internal }) + Ok(Self { + imap_index, + internal, + }) } pub fn last(&'a self) -> Option<&'a MailIndex<'a>> { - self.imap_index.last() + self.imap_index.last() } /// Fetch mail descriptors based on a sequence of UID @@ -62,10 +75,15 @@ impl<'a> Index<'a> { // Quickly jump to the right point in the mailbox vector O(log m) instead // of iterating one by one O(m). Works only because both unroll_seq & imap_index are sorted per uid. let mut imap_idx = { - let start_idx = self.imap_index.partition_point(|mail_idx| &mail_idx.uid < start_seq); + let start_idx = self + .imap_index + .partition_point(|mail_idx| &mail_idx.uid < start_seq); &self.imap_index[start_idx..] }; - println!("win: {:?}", imap_idx.iter().map(|midx| midx.uid).collect::>()); + println!( + "win: {:?}", + imap_idx.iter().map(|midx| midx.uid).collect::>() + ); let mut acc = vec![]; for wanted_uid in unroll_seq.iter() { @@ -91,10 +109,14 @@ impl<'a> Index<'a> { }; sequence_set .iter(iter_strat) - .map(|wanted_id| self.imap_index.get((wanted_id.get() as usize) - 1).ok_or(anyhow!("Mail not found"))) + .map(|wanted_id| { + self.imap_index + .get((wanted_id.get() as usize) - 1) + .ok_or(anyhow!("Mail not found")) + }) .collect::>>() } - + pub fn fetch( self: &'a Index<'a>, sequence_set: &SequenceSet, diff --git a/src/imap/mail_view.rs b/src/imap/mail_view.rs index 8dd68b5..7da21c4 100644 --- a/src/imap/mail_view.rs +++ b/src/imap/mail_view.rs @@ -32,7 +32,10 @@ pub struct MailView<'a> { } impl<'a> MailView<'a> { - pub fn new(query_result: &'a QueryResult<'a>, in_idx: &'a MailIndex<'a>) -> Result> { + pub fn new( + query_result: &'a QueryResult<'a>, + in_idx: &'a MailIndex<'a>, + ) -> Result> { Ok(Self { in_idx, query_result, diff --git a/src/imap/mailbox_view.rs b/src/imap/mailbox_view.rs index 62ea6d2..77fe7f7 100644 --- a/src/imap/mailbox_view.rs +++ b/src/imap/mailbox_view.rs @@ -526,7 +526,7 @@ mod tests { content: rfc822.to_vec(), }; - let mv = MailView::new(&qr, mail_in_idx)?; + let mv = MailView::new(&qr, &mail_in_idx)?; let (res_body, _seen) = mv.filter(&ap)?; let fattr = match res_body { -- cgit v1.2.3