diff options
author | Quentin <quentin@dufour.io> | 2024-01-08 10:39:26 +0000 |
---|---|---|
committer | Quentin <quentin@dufour.io> | 2024-01-08 10:39:26 +0000 |
commit | d7788e29a8a64550e9b274001ff3fb9a7bf3473b (patch) | |
tree | e43a11753472f1917ce4aa6ddba24ae3a513bd50 /src/imap | |
parent | 152d5b7604337fe19a7aea7fc37b3d4615ca7393 (diff) | |
parent | 42a54b2c500294c594f3efdd25db28c18f5ac238 (diff) | |
download | aerogramme-d7788e29a8a64550e9b274001ff3fb9a7bf3473b.tar.gz aerogramme-d7788e29a8a64550e9b274001ff3fb9a7bf3473b.zip |
Merge pull request 'Implement search' (#61) from feat/search into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/aerogramme/pulls/61
Diffstat (limited to 'src/imap')
-rw-r--r-- | src/imap/capability.rs | 1 | ||||
-rw-r--r-- | src/imap/command/examined.rs | 12 | ||||
-rw-r--r-- | src/imap/imf_view.rs | 126 | ||||
-rw-r--r-- | src/imap/index.rs | 222 | ||||
-rw-r--r-- | src/imap/mail_view.rs | 160 | ||||
-rw-r--r-- | src/imap/mailbox_view.rs | 39 | ||||
-rw-r--r-- | src/imap/mime_view.rs | 33 | ||||
-rw-r--r-- | src/imap/search.rs | 360 |
8 files changed, 731 insertions, 222 deletions
diff --git a/src/imap/capability.rs b/src/imap/capability.rs index 631c3e2..feadb6b 100644 --- a/src/imap/capability.rs +++ b/src/imap/capability.rs @@ -22,6 +22,7 @@ impl Default for ServerCapability { fn default() -> Self { Self(HashSet::from([ Capability::Imap4Rev1, + Capability::Enable, Capability::Move, Capability::LiteralPlus, capability_unselect(), diff --git a/src/imap/command/examined.rs b/src/imap/command/examined.rs index ec16973..3dd11e2 100644 --- a/src/imap/command/examined.rs +++ b/src/imap/command/examined.rs @@ -111,15 +111,17 @@ impl<'a> ExaminedContext<'a> { pub async fn search( self, - _charset: &Option<Charset<'a>>, - _criteria: &SearchKey<'a>, - _uid: &bool, + charset: &Option<Charset<'a>>, + criteria: &SearchKey<'a>, + uid: &bool, ) -> Result<(Response<'static>, flow::Transition)> { + let found = self.mailbox.search(charset, criteria, *uid).await?; Ok(( Response::build() .to_req(self.req) - .message("Not implemented") - .bad()?, + .set_body(found) + .message("SEARCH completed") + .ok()?, flow::Transition::None, )) } diff --git a/src/imap/imf_view.rs b/src/imap/imf_view.rs index 4297769..a4ca2e8 100644 --- a/src/imap/imf_view.rs +++ b/src/imap/imf_view.rs @@ -1,68 +1,80 @@ +use anyhow::{anyhow, Result}; +use chrono::naive::NaiveDate; + use imap_codec::imap_types::core::{IString, NString}; use imap_codec::imap_types::envelope::{Address, Envelope}; use eml_codec::imf; -/// Envelope rules are defined in RFC 3501, section 7.4.2 -/// https://datatracker.ietf.org/doc/html/rfc3501#section-7.4.2 -/// -/// Some important notes: -/// -/// If the Sender or Reply-To lines are absent in the [RFC-2822] -/// header, or are present but empty, the server sets the -/// corresponding member of the envelope to be the same value as -/// the from member (the client is not expected to know to do -/// this). Note: [RFC-2822] requires that all messages have a valid -/// From header. Therefore, the from, sender, and reply-to -/// members in the envelope can not be NIL. 
-/// -/// If the Date, Subject, In-Reply-To, and Message-ID header lines -/// are absent in the [RFC-2822] header, the corresponding member -/// of the envelope is NIL; if these header lines are present but -/// empty the corresponding member of the envelope is the empty -/// string. +pub struct ImfView<'a>(pub &'a imf::Imf<'a>); -//@FIXME return an error if the envelope is invalid instead of panicking -//@FIXME some fields must be defaulted if there are not set. -pub fn message_envelope(msg: &imf::Imf) -> Envelope<'static> { - let from = msg.from.iter().map(convert_mbx).collect::<Vec<_>>(); +impl<'a> ImfView<'a> { + pub fn naive_date(&self) -> Result<NaiveDate> { + Ok(self.0.date.ok_or(anyhow!("date is not set"))?.date_naive()) + } - Envelope { - date: NString( - msg.date - .as_ref() - .map(|d| IString::try_from(d.to_rfc3339()).unwrap()), - ), - subject: NString( - msg.subject - .as_ref() - .map(|d| IString::try_from(d.to_string()).unwrap()), - ), - sender: msg - .sender - .as_ref() - .map(|v| vec![convert_mbx(v)]) - .unwrap_or(from.clone()), - reply_to: if msg.reply_to.is_empty() { - from.clone() - } else { - convert_addresses(&msg.reply_to) - }, - from, - to: convert_addresses(&msg.to), - cc: convert_addresses(&msg.cc), - bcc: convert_addresses(&msg.bcc), - in_reply_to: NString( - msg.in_reply_to - .iter() - .next() - .map(|d| IString::try_from(d.to_string()).unwrap()), - ), - message_id: NString( - msg.msg_id + /// Envelope rules are defined in RFC 3501, section 7.4.2 + /// https://datatracker.ietf.org/doc/html/rfc3501#section-7.4.2 + /// + /// Some important notes: + /// + /// If the Sender or Reply-To lines are absent in the [RFC-2822] + /// header, or are present but empty, the server sets the + /// corresponding member of the envelope to be the same value as + /// the from member (the client is not expected to know to do + /// this). Note: [RFC-2822] requires that all messages have a valid + /// From header. 
Therefore, the from, sender, and reply-to + /// members in the envelope can not be NIL. + /// + /// If the Date, Subject, In-Reply-To, and Message-ID header lines + /// are absent in the [RFC-2822] header, the corresponding member + /// of the envelope is NIL; if these header lines are present but + /// empty the corresponding member of the envelope is the empty + /// string. + + //@FIXME return an error if the envelope is invalid instead of panicking + //@FIXME some fields must be defaulted if there are not set. + pub fn message_envelope(&self) -> Envelope<'static> { + let msg = self.0; + let from = msg.from.iter().map(convert_mbx).collect::<Vec<_>>(); + + Envelope { + date: NString( + msg.date + .as_ref() + .map(|d| IString::try_from(d.to_rfc3339()).unwrap()), + ), + subject: NString( + msg.subject + .as_ref() + .map(|d| IString::try_from(d.to_string()).unwrap()), + ), + sender: msg + .sender .as_ref() - .map(|d| IString::try_from(d.to_string()).unwrap()), - ), + .map(|v| vec![convert_mbx(v)]) + .unwrap_or(from.clone()), + reply_to: if msg.reply_to.is_empty() { + from.clone() + } else { + convert_addresses(&msg.reply_to) + }, + from, + to: convert_addresses(&msg.to), + cc: convert_addresses(&msg.cc), + bcc: convert_addresses(&msg.bcc), + in_reply_to: NString( + msg.in_reply_to + .iter() + .next() + .map(|d| IString::try_from(d.to_string()).unwrap()), + ), + message_id: NString( + msg.msg_id + .as_ref() + .map(|d| IString::try_from(d.to_string()).unwrap()), + ), + } } } diff --git a/src/imap/index.rs b/src/imap/index.rs index 01dd2ef..3ca5562 100644 --- a/src/imap/index.rs +++ b/src/imap/index.rs @@ -1,95 +1,181 @@ use std::num::NonZeroU32; -use anyhow::{anyhow, bail, Result}; -use imap_codec::imap_types::sequence::{self, SequenceSet}; +use anyhow::{anyhow, Context, Result}; +use imap_codec::imap_types::sequence::{self, SeqOrUid, Sequence, SequenceSet}; use crate::mail::uidindex::{ImapUid, UidIndex}; use crate::mail::unique_ident::UniqueIdent; -pub struct 
Index<'a>(pub &'a UidIndex); +pub struct Index<'a> { + pub imap_index: Vec<MailIndex<'a>>, + pub internal: &'a UidIndex, +} impl<'a> Index<'a> { - pub fn fetch( - self: &Index<'a>, - sequence_set: &SequenceSet, - by_uid: bool, - ) -> Result<Vec<MailIndex<'a>>> { - let mail_vec = self - .0 + pub fn new(internal: &'a UidIndex) -> Result<Self> { + let imap_index = internal .idx_by_uid .iter() - .map(|(uid, uuid)| (*uid, *uuid)) - .collect::<Vec<_>>(); + .enumerate() + .map(|(i_enum, (&uid, &uuid))| { + let flags = internal + .table + .get(&uuid) + .ok_or(anyhow!("mail is missing from index"))? + .1 + .as_ref(); + let i_int: u32 = (i_enum + 1).try_into()?; + let i: NonZeroU32 = i_int.try_into()?; - let mut mails = vec![]; + Ok(MailIndex { + i, + uid, + uuid, + flags, + }) + }) + .collect::<Result<Vec<_>>>()?; - if by_uid { - if mail_vec.is_empty() { - return Ok(vec![]); - } - let iter_strat = sequence::Strategy::Naive { - largest: mail_vec.last().unwrap().0, - }; + Ok(Self { + imap_index, + internal, + }) + } - let mut i = 0; - for uid in sequence_set.iter(iter_strat) { - while mail_vec.get(i).map(|mail| mail.0 < uid).unwrap_or(false) { - i += 1; - } - if let Some(mail) = mail_vec.get(i) { - if mail.0 == uid { - mails.push(MailIndex { - i: NonZeroU32::try_from(i as u32 + 1).unwrap(), - uid: mail.0, - uuid: mail.1, - flags: self - .0 - .table - .get(&mail.1) - .ok_or(anyhow!("mail is missing from index"))? - .1 - .as_ref(), - }); - } - } else { - break; - } - } - } else { - if mail_vec.is_empty() { - bail!("No such message (mailbox is empty)"); - } + pub fn last(&'a self) -> Option<&'a MailIndex<'a>> { + self.imap_index.last() + } + + /// Fetch mail descriptors based on a sequence of UID + /// + /// Complexity analysis: + /// - Sort is O(n * log n) where n is the number of uid generated by the sequence + /// - Finding the starting point in the index O(log m) where m is the size of the mailbox + /// While n =< m, it's not clear if the difference is big or not. 
+ /// + /// For now, the algorithm tries to be fast for small values of n, + /// as it is what is expected by clients. + /// + /// So we assume for our implementation that : n << m. + /// It's not true for full mailbox searches for example... + pub fn fetch_on_uid(&'a self, sequence_set: &SequenceSet) -> Vec<&'a MailIndex<'a>> { + if self.imap_index.is_empty() { + return vec![]; + } + let iter_strat = sequence::Strategy::Naive { + largest: self.last().expect("imap index is not empty").uid, + }; + let mut unroll_seq = sequence_set.iter(iter_strat).collect::<Vec<_>>(); + unroll_seq.sort(); + + let start_seq = match unroll_seq.iter().next() { + Some(elem) => elem, + None => return vec![], + }; + + // Quickly jump to the right point in the mailbox vector O(log m) instead + // of iterating one by one O(m). Works only because both unroll_seq & imap_index are sorted per uid. + let mut imap_idx = { + let start_idx = self + .imap_index + .partition_point(|mail_idx| &mail_idx.uid < start_seq); + &self.imap_index[start_idx..] + }; + println!( + "win: {:?}", + imap_idx.iter().map(|midx| midx.uid).collect::<Vec<_>>() + ); - let iter_strat = sequence::Strategy::Naive { - largest: NonZeroU32::try_from((mail_vec.len()) as u32).unwrap(), + let mut acc = vec![]; + for wanted_uid in unroll_seq.iter() { + // Slide the window forward as long as its first element is lower than our wanted uid. + let start_idx = match imap_idx.iter().position(|midx| &midx.uid >= wanted_uid) { + Some(v) => v, + None => break, }; + imap_idx = &imap_idx[start_idx..]; - for i in sequence_set.iter(iter_strat) { - if let Some(mail) = mail_vec.get(i.get() as usize - 1) { - mails.push(MailIndex { - i, - uid: mail.0, - uuid: mail.1, - flags: self - .0 - .table - .get(&mail.1) - .ok_or(anyhow!("mail is missing from index"))? 
- .1 - .as_ref(), - }); - } else { - bail!("No such mail: {}", i); - } + // If the beginning of our new window is the uid we want, we collect it + if &imap_idx[0].uid == wanted_uid { + acc.push(&imap_idx[0]); } } - Ok(mails) + acc + } + + pub fn fetch_on_id(&'a self, sequence_set: &SequenceSet) -> Result<Vec<&'a MailIndex<'a>>> { + let iter_strat = sequence::Strategy::Naive { + largest: self.last().context("The mailbox is empty")?.uid, + }; + sequence_set + .iter(iter_strat) + .map(|wanted_id| { + self.imap_index + .get((wanted_id.get() as usize) - 1) + .ok_or(anyhow!("Mail not found")) + }) + .collect::<Result<Vec<_>>>() + } + + pub fn fetch( + self: &'a Index<'a>, + sequence_set: &SequenceSet, + by_uid: bool, + ) -> Result<Vec<&'a MailIndex<'a>>> { + match by_uid { + true => Ok(self.fetch_on_uid(sequence_set)), + _ => self.fetch_on_id(sequence_set), + } } } +#[derive(Clone, Debug)] pub struct MailIndex<'a> { pub i: NonZeroU32, pub uid: ImapUid, pub uuid: UniqueIdent, pub flags: &'a Vec<String>, } + +impl<'a> MailIndex<'a> { + // The following functions are used to implement the SEARCH command + pub fn is_in_sequence_i(&self, seq: &Sequence) -> bool { + match seq { + Sequence::Single(SeqOrUid::Asterisk) => true, + Sequence::Single(SeqOrUid::Value(target)) => target == &self.i, + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x)) + | Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.i, + Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => { + if x1 < x2 { + x1 <= &self.i && &self.i <= x2 + } else { + x1 >= &self.i && &self.i >= x2 + } + } + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true, + } + } + + pub fn is_in_sequence_uid(&self, seq: &Sequence) -> bool { + match seq { + Sequence::Single(SeqOrUid::Asterisk) => true, + Sequence::Single(SeqOrUid::Value(target)) => target == &self.uid, + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x)) + | Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= 
&self.uid, + Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => { + if x1 < x2 { + x1 <= &self.uid && &self.uid <= x2 + } else { + x1 >= &self.uid && &self.uid >= x2 + } + } + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true, + } + } + + pub fn is_flag_set(&self, flag: &str) -> bool { + self.flags + .iter() + .any(|candidate| candidate.as_str() == flag) + } +} diff --git a/src/imap/mail_view.rs b/src/imap/mail_view.rs index de9bfe3..7da21c4 100644 --- a/src/imap/mail_view.rs +++ b/src/imap/mail_view.rs @@ -1,7 +1,7 @@ use std::num::NonZeroU32; use anyhow::{anyhow, bail, Result}; -use chrono::{Offset, TimeZone, Utc}; +use chrono::{naive::NaiveDate, DateTime as ChronoDateTime, Local, Offset, TimeZone, Utc}; use imap_codec::imap_types::core::NString; use imap_codec::imap_types::datetime::DateTime; @@ -20,19 +20,22 @@ use crate::mail::query::QueryResult; use crate::imap::attributes::AttributesProxy; use crate::imap::flags; -use crate::imap::imf_view::message_envelope; +use crate::imap::imf_view::ImfView; use crate::imap::index::MailIndex; use crate::imap::mime_view; use crate::imap::response::Body; pub struct MailView<'a> { - pub in_idx: MailIndex<'a>, + pub in_idx: &'a MailIndex<'a>, pub query_result: &'a QueryResult<'a>, pub content: FetchedMail<'a>, } impl<'a> MailView<'a> { - pub fn new(query_result: &'a QueryResult<'a>, in_idx: MailIndex<'a>) -> Result<MailView<'a>> { + pub fn new( + query_result: &'a QueryResult<'a>, + in_idx: &'a MailIndex<'a>, + ) -> Result<MailView<'a>> { Ok(Self { in_idx, query_result, @@ -40,18 +43,88 @@ impl<'a> MailView<'a> { QueryResult::FullResult { content, .. } => { let (_, parsed) = eml_codec::parse_message(&content).or(Err(anyhow!("Invalid mail body")))?; - FetchedMail::new_from_message(parsed) + FetchedMail::full_from_message(parsed) } QueryResult::PartialResult { metadata, .. 
} => { - let (_, parsed) = eml_codec::parse_imf(&metadata.headers) + let (_, parsed) = eml_codec::parse_message(&metadata.headers) .or(Err(anyhow!("unable to parse email headers")))?; - FetchedMail::Partial(parsed) + FetchedMail::partial_from_message(parsed) } QueryResult::IndexResult { .. } => FetchedMail::IndexOnly, }, }) } + pub fn imf(&self) -> Option<ImfView> { + self.content.as_imf().map(ImfView) + } + + pub fn selected_mime(&'a self) -> Option<mime_view::SelectedMime<'a>> { + self.content.as_anypart().ok().map(mime_view::SelectedMime) + } + + pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> { + let mut seen = SeenFlag::DoNothing; + let res_attrs = ap + .attrs + .iter() + .map(|attr| match attr { + MessageDataItemName::Uid => Ok(self.uid()), + MessageDataItemName::Flags => Ok(self.flags()), + MessageDataItemName::Rfc822Size => self.rfc_822_size(), + MessageDataItemName::Rfc822Header => self.rfc_822_header(), + MessageDataItemName::Rfc822Text => self.rfc_822_text(), + MessageDataItemName::Rfc822 => self.rfc822(), + MessageDataItemName::Envelope => Ok(self.envelope()), + MessageDataItemName::Body => self.body(), + MessageDataItemName::BodyStructure => self.body_structure(), + MessageDataItemName::BodyExt { + section, + partial, + peek, + } => { + let (body, has_seen) = self.body_ext(section, partial, peek)?; + seen = has_seen; + Ok(body) + } + MessageDataItemName::InternalDate => self.internal_date(), + }) + .collect::<Result<Vec<_>, _>>()?; + + Ok(( + Body::Data(Data::Fetch { + seq: self.in_idx.i, + items: res_attrs.try_into()?, + }), + seen, + )) + } + + pub fn stored_naive_date(&self) -> Result<NaiveDate> { + let mail_meta = self.query_result.metadata().expect("metadata were fetched"); + let mail_ts: i64 = mail_meta.internaldate.try_into()?; + let msg_date: ChronoDateTime<Local> = ChronoDateTime::from_timestamp(mail_ts, 0) + .ok_or(anyhow!("unable to parse timestamp"))? 
+ .with_timezone(&Local); + + Ok(msg_date.date_naive()) + } + + pub fn is_header_contains_pattern(&self, hdr: &[u8], pattern: &[u8]) -> bool { + let mime = match self.selected_mime() { + None => return false, + Some(x) => x, + }; + + let val = match mime.header_value(hdr) { + None => return false, + Some(x) => x, + }; + + val.windows(pattern.len()).any(|win| win == pattern) + } + + // Private function, mainly for filter! fn uid(&self) -> MessageDataItem<'static> { MessageDataItem::Uid(self.in_idx.uid.clone()) } @@ -87,28 +160,32 @@ impl<'a> MailView<'a> { } fn rfc_822_text(&self) -> Result<MessageDataItem<'static>> { - let txt: NString = self.content.as_full()?.raw_body.to_vec().try_into()?; + let txt: NString = self.content.as_msg()?.raw_body.to_vec().try_into()?; Ok(MessageDataItem::Rfc822Text(txt)) } fn rfc822(&self) -> Result<MessageDataItem<'static>> { - let full: NString = self.content.as_full()?.raw_part.to_vec().try_into()?; + let full: NString = self.content.as_msg()?.raw_part.to_vec().try_into()?; Ok(MessageDataItem::Rfc822(full)) } fn envelope(&self) -> MessageDataItem<'static> { - MessageDataItem::Envelope(message_envelope(self.content.imf().clone())) + MessageDataItem::Envelope( + self.imf() + .expect("an imf object is derivable from fetchedmail") + .message_envelope(), + ) } fn body(&self) -> Result<MessageDataItem<'static>> { Ok(MessageDataItem::Body(mime_view::bodystructure( - self.content.as_full()?.child.as_ref(), + self.content.as_msg()?.child.as_ref(), )?)) } fn body_structure(&self) -> Result<MessageDataItem<'static>> { Ok(MessageDataItem::Body(mime_view::bodystructure( - self.content.as_full()?.child.as_ref(), + self.content.as_msg()?.child.as_ref(), )?)) } @@ -167,43 +244,6 @@ impl<'a> MailView<'a> { .ok_or(anyhow!("Unable to parse internal date"))?; Ok(MessageDataItem::InternalDate(DateTime::unvalidated(dt))) } - - pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> { - let mut seen = SeenFlag::DoNothing; - let 
res_attrs = ap - .attrs - .iter() - .map(|attr| match attr { - MessageDataItemName::Uid => Ok(self.uid()), - MessageDataItemName::Flags => Ok(self.flags()), - MessageDataItemName::Rfc822Size => self.rfc_822_size(), - MessageDataItemName::Rfc822Header => self.rfc_822_header(), - MessageDataItemName::Rfc822Text => self.rfc_822_text(), - MessageDataItemName::Rfc822 => self.rfc822(), - MessageDataItemName::Envelope => Ok(self.envelope()), - MessageDataItemName::Body => self.body(), - MessageDataItemName::BodyStructure => self.body_structure(), - MessageDataItemName::BodyExt { - section, - partial, - peek, - } => { - let (body, has_seen) = self.body_ext(section, partial, peek)?; - seen = has_seen; - Ok(body) - } - MessageDataItemName::InternalDate => self.internal_date(), - }) - .collect::<Result<Vec<_>, _>>()?; - - Ok(( - Body::Data(Data::Fetch { - seq: self.in_idx.i, - items: res_attrs.try_into()?, - }), - seen, - )) - } } pub enum SeenFlag { @@ -215,33 +255,39 @@ pub enum SeenFlag { pub enum FetchedMail<'a> { IndexOnly, - Partial(imf::Imf<'a>), + Partial(AnyPart<'a>), Full(AnyPart<'a>), } impl<'a> FetchedMail<'a> { - pub fn new_from_message(msg: Message<'a>) -> Self { + pub fn full_from_message(msg: Message<'a>) -> Self { Self::Full(AnyPart::Msg(msg)) } - fn as_anypart(&self) -> Result<&AnyPart<'a>> { + pub fn partial_from_message(msg: Message<'a>) -> Self { + Self::Partial(AnyPart::Msg(msg)) + } + + pub fn as_anypart(&self) -> Result<&AnyPart<'a>> { match self { FetchedMail::Full(x) => Ok(&x), + FetchedMail::Partial(x) => Ok(&x), _ => bail!("The full message must be fetched, not only its headers"), } } - fn as_full(&self) -> Result<&Message<'a>> { + pub fn as_msg(&self) -> Result<&Message<'a>> { match self { FetchedMail::Full(AnyPart::Msg(x)) => Ok(&x), + FetchedMail::Partial(AnyPart::Msg(x)) => Ok(&x), _ => bail!("The full message must be fetched, not only its headers AND it must be an AnyPart::Msg."), } } - fn imf(&self) -> &imf::Imf<'a> { + pub fn as_imf(&self) 
-> Option<&imf::Imf<'a>> { match self { - FetchedMail::Full(AnyPart::Msg(x)) => &x.imf, - FetchedMail::Partial(x) => &x, - _ => panic!("Can't contain AnyPart that is not a message"), + FetchedMail::Full(AnyPart::Msg(x)) => Some(&x.imf), + FetchedMail::Partial(AnyPart::Msg(x)) => Some(&x.imf), + _ => None, } } } diff --git a/src/imap/mailbox_view.rs b/src/imap/mailbox_view.rs index e4ffdcd..77fe7f7 100644 --- a/src/imap/mailbox_view.rs +++ b/src/imap/mailbox_view.rs @@ -146,7 +146,8 @@ impl MailboxView { let flags = flags.iter().map(|x| x.to_string()).collect::<Vec<_>>(); - let mails = self.index().fetch(sequence_set, *is_uid_store)?; + let idx = self.index()?; + let mails = idx.fetch(sequence_set, *is_uid_store)?; for mi in mails.iter() { match kind { StoreType::Add => { @@ -189,7 +190,8 @@ impl MailboxView { to: Arc<Mailbox>, is_uid_copy: &bool, ) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>)> { - let mails = self.index().fetch(sequence_set, *is_uid_copy)?; + let idx = self.index()?; + let mails = idx.fetch(sequence_set, *is_uid_copy)?; let mut new_uuids = vec![]; for mi in mails.iter() { @@ -216,7 +218,8 @@ impl MailboxView { to: Arc<Mailbox>, is_uid_copy: &bool, ) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>, Vec<Body<'static>>)> { - let mails = self.index().fetch(sequence_set, *is_uid_copy)?; + let idx = self.index()?; + let mails = idx.fetch(sequence_set, *is_uid_copy)?; for mi in mails.iter() { to.move_from(&self.0.mailbox, mi.uuid).await?; @@ -254,7 +257,8 @@ impl MailboxView { true => QueryScope::Full, _ => QueryScope::Partial, }; - let mail_idx_list = self.index().fetch(sequence_set, *is_uid_fetch)?; + let idx = self.index()?; + let mail_idx_list = idx.fetch(sequence_set, *is_uid_fetch)?; // [2/6] Fetch the emails let uuids = mail_idx_list @@ -316,29 +320,38 @@ impl MailboxView { let (seq_set, seq_type) = crit.to_sequence_set(); // 2. 
Get the selection - let selection = self.index().fetch(&seq_set, seq_type.is_uid())?; + let idx = self.index()?; + let selection = idx.fetch(&seq_set, seq_type.is_uid())?; // 3. Filter the selection based on the ID / UID / Flags + let (kept_idx, to_fetch) = crit.filter_on_idx(&selection); - // 4. If needed, filter the selection based on the metadata - let _need_meta = crit.need_meta(); + // 4. Fetch additional info about the emails + let query_scope = crit.query_scope(); + let uuids = to_fetch.iter().map(|midx| midx.uuid).collect::<Vec<_>>(); + let query_result = self.0.query(&uuids, query_scope).fetch().await?; // 5. If needed, filter the selection based on the body - let _need_body = crit.need_body(); + let kept_query = crit.filter_on_query(&to_fetch, &query_result)?; // 6. Format the result according to the client's taste: // either return UID or ID. + let final_selection = kept_idx.into_iter().chain(kept_query.into_iter()); let selection_fmt = match uid { - true => selection.into_iter().map(|in_idx| in_idx.uid).collect(), - _ => selection.into_iter().map(|in_idx| in_idx.i).collect(), + true => final_selection.map(|in_idx| in_idx.uid).collect(), + _ => final_selection.map(|in_idx| in_idx.i).collect(), }; Ok(vec![Body::Data(Data::Search(selection_fmt))]) } // ---- - fn index<'a>(&'a self) -> Index<'a> { - Index(&self.0.snapshot) + /// @FIXME index should be stored for longer than a single request + /// Instead they should be tied to the FrozenMailbox refresh + /// It's not trivial to refactor the code to do that, so we are doing + /// some useless computation for now... 
+ fn index<'a>(&'a self) -> Result<Index<'a>> { + Index::new(&self.0.snapshot) } /// Produce an OK [UIDVALIDITY _] message corresponding to `known_state` @@ -513,7 +526,7 @@ mod tests { content: rfc822.to_vec(), }; - let mv = MailView::new(&qr, mail_in_idx)?; + let mv = MailView::new(&qr, &mail_in_idx)?; let (res_body, _seen) = mv.filter(&ap)?; let fattr = match res_body { diff --git a/src/imap/mime_view.rs b/src/imap/mime_view.rs index 1f36c47..cf6c751 100644 --- a/src/imap/mime_view.rs +++ b/src/imap/mime_view.rs @@ -12,7 +12,7 @@ use eml_codec::{ header, mime, mime::r#type::Deductible, part::composite, part::discrete, part::AnyPart, }; -use crate::imap::imf_view::message_envelope; +use crate::imap::imf_view::ImfView; pub enum BodySection<'a> { Full(Cow<'a, [u8]>), @@ -164,8 +164,23 @@ impl<'a> SubsettedSection<'a> { /// Used for current MIME inspection /// /// See NodeMime for recursive logic -struct SelectedMime<'a>(&'a AnyPart<'a>); +pub struct SelectedMime<'a>(pub &'a AnyPart<'a>); impl<'a> SelectedMime<'a> { + pub fn header_value(&'a self, to_match_ext: &[u8]) -> Option<&'a [u8]> { + let to_match = to_match_ext.to_ascii_lowercase(); + + self.eml_mime() + .kv + .iter() + .filter_map(|field| match field { + header::Field::Good(header::Kv2(k, v)) => Some((k, v)), + _ => None, + }) + .find(|(k, _)| k.to_ascii_lowercase() == to_match) + .map(|(_, v)| v) + .copied() + } + /// The subsetted fetch section basically tells us the /// extraction logic to apply on our selected MIME. /// This function acts as a router for these logic. @@ -200,6 +215,13 @@ impl<'a> SelectedMime<'a> { Ok(ExtractedFull(bytes.to_vec().into())) } + fn eml_mime(&self) -> &eml_codec::mime::NaiveMIME<'_> { + match &self.0 { + AnyPart::Msg(msg) => msg.child.mime(), + other => other.mime(), + } + } + /// The [...] HEADER.FIELDS, and HEADER.FIELDS.NOT part /// specifiers refer to the [RFC-2822] header of the message or of /// an encapsulated [MIME-IMT] MESSAGE/RFC822 message. 
@@ -231,10 +253,7 @@ impl<'a> SelectedMime<'a> { .collect::<HashSet<_>>(); // Extract MIME headers - let mime = match &self.0 { - AnyPart::Msg(msg) => msg.child.mime(), - other => other.mime(), - }; + let mime = self.eml_mime(); // Filter our MIME headers based on the field index // 1. Keep only the correctly formatted headers @@ -347,7 +366,7 @@ impl<'a> NodeMsg<'a> { body: FetchBody { basic, specific: SpecificFields::Message { - envelope: Box::new(message_envelope(&self.1.imf)), + envelope: Box::new(ImfView(&self.1.imf).message_envelope()), body_structure: Box::new(NodeMime(&self.1.child).structure()?), number_of_lines: nol(self.1.raw_part), }, diff --git a/src/imap/search.rs b/src/imap/search.rs index b3c6b05..22afd0c 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -1,7 +1,13 @@ +use std::num::NonZeroU32; + +use anyhow::Result; use imap_codec::imap_types::core::NonEmptyVec; use imap_codec::imap_types::search::SearchKey; use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet}; -use std::num::NonZeroU32; + +use crate::imap::index::MailIndex; +use crate::imap::mail_view::MailView; +use crate::mail::query::{QueryResult, QueryScope}; pub enum SeqType { Undefined, @@ -54,6 +60,10 @@ impl<'a> Criteria<'a> { tracing::debug!( "using AND in a search request is slow: no intersection is performed" ); + // As we perform no intersection, we don't care if we mix uid or id. + // We only keep the smallest range, being it ID or UID, depending of + // which one has the less items. This is an approximation as UID ranges + // can have holes while ID ones can't. search_list .as_ref() .iter() @@ -72,35 +82,227 @@ impl<'a> Criteria<'a> { /// Not really clever as we can have cases where we filter out /// the email before needing to inspect its meta. /// But for now we are seeking the most basic/stupid algorithm. 
- pub fn need_meta(&self) -> bool { + pub fn query_scope(&self) -> QueryScope { use SearchKey::*; match self.0 { + // Combinators + And(and_list) => and_list + .as_ref() + .iter() + .fold(QueryScope::Index, |prev, sk| { + prev.union(&Criteria(sk).query_scope()) + }), + Not(inner) => Criteria(inner).query_scope(), + Or(left, right) => Criteria(left) + .query_scope() + .union(&Criteria(right).query_scope()), + All => QueryScope::Index, + // IMF Headers Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) - | Subject(_) | To(_) => true, + | Subject(_) | To(_) => QueryScope::Partial, // Internal Date is also stored in MailMeta - Before(_) | On(_) | Since(_) => true, + Before(_) | On(_) | Since(_) => QueryScope::Partial, // Message size is also stored in MailMeta - Larger(_) | Smaller(_) => true, - And(and_list) => and_list.as_ref().iter().any(|sk| Criteria(sk).need_meta()), - Not(inner) => Criteria(inner).need_meta(), - Or(left, right) => Criteria(left).need_meta() || Criteria(right).need_meta(), - _ => false, + Larger(_) | Smaller(_) => QueryScope::Partial, + // Text and Body require that we fetch the full content! 
+ Text(_) | Body(_) => QueryScope::Full, + + _ => QueryScope::Index, + } + } + + /// Returns emails that we know for sure we want to keep + /// but also a second list of emails we need to investigate further by + /// fetching some remote data + pub fn filter_on_idx<'b>( + &self, + midx_list: &[&'b MailIndex<'b>], + ) -> (Vec<&'b MailIndex<'b>>, Vec<&'b MailIndex<'b>>) { + let (p1, p2): (Vec<_>, Vec<_>) = midx_list + .iter() + .map(|x| (x, self.is_keep_on_idx(x))) + .filter(|(_midx, decision)| decision.is_keep()) + .map(|(midx, decision)| (*midx, decision)) + .partition(|(_midx, decision)| matches!(decision, PartialDecision::Keep)); + + let to_keep = p1.into_iter().map(|(v, _)| v).collect(); + let to_fetch = p2.into_iter().map(|(v, _)| v).collect(); + (to_keep, to_fetch) + } + + /// Second filtering pass, run on the emails for which extra data was fetched. + /// `midx_list` and `query_result` are paired by position (`zip`), each pair is + /// turned into a `MailView`, and only the index entries whose view satisfies + /// the whole search key are returned. Fails if a `MailView` can't be built. + pub fn filter_on_query<'b>( + &self, + midx_list: &[&'b MailIndex<'b>], + query_result: &'b Vec<QueryResult<'b>>, + ) -> Result<Vec<&'b MailIndex<'b>>> { + Ok(midx_list + .iter() + .zip(query_result.iter()) + .map(|(midx, qr)| MailView::new(qr, midx)) + .collect::<Result<Vec<_>, _>>()? + .into_iter() + .filter(|mail_view| self.is_keep_on_query(mail_view)) + .map(|mail_view| mail_view.in_idx) + .collect()) + } + + // ---- + + /// Here we are doing a partial filtering: we do not have access + /// to the headers or to the body, so every time we encounter a rule + /// based on them, we need to keep it. + /// + /// @TODO Could be optimized on a per-email basis by also returning the QueryScope + /// when more information is needed! 
+ fn is_keep_on_idx(&self, midx: &MailIndex) -> PartialDecision { + use SearchKey::*; + match self.0 { + // Combinator logic + And(expr_list) => expr_list + .as_ref() + .iter() + .fold(PartialDecision::Keep, |acc, cur| { + acc.and(&Criteria(cur).is_keep_on_idx(midx)) + }), + Or(left, right) => { + let left_decision = Criteria(left).is_keep_on_idx(midx); + let right_decision = Criteria(right).is_keep_on_idx(midx); + left_decision.or(&right_decision) + } + Not(expr) => Criteria(expr).is_keep_on_idx(midx).not(), + All => PartialDecision::Keep, + + // Sequence logic + maybe_seq if is_sk_seq(maybe_seq) => is_keep_seq(maybe_seq, midx).into(), + maybe_flag if is_sk_flag(maybe_flag) => is_keep_flag(maybe_flag, midx).into(), + + // All the stuff we can't evaluate yet + Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_) + | Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_) + | Text(_) | Body(_) => PartialDecision::Postpone, + + unknown => { + tracing::error!("Unknown filter {:?}", unknown); + PartialDecision::Discard + } } } - pub fn need_body(&self) -> bool { + /// @TODO we re-evaluate the same logic twice. The correct way would be, on each pass, + /// to simplify the search query, by removing the elements that were already checked. + /// For example if we have AND(OR(seqid(X), body(Y)), body(X)), we can't keep for sure + /// the email, as body(x) might be false. So we need to check it. But as seqid(x) is true, + /// we could simplify the request to just body(x) and truncate the first OR. Today, we are + /// not doing that, and thus we reevaluate everything. 
+ fn is_keep_on_query(&self, mail_view: &MailView) -> bool { use SearchKey::*; match self.0 { - Text(_) | Body(_) => true, - And(and_list) => and_list.as_ref().iter().any(|sk| Criteria(sk).need_body()), - Not(inner) => Criteria(inner).need_body(), - Or(left, right) => Criteria(left).need_body() || Criteria(right).need_body(), - _ => false, + // Combinator logic + And(expr_list) => expr_list + .as_ref() + .iter() + .all(|cur| Criteria(cur).is_keep_on_query(mail_view)), + Or(left, right) => { + Criteria(left).is_keep_on_query(mail_view) + || Criteria(right).is_keep_on_query(mail_view) + } + Not(expr) => !Criteria(expr).is_keep_on_query(mail_view), + All => true, + + // Reevaluating our previous logic... + maybe_seq if is_sk_seq(maybe_seq) => is_keep_seq(maybe_seq, &mail_view.in_idx), + maybe_flag if is_sk_flag(maybe_flag) => is_keep_flag(maybe_flag, &mail_view.in_idx), + + // Filter on mail meta + Before(search_naive) => match mail_view.stored_naive_date() { + Ok(msg_naive) => &msg_naive < search_naive.as_ref(), + _ => false, + }, + On(search_naive) => match mail_view.stored_naive_date() { + Ok(msg_naive) => &msg_naive == search_naive.as_ref(), + _ => false, + }, + Since(search_naive) => match mail_view.stored_naive_date() { + Ok(msg_naive) => &msg_naive > search_naive.as_ref(), + _ => false, + }, + + // Message size is also stored in MailMeta + Larger(size_ref) => { + mail_view + .query_result + .metadata() + .expect("metadata were fetched") + .rfc822_size + > *size_ref as usize + } + Smaller(size_ref) => { + mail_view + .query_result + .metadata() + .expect("metadata were fetched") + .rfc822_size + < *size_ref as usize + } + + // Filter on well-known headers + Bcc(txt) => mail_view.is_header_contains_pattern(&b"bcc"[..], txt.as_ref()), + Cc(txt) => mail_view.is_header_contains_pattern(&b"cc"[..], txt.as_ref()), + From(txt) => mail_view.is_header_contains_pattern(&b"from"[..], txt.as_ref()), + Subject(txt) => mail_view.is_header_contains_pattern(&b"subject"[..], 
txt.as_ref()), + To(txt) => mail_view.is_header_contains_pattern(&b"to"[..], txt.as_ref()), + Header(hdr, txt) => mail_view.is_header_contains_pattern(hdr.as_ref(), txt.as_ref()), + + // Filter on Date header + SentBefore(search_naive) => mail_view + .imf() + .map(|imf| imf.naive_date().ok()) + .flatten() + .map(|msg_naive| &msg_naive < search_naive.as_ref()) + .unwrap_or(false), + SentOn(search_naive) => mail_view + .imf() + .map(|imf| imf.naive_date().ok()) + .flatten() + .map(|msg_naive| &msg_naive == search_naive.as_ref()) + .unwrap_or(false), + SentSince(search_naive) => mail_view + .imf() + .map(|imf| imf.naive_date().ok()) + .flatten() + .map(|msg_naive| &msg_naive > search_naive.as_ref()) + .unwrap_or(false), + + // Filter on the full content of the email + Text(txt) => mail_view + .content + .as_msg() + .map(|msg| { + msg.raw_part + .windows(txt.as_ref().len()) + .any(|win| win == txt.as_ref()) + }) + .unwrap_or(false), + Body(txt) => mail_view + .content + .as_msg() + .map(|msg| { + msg.raw_body + .windows(txt.as_ref().len()) + .any(|win| win == txt.as_ref()) + }) + .unwrap_or(false), + + unknown => { + tracing::error!("Unknown filter {:?}", unknown); + false + } } } } +// ---- Sequence things ---- fn sequence_set_all() -> SequenceSet { SequenceSet::from(Sequence::Range( SeqOrUid::Value(NonZeroU32::MIN), @@ -128,3 +330,131 @@ fn approx_sequence_size(seq: &Sequence) -> u64 { } } } + +// --- Partial decision things ---- + +enum PartialDecision { + Keep, + Discard, + Postpone, +} +impl From<bool> for PartialDecision { + fn from(x: bool) -> Self { + match x { + true => PartialDecision::Keep, + _ => PartialDecision::Discard, + } + } +} +impl PartialDecision { + fn not(&self) -> Self { + match self { + Self::Keep => Self::Discard, + Self::Discard => Self::Keep, + Self::Postpone => Self::Postpone, + } + } + + fn or(&self, other: &Self) -> Self { + match (self, other) { + (Self::Keep, _) | (_, Self::Keep) => Self::Keep, + (Self::Postpone, _) | (_, Self::Postpone) 
=> Self::Postpone, + (Self::Discard, Self::Discard) => Self::Discard, + } + } + + fn and(&self, other: &Self) -> Self { + match (self, other) { + (Self::Discard, _) | (_, Self::Discard) => Self::Discard, + (Self::Postpone, _) | (_, Self::Postpone) => Self::Postpone, + (Self::Keep, Self::Keep) => Self::Keep, + } + } + + fn is_keep(&self) -> bool { + !matches!(self, Self::Discard) + } +} + +// ----- Search Key things --- +fn is_sk_flag(sk: &SearchKey) -> bool { + use SearchKey::*; + match sk { + Answered | Deleted | Draft | Flagged | Keyword(..) | New | Old | Recent | Seen + | Unanswered | Undeleted | Undraft | Unflagged | Unkeyword(..) | Unseen => true, + _ => false, + } +} + +/// Evaluates a flag-based search key against the flags of one index entry. +/// Must only be called with a key accepted by `is_sk_flag`; any other key +/// hits `unreachable!()`. +fn is_keep_flag(sk: &SearchKey, midx: &MailIndex) -> bool { + use SearchKey::*; + match sk { + Answered => midx.is_flag_set("\\Answered"), + Deleted => midx.is_flag_set("\\Deleted"), + Draft => midx.is_flag_set("\\Draft"), + Flagged => midx.is_flag_set("\\Flagged"), + Keyword(kw) => midx.is_flag_set(kw.inner()), + New => { + let is_recent = midx.is_flag_set("\\Recent"); + let is_seen = midx.is_flag_set("\\Seen"); + is_recent && !is_seen + } + Old => { + let is_recent = midx.is_flag_set("\\Recent"); + !is_recent + } + Recent => midx.is_flag_set("\\Recent"), + Seen => midx.is_flag_set("\\Seen"), + // UNANSWERED is the negation of \Answered (it used to test \Recent) + Unanswered => { + let is_answered = midx.is_flag_set("\\Answered"); + !is_answered + } + Undeleted => { + let is_deleted = midx.is_flag_set("\\Deleted"); + !is_deleted + } + Undraft => { + let is_draft = midx.is_flag_set("\\Draft"); + !is_draft + } + Unflagged => { + let is_flagged = midx.is_flag_set("\\Flagged"); + !is_flagged + } + Unkeyword(kw) => { + let is_keyword_set = midx.is_flag_set(kw.inner()); + !is_keyword_set + } + Unseen => { + let is_seen = midx.is_flag_set("\\Seen"); + !is_seen + } + + // Not flag logic + _ => unreachable!(), + } +} + +fn is_sk_seq(sk: &SearchKey) -> bool { + use SearchKey::*; + match sk { + SequenceSet(..) | Uid(..) 
=> true, + _ => false, + } +} +fn is_keep_seq(sk: &SearchKey, midx: &MailIndex) -> bool { + use SearchKey::*; + match sk { + SequenceSet(seq_set) => seq_set + .0 + .as_ref() + .iter() + .any(|seq| midx.is_in_sequence_i(seq)), + Uid(seq_set) => seq_set + .0 + .as_ref() + .iter() + .any(|seq| midx.is_in_sequence_uid(seq)), + _ => unreachable!(), + } +} |