aboutsummaryrefslogtreecommitdiff
path: root/src/imap
diff options
context:
space:
mode:
authorQuentin <quentin@dufour.io>2024-01-08 10:39:26 +0000
committerQuentin <quentin@dufour.io>2024-01-08 10:39:26 +0000
commitd7788e29a8a64550e9b274001ff3fb9a7bf3473b (patch)
treee43a11753472f1917ce4aa6ddba24ae3a513bd50 /src/imap
parent152d5b7604337fe19a7aea7fc37b3d4615ca7393 (diff)
parent42a54b2c500294c594f3efdd25db28c18f5ac238 (diff)
downloadaerogramme-d7788e29a8a64550e9b274001ff3fb9a7bf3473b.tar.gz
aerogramme-d7788e29a8a64550e9b274001ff3fb9a7bf3473b.zip
Merge pull request 'Implement search' (#61) from feat/search into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/aerogramme/pulls/61
Diffstat (limited to 'src/imap')
-rw-r--r--src/imap/capability.rs1
-rw-r--r--src/imap/command/examined.rs12
-rw-r--r--src/imap/imf_view.rs126
-rw-r--r--src/imap/index.rs222
-rw-r--r--src/imap/mail_view.rs160
-rw-r--r--src/imap/mailbox_view.rs39
-rw-r--r--src/imap/mime_view.rs33
-rw-r--r--src/imap/search.rs360
8 files changed, 731 insertions, 222 deletions
diff --git a/src/imap/capability.rs b/src/imap/capability.rs
index 631c3e2..feadb6b 100644
--- a/src/imap/capability.rs
+++ b/src/imap/capability.rs
@@ -22,6 +22,7 @@ impl Default for ServerCapability {
fn default() -> Self {
Self(HashSet::from([
Capability::Imap4Rev1,
+ Capability::Enable,
Capability::Move,
Capability::LiteralPlus,
capability_unselect(),
diff --git a/src/imap/command/examined.rs b/src/imap/command/examined.rs
index ec16973..3dd11e2 100644
--- a/src/imap/command/examined.rs
+++ b/src/imap/command/examined.rs
@@ -111,15 +111,17 @@ impl<'a> ExaminedContext<'a> {
pub async fn search(
self,
- _charset: &Option<Charset<'a>>,
- _criteria: &SearchKey<'a>,
- _uid: &bool,
+ charset: &Option<Charset<'a>>,
+ criteria: &SearchKey<'a>,
+ uid: &bool,
) -> Result<(Response<'static>, flow::Transition)> {
+ let found = self.mailbox.search(charset, criteria, *uid).await?;
Ok((
Response::build()
.to_req(self.req)
- .message("Not implemented")
- .bad()?,
+ .set_body(found)
+ .message("SEARCH completed")
+ .ok()?,
flow::Transition::None,
))
}
diff --git a/src/imap/imf_view.rs b/src/imap/imf_view.rs
index 4297769..a4ca2e8 100644
--- a/src/imap/imf_view.rs
+++ b/src/imap/imf_view.rs
@@ -1,68 +1,80 @@
+use anyhow::{anyhow, Result};
+use chrono::naive::NaiveDate;
+
use imap_codec::imap_types::core::{IString, NString};
use imap_codec::imap_types::envelope::{Address, Envelope};
use eml_codec::imf;
-/// Envelope rules are defined in RFC 3501, section 7.4.2
-/// https://datatracker.ietf.org/doc/html/rfc3501#section-7.4.2
-///
-/// Some important notes:
-///
-/// If the Sender or Reply-To lines are absent in the [RFC-2822]
-/// header, or are present but empty, the server sets the
-/// corresponding member of the envelope to be the same value as
-/// the from member (the client is not expected to know to do
-/// this). Note: [RFC-2822] requires that all messages have a valid
-/// From header. Therefore, the from, sender, and reply-to
-/// members in the envelope can not be NIL.
-///
-/// If the Date, Subject, In-Reply-To, and Message-ID header lines
-/// are absent in the [RFC-2822] header, the corresponding member
-/// of the envelope is NIL; if these header lines are present but
-/// empty the corresponding member of the envelope is the empty
-/// string.
+/// Borrowed wrapper around a parsed `eml_codec::imf::Imf` header set.
+///
+/// It carries no data of its own: it only serves as the receiver for the
+/// IMAP-oriented accessors of its `impl` block.
+pub struct ImfView<'a>(pub &'a imf::Imf<'a>);
-//@FIXME return an error if the envelope is invalid instead of panicking
-//@FIXME some fields must be defaulted if there are not set.
-pub fn message_envelope(msg: &imf::Imf) -> Envelope<'static> {
- let from = msg.from.iter().map(convert_mbx).collect::<Vec<_>>();
+impl<'a> ImfView<'a> {
+    /// Calendar date of the parsed `date` field, with time of day and offset dropped.
+    ///
+    /// Fails with "date is not set" when the wrapped headers carry no date.
+    pub fn naive_date(&self) -> Result<NaiveDate> {
+        match self.0.date.as_ref() {
+            Some(date) => Ok(date.date_naive()),
+            None => Err(anyhow!("date is not set")),
+        }
+    }
- Envelope {
- date: NString(
- msg.date
- .as_ref()
- .map(|d| IString::try_from(d.to_rfc3339()).unwrap()),
- ),
- subject: NString(
- msg.subject
- .as_ref()
- .map(|d| IString::try_from(d.to_string()).unwrap()),
- ),
- sender: msg
- .sender
- .as_ref()
- .map(|v| vec![convert_mbx(v)])
- .unwrap_or(from.clone()),
- reply_to: if msg.reply_to.is_empty() {
- from.clone()
- } else {
- convert_addresses(&msg.reply_to)
- },
- from,
- to: convert_addresses(&msg.to),
- cc: convert_addresses(&msg.cc),
- bcc: convert_addresses(&msg.bcc),
- in_reply_to: NString(
- msg.in_reply_to
- .iter()
- .next()
- .map(|d| IString::try_from(d.to_string()).unwrap()),
- ),
- message_id: NString(
- msg.msg_id
+ /// Envelope rules are defined in RFC 3501, section 7.4.2
+ /// https://datatracker.ietf.org/doc/html/rfc3501#section-7.4.2
+ ///
+ /// Some important notes:
+ ///
+ /// If the Sender or Reply-To lines are absent in the [RFC-2822]
+ /// header, or are present but empty, the server sets the
+ /// corresponding member of the envelope to be the same value as
+ /// the from member (the client is not expected to know to do
+ /// this). Note: [RFC-2822] requires that all messages have a valid
+ /// From header. Therefore, the from, sender, and reply-to
+ /// members in the envelope can not be NIL.
+ ///
+ /// If the Date, Subject, In-Reply-To, and Message-ID header lines
+ /// are absent in the [RFC-2822] header, the corresponding member
+ /// of the envelope is NIL; if these header lines are present but
+ /// empty the corresponding member of the envelope is the empty
+ /// string.
+
+ //@FIXME return an error if the envelope is invalid instead of panicking
+ //@FIXME some fields must be defaulted if they are not set.
+ pub fn message_envelope(&self) -> Envelope<'static> {
+ let msg = self.0;
+ let from = msg.from.iter().map(convert_mbx).collect::<Vec<_>>();
+
+ Envelope {
+ date: NString(
+ msg.date
+ .as_ref()
+ .map(|d| IString::try_from(d.to_rfc3339()).unwrap()),
+ ),
+ subject: NString(
+ msg.subject
+ .as_ref()
+ .map(|d| IString::try_from(d.to_string()).unwrap()),
+ ),
+ sender: msg
+ .sender
.as_ref()
- .map(|d| IString::try_from(d.to_string()).unwrap()),
- ),
+ .map(|v| vec![convert_mbx(v)])
+ .unwrap_or(from.clone()),
+ reply_to: if msg.reply_to.is_empty() {
+ from.clone()
+ } else {
+ convert_addresses(&msg.reply_to)
+ },
+ from,
+ to: convert_addresses(&msg.to),
+ cc: convert_addresses(&msg.cc),
+ bcc: convert_addresses(&msg.bcc),
+ in_reply_to: NString(
+ msg.in_reply_to
+ .iter()
+ .next()
+ .map(|d| IString::try_from(d.to_string()).unwrap()),
+ ),
+ message_id: NString(
+ msg.msg_id
+ .as_ref()
+ .map(|d| IString::try_from(d.to_string()).unwrap()),
+ ),
+ }
}
}
diff --git a/src/imap/index.rs b/src/imap/index.rs
index 01dd2ef..3ca5562 100644
--- a/src/imap/index.rs
+++ b/src/imap/index.rs
@@ -1,95 +1,181 @@
use std::num::NonZeroU32;
-use anyhow::{anyhow, bail, Result};
-use imap_codec::imap_types::sequence::{self, SequenceSet};
+use anyhow::{anyhow, Context, Result};
+use imap_codec::imap_types::sequence::{self, SeqOrUid, Sequence, SequenceSet};
use crate::mail::uidindex::{ImapUid, UidIndex};
use crate::mail::unique_ident::UniqueIdent;
-pub struct Index<'a>(pub &'a UidIndex);
+/// IMAP-oriented view over a `UidIndex`, built by `Index::new`.
+pub struct Index<'a> {
+    /// One descriptor per mail, in the iteration order of `internal.idx_by_uid`;
+    /// the descriptor at position `p` carries the sequence number `p + 1`.
+    pub imap_index: Vec<MailIndex<'a>>,
+    /// The underlying index the descriptors were derived from.
+    pub internal: &'a UidIndex,
+}
impl<'a> Index<'a> {
- pub fn fetch(
- self: &Index<'a>,
- sequence_set: &SequenceSet,
- by_uid: bool,
- ) -> Result<Vec<MailIndex<'a>>> {
- let mail_vec = self
- .0
+ pub fn new(internal: &'a UidIndex) -> Result<Self> {
+ let imap_index = internal
.idx_by_uid
.iter()
- .map(|(uid, uuid)| (*uid, *uuid))
- .collect::<Vec<_>>();
+ .enumerate()
+ .map(|(i_enum, (&uid, &uuid))| {
+ let flags = internal
+ .table
+ .get(&uuid)
+ .ok_or(anyhow!("mail is missing from index"))?
+ .1
+ .as_ref();
+ let i_int: u32 = (i_enum + 1).try_into()?;
+ let i: NonZeroU32 = i_int.try_into()?;
- let mut mails = vec![];
+ Ok(MailIndex {
+ i,
+ uid,
+ uuid,
+ flags,
+ })
+ })
+ .collect::<Result<Vec<_>>>()?;
- if by_uid {
- if mail_vec.is_empty() {
- return Ok(vec![]);
- }
- let iter_strat = sequence::Strategy::Naive {
- largest: mail_vec.last().unwrap().0,
- };
+ Ok(Self {
+ imap_index,
+ internal,
+ })
+ }
- let mut i = 0;
- for uid in sequence_set.iter(iter_strat) {
- while mail_vec.get(i).map(|mail| mail.0 < uid).unwrap_or(false) {
- i += 1;
- }
- if let Some(mail) = mail_vec.get(i) {
- if mail.0 == uid {
- mails.push(MailIndex {
- i: NonZeroU32::try_from(i as u32 + 1).unwrap(),
- uid: mail.0,
- uuid: mail.1,
- flags: self
- .0
- .table
- .get(&mail.1)
- .ok_or(anyhow!("mail is missing from index"))?
- .1
- .as_ref(),
- });
- }
- } else {
- break;
- }
- }
- } else {
- if mail_vec.is_empty() {
- bail!("No such message (mailbox is empty)");
- }
+    /// Last descriptor of `imap_index`, or `None` when the index holds no mail.
+    pub fn last(&'a self) -> Option<&'a MailIndex<'a>> {
+        let entries: &'a [MailIndex<'a>] = self.imap_index.as_slice();
+        entries.last()
+    }
+
+ /// Fetch mail descriptors based on a sequence of UID
+ ///
+ /// Complexity analysis:
+ /// - Sort is O(n * log n) where n is the number of uid generated by the sequence
+ /// - Finding the starting point in the index O(log m) where m is the size of the mailbox
+ /// While n <= m, it's not clear if the difference is big or not.
+ ///
+ /// For now, the algorithm tries to be fast for small values of n,
+ /// as it is what is expected by clients.
+ ///
+ /// So we assume for our implementation that : n << m.
+ /// It's not true for full mailbox searches for example...
+ pub fn fetch_on_uid(&'a self, sequence_set: &SequenceSet) -> Vec<&'a MailIndex<'a>> {
+ if self.imap_index.is_empty() {
+ return vec![];
+ }
+ let iter_strat = sequence::Strategy::Naive {
+ largest: self.last().expect("imap index is not empty").uid,
+ };
+ let mut unroll_seq = sequence_set.iter(iter_strat).collect::<Vec<_>>();
+ unroll_seq.sort();
+
+ let start_seq = match unroll_seq.iter().next() {
+ Some(elem) => elem,
+ None => return vec![],
+ };
+
+ // Quickly jump to the right point in the mailbox vector O(log m) instead
+ // of iterating one by one O(m). Works only because both unroll_seq & imap_index are sorted per uid.
+ let mut imap_idx = {
+ let start_idx = self
+ .imap_index
+ .partition_point(|mail_idx| &mail_idx.uid < start_seq);
+ &self.imap_index[start_idx..]
+ };
+ println!(
+ "win: {:?}",
+ imap_idx.iter().map(|midx| midx.uid).collect::<Vec<_>>()
+ );
- let iter_strat = sequence::Strategy::Naive {
- largest: NonZeroU32::try_from((mail_vec.len()) as u32).unwrap(),
+ let mut acc = vec![];
+ for wanted_uid in unroll_seq.iter() {
+ // Slide the window forward as long as its first element is lower than our wanted uid.
+ let start_idx = match imap_idx.iter().position(|midx| &midx.uid >= wanted_uid) {
+ Some(v) => v,
+ None => break,
};
+ imap_idx = &imap_idx[start_idx..];
- for i in sequence_set.iter(iter_strat) {
- if let Some(mail) = mail_vec.get(i.get() as usize - 1) {
- mails.push(MailIndex {
- i,
- uid: mail.0,
- uuid: mail.1,
- flags: self
- .0
- .table
- .get(&mail.1)
- .ok_or(anyhow!("mail is missing from index"))?
- .1
- .as_ref(),
- });
- } else {
- bail!("No such mail: {}", i);
- }
+ // If the beginning of our new window is the uid we want, we collect it
+ if &imap_idx[0].uid == wanted_uid {
+ acc.push(&imap_idx[0]);
}
}
- Ok(mails)
+ acc
+ }
+
+    /// Fetch mail descriptors based on a set of message sequence numbers.
+    ///
+    /// Sequence numbers are 1-based positions in `imap_index`; `*` resolves to the
+    /// sequence number of the last mail.
+    ///
+    /// # Errors
+    /// - "The mailbox is empty" when the index holds no mail,
+    /// - "Mail not found" when a requested sequence number has no matching entry.
+    pub fn fetch_on_id(&'a self, sequence_set: &SequenceSet) -> Result<Vec<&'a MailIndex<'a>>> {
+        let iter_strat = sequence::Strategy::Naive {
+            // The bound must be the largest *sequence number*, not the largest UID:
+            // UIDs can have holes, so the last UID may exceed the number of mails and
+            // would make `*` (or `n:*`) expand to positions that do not exist.
+            largest: self.last().context("The mailbox is empty")?.i,
+        };
+        sequence_set
+            .iter(iter_strat)
+            .map(|wanted_id| {
+                self.imap_index
+                    .get((wanted_id.get() as usize) - 1)
+                    .ok_or_else(|| anyhow!("Mail not found"))
+            })
+            .collect::<Result<Vec<_>>>()
+    }
+
+    /// Resolve `sequence_set` to mail descriptors, reading it as a UID set when
+    /// `by_uid` is true and as a sequence-number set otherwise.
+    ///
+    /// The UID path never fails; the sequence-number path forwards the errors of
+    /// `fetch_on_id`.
+    pub fn fetch(
+        self: &'a Index<'a>,
+        sequence_set: &SequenceSet,
+        by_uid: bool,
+    ) -> Result<Vec<&'a MailIndex<'a>>> {
+        if by_uid {
+            return Ok(self.fetch_on_uid(sequence_set));
+        }
+        self.fetch_on_id(sequence_set)
+    }
}
+#[derive(Clone, Debug)]
pub struct MailIndex<'a> {
pub i: NonZeroU32,
pub uid: ImapUid,
pub uuid: UniqueIdent,
pub flags: &'a Vec<String>,
}
+
+impl<'a> MailIndex<'a> {
+    // The following functions are used to implement the SEARCH command
+
+    /// Shared matching rule for sequence numbers and UIDs: tells whether `value`
+    /// is designated by `seq`.
+    ///
+    /// A lone `*` and `*:*` accept every value; a range with one `*` bound accepts
+    /// every value at or above the other bound; a fully numeric range accepts the
+    /// values between its two bounds, whatever their order.
+    fn sequence_covers(seq: &Sequence, value: &NonZeroU32) -> bool {
+        use SeqOrUid::{Asterisk, Value};
+
+        match seq {
+            Sequence::Single(Asterisk) | Sequence::Range(Asterisk, Asterisk) => true,
+            Sequence::Single(Value(target)) => target == value,
+            Sequence::Range(Asterisk, Value(bound)) | Sequence::Range(Value(bound), Asterisk) => {
+                bound <= value
+            }
+            Sequence::Range(Value(first), Value(second)) => {
+                let (low, high) = if first < second {
+                    (first, second)
+                } else {
+                    (second, first)
+                };
+                low <= value && value <= high
+            }
+        }
+    }
+
+    /// Does `seq`, read as message sequence numbers, designate this mail?
+    pub fn is_in_sequence_i(&self, seq: &Sequence) -> bool {
+        Self::sequence_covers(seq, &self.i)
+    }
+
+    /// Does `seq`, read as UIDs, designate this mail?
+    pub fn is_in_sequence_uid(&self, seq: &Sequence) -> bool {
+        Self::sequence_covers(seq, &self.uid)
+    }
+
+    /// Is `flag` (exact, case-sensitive comparison) among the flags of this mail?
+    pub fn is_flag_set(&self, flag: &str) -> bool {
+        self.flags.iter().map(String::as_str).any(|set| set == flag)
+    }
+}
diff --git a/src/imap/mail_view.rs b/src/imap/mail_view.rs
index de9bfe3..7da21c4 100644
--- a/src/imap/mail_view.rs
+++ b/src/imap/mail_view.rs
@@ -1,7 +1,7 @@
use std::num::NonZeroU32;
use anyhow::{anyhow, bail, Result};
-use chrono::{Offset, TimeZone, Utc};
+use chrono::{naive::NaiveDate, DateTime as ChronoDateTime, Local, Offset, TimeZone, Utc};
use imap_codec::imap_types::core::NString;
use imap_codec::imap_types::datetime::DateTime;
@@ -20,19 +20,22 @@ use crate::mail::query::QueryResult;
use crate::imap::attributes::AttributesProxy;
use crate::imap::flags;
-use crate::imap::imf_view::message_envelope;
+use crate::imap::imf_view::ImfView;
use crate::imap::index::MailIndex;
use crate::imap::mime_view;
use crate::imap::response::Body;
pub struct MailView<'a> {
- pub in_idx: MailIndex<'a>,
+ pub in_idx: &'a MailIndex<'a>,
pub query_result: &'a QueryResult<'a>,
pub content: FetchedMail<'a>,
}
impl<'a> MailView<'a> {
- pub fn new(query_result: &'a QueryResult<'a>, in_idx: MailIndex<'a>) -> Result<MailView<'a>> {
+ pub fn new(
+ query_result: &'a QueryResult<'a>,
+ in_idx: &'a MailIndex<'a>,
+ ) -> Result<MailView<'a>> {
Ok(Self {
in_idx,
query_result,
@@ -40,18 +43,88 @@ impl<'a> MailView<'a> {
QueryResult::FullResult { content, .. } => {
let (_, parsed) =
eml_codec::parse_message(&content).or(Err(anyhow!("Invalid mail body")))?;
- FetchedMail::new_from_message(parsed)
+ FetchedMail::full_from_message(parsed)
}
QueryResult::PartialResult { metadata, .. } => {
- let (_, parsed) = eml_codec::parse_imf(&metadata.headers)
+ let (_, parsed) = eml_codec::parse_message(&metadata.headers)
.or(Err(anyhow!("unable to parse email headers")))?;
- FetchedMail::Partial(parsed)
+ FetchedMail::partial_from_message(parsed)
}
QueryResult::IndexResult { .. } => FetchedMail::IndexOnly,
},
})
}
+    /// IMF header view of the fetched content, or `None` when `FetchedMail::as_imf`
+    /// has nothing to offer.
+    pub fn imf(&self) -> Option<ImfView> {
+        let headers = self.content.as_imf()?;
+        Some(ImfView(headers))
+    }
+
+    /// MIME view over the fetched part; the error of `FetchedMail::as_anypart`
+    /// is discarded and reported as `None`.
+    pub fn selected_mime(&'a self) -> Option<mime_view::SelectedMime<'a>> {
+        match self.content.as_anypart() {
+            Ok(part) => Some(mime_view::SelectedMime(part)),
+            Err(_) => None,
+        }
+    }
+
+    /// Build the FETCH data for this mail: one item per attribute of `ap.attrs`,
+    /// in the order they are listed.
+    ///
+    /// Returns the response body together with the `SeenFlag` reported by the
+    /// last `BodyExt` attribute processed (`SeenFlag::DoNothing` when there is none).
+    ///
+    /// # Errors
+    /// Stops at the first attribute whose accessor fails, and also fails when the
+    /// collected items cannot be converted with `try_into` (presumably when no
+    /// attribute was requested; to be confirmed against the target type).
+    pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> {
+        // Overwritten by every `BodyExt` attribute, so the last one wins.
+        let mut seen = SeenFlag::DoNothing;
+        let res_attrs = ap
+            .attrs
+            .iter()
+            .map(|attr| match attr {
+                MessageDataItemName::Uid => Ok(self.uid()),
+                MessageDataItemName::Flags => Ok(self.flags()),
+                MessageDataItemName::Rfc822Size => self.rfc_822_size(),
+                MessageDataItemName::Rfc822Header => self.rfc_822_header(),
+                MessageDataItemName::Rfc822Text => self.rfc_822_text(),
+                MessageDataItemName::Rfc822 => self.rfc822(),
+                MessageDataItemName::Envelope => Ok(self.envelope()),
+                MessageDataItemName::Body => self.body(),
+                MessageDataItemName::BodyStructure => self.body_structure(),
+                MessageDataItemName::BodyExt {
+                    section,
+                    partial,
+                    peek,
+                } => {
+                    let (body, has_seen) = self.body_ext(section, partial, peek)?;
+                    seen = has_seen;
+                    Ok(body)
+                }
+                MessageDataItemName::InternalDate => self.internal_date(),
+            })
+            .collect::<Result<Vec<_>, _>>()?;
+
+        Ok((
+            Body::Data(Data::Fetch {
+                // The response is always keyed by sequence number, never by UID.
+                seq: self.in_idx.i,
+                items: res_attrs.try_into()?,
+            }),
+            seen,
+        ))
+    }
+
+    /// Calendar day of the mail's `internaldate` metadata, read as a Unix
+    /// timestamp in seconds and expressed in the system's local time zone.
+    ///
+    /// # Errors
+    /// Fails when the stored value does not fit an `i64` or is not a valid timestamp.
+    ///
+    /// # Panics
+    /// Panics when the query result carries no metadata.
+    pub fn stored_naive_date(&self) -> Result<NaiveDate> {
+        let meta = self.query_result.metadata().expect("metadata were fetched");
+        let unix_secs: i64 = meta.internaldate.try_into()?;
+        let stored_at = ChronoDateTime::from_timestamp(unix_secs, 0)
+            .ok_or(anyhow!("unable to parse timestamp"))?;
+        let local: ChronoDateTime<Local> = stored_at.with_timezone(&Local);
+
+        Ok(local.date_naive())
+    }
+
+    /// Tells whether the header named `hdr` exists and its raw value contains `pattern`.
+    ///
+    /// Matching follows the IMAP SEARCH rules (RFC 3501, section 6.4.4):
+    /// - substring match, ASCII case-insensitive,
+    /// - an empty `pattern` matches as soon as the header is present.
+    ///
+    /// Returns `false` when no MIME view is available or the header is absent.
+    pub fn is_header_contains_pattern(&self, hdr: &[u8], pattern: &[u8]) -> bool {
+        let mime = match self.selected_mime() {
+            None => return false,
+            Some(x) => x,
+        };
+
+        let val = match mime.header_value(hdr) {
+            None => return false,
+            Some(x) => x,
+        };
+
+        // `slice::windows` panics on a zero size, and the pattern comes straight from
+        // the client, so the empty search string must be handled before sliding.
+        if pattern.is_empty() {
+            return true;
+        }
+
+        val.windows(pattern.len())
+            .any(|win| win.eq_ignore_ascii_case(pattern))
+    }
+
+ // Private function, mainly for filter!
fn uid(&self) -> MessageDataItem<'static> {
MessageDataItem::Uid(self.in_idx.uid.clone())
}
@@ -87,28 +160,32 @@ impl<'a> MailView<'a> {
}
fn rfc_822_text(&self) -> Result<MessageDataItem<'static>> {
- let txt: NString = self.content.as_full()?.raw_body.to_vec().try_into()?;
+ let txt: NString = self.content.as_msg()?.raw_body.to_vec().try_into()?;
Ok(MessageDataItem::Rfc822Text(txt))
}
fn rfc822(&self) -> Result<MessageDataItem<'static>> {
- let full: NString = self.content.as_full()?.raw_part.to_vec().try_into()?;
+ let full: NString = self.content.as_msg()?.raw_part.to_vec().try_into()?;
Ok(MessageDataItem::Rfc822(full))
}
fn envelope(&self) -> MessageDataItem<'static> {
- MessageDataItem::Envelope(message_envelope(self.content.imf().clone()))
+ MessageDataItem::Envelope(
+ self.imf()
+ .expect("an imf object is derivable from fetchedmail")
+ .message_envelope(),
+ )
}
fn body(&self) -> Result<MessageDataItem<'static>> {
Ok(MessageDataItem::Body(mime_view::bodystructure(
- self.content.as_full()?.child.as_ref(),
+ self.content.as_msg()?.child.as_ref(),
)?))
}
fn body_structure(&self) -> Result<MessageDataItem<'static>> {
Ok(MessageDataItem::Body(mime_view::bodystructure(
- self.content.as_full()?.child.as_ref(),
+ self.content.as_msg()?.child.as_ref(),
)?))
}
@@ -167,43 +244,6 @@ impl<'a> MailView<'a> {
.ok_or(anyhow!("Unable to parse internal date"))?;
Ok(MessageDataItem::InternalDate(DateTime::unvalidated(dt)))
}
-
- pub fn filter(&self, ap: &AttributesProxy) -> Result<(Body<'static>, SeenFlag)> {
- let mut seen = SeenFlag::DoNothing;
- let res_attrs = ap
- .attrs
- .iter()
- .map(|attr| match attr {
- MessageDataItemName::Uid => Ok(self.uid()),
- MessageDataItemName::Flags => Ok(self.flags()),
- MessageDataItemName::Rfc822Size => self.rfc_822_size(),
- MessageDataItemName::Rfc822Header => self.rfc_822_header(),
- MessageDataItemName::Rfc822Text => self.rfc_822_text(),
- MessageDataItemName::Rfc822 => self.rfc822(),
- MessageDataItemName::Envelope => Ok(self.envelope()),
- MessageDataItemName::Body => self.body(),
- MessageDataItemName::BodyStructure => self.body_structure(),
- MessageDataItemName::BodyExt {
- section,
- partial,
- peek,
- } => {
- let (body, has_seen) = self.body_ext(section, partial, peek)?;
- seen = has_seen;
- Ok(body)
- }
- MessageDataItemName::InternalDate => self.internal_date(),
- })
- .collect::<Result<Vec<_>, _>>()?;
-
- Ok((
- Body::Data(Data::Fetch {
- seq: self.in_idx.i,
- items: res_attrs.try_into()?,
- }),
- seen,
- ))
- }
}
pub enum SeenFlag {
@@ -215,33 +255,39 @@ pub enum SeenFlag {
pub enum FetchedMail<'a> {
IndexOnly,
- Partial(imf::Imf<'a>),
+ Partial(AnyPart<'a>),
Full(AnyPart<'a>),
}
impl<'a> FetchedMail<'a> {
- pub fn new_from_message(msg: Message<'a>) -> Self {
+ pub fn full_from_message(msg: Message<'a>) -> Self {
Self::Full(AnyPart::Msg(msg))
}
- fn as_anypart(&self) -> Result<&AnyPart<'a>> {
+    /// Wrap a parsed message as a `Partial` fetch result.
+    pub fn partial_from_message(msg: Message<'a>) -> Self {
+        let part = AnyPart::Msg(msg);
+        FetchedMail::Partial(part)
+    }
+
+ pub fn as_anypart(&self) -> Result<&AnyPart<'a>> {
match self {
FetchedMail::Full(x) => Ok(&x),
+ FetchedMail::Partial(x) => Ok(&x),
_ => bail!("The full message must be fetched, not only its headers"),
}
}
- fn as_full(&self) -> Result<&Message<'a>> {
+ pub fn as_msg(&self) -> Result<&Message<'a>> {
match self {
FetchedMail::Full(AnyPart::Msg(x)) => Ok(&x),
+ FetchedMail::Partial(AnyPart::Msg(x)) => Ok(&x),
_ => bail!("The full message must be fetched, not only its headers AND it must be an AnyPart::Msg."),
}
}
- fn imf(&self) -> &imf::Imf<'a> {
+ pub fn as_imf(&self) -> Option<&imf::Imf<'a>> {
match self {
- FetchedMail::Full(AnyPart::Msg(x)) => &x.imf,
- FetchedMail::Partial(x) => &x,
- _ => panic!("Can't contain AnyPart that is not a message"),
+ FetchedMail::Full(AnyPart::Msg(x)) => Some(&x.imf),
+ FetchedMail::Partial(AnyPart::Msg(x)) => Some(&x.imf),
+ _ => None,
}
}
}
diff --git a/src/imap/mailbox_view.rs b/src/imap/mailbox_view.rs
index e4ffdcd..77fe7f7 100644
--- a/src/imap/mailbox_view.rs
+++ b/src/imap/mailbox_view.rs
@@ -146,7 +146,8 @@ impl MailboxView {
let flags = flags.iter().map(|x| x.to_string()).collect::<Vec<_>>();
- let mails = self.index().fetch(sequence_set, *is_uid_store)?;
+ let idx = self.index()?;
+ let mails = idx.fetch(sequence_set, *is_uid_store)?;
for mi in mails.iter() {
match kind {
StoreType::Add => {
@@ -189,7 +190,8 @@ impl MailboxView {
to: Arc<Mailbox>,
is_uid_copy: &bool,
) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>)> {
- let mails = self.index().fetch(sequence_set, *is_uid_copy)?;
+ let idx = self.index()?;
+ let mails = idx.fetch(sequence_set, *is_uid_copy)?;
let mut new_uuids = vec![];
for mi in mails.iter() {
@@ -216,7 +218,8 @@ impl MailboxView {
to: Arc<Mailbox>,
is_uid_copy: &bool,
) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>, Vec<Body<'static>>)> {
- let mails = self.index().fetch(sequence_set, *is_uid_copy)?;
+ let idx = self.index()?;
+ let mails = idx.fetch(sequence_set, *is_uid_copy)?;
for mi in mails.iter() {
to.move_from(&self.0.mailbox, mi.uuid).await?;
@@ -254,7 +257,8 @@ impl MailboxView {
true => QueryScope::Full,
_ => QueryScope::Partial,
};
- let mail_idx_list = self.index().fetch(sequence_set, *is_uid_fetch)?;
+ let idx = self.index()?;
+ let mail_idx_list = idx.fetch(sequence_set, *is_uid_fetch)?;
// [2/6] Fetch the emails
let uuids = mail_idx_list
@@ -316,29 +320,38 @@ impl MailboxView {
let (seq_set, seq_type) = crit.to_sequence_set();
// 2. Get the selection
- let selection = self.index().fetch(&seq_set, seq_type.is_uid())?;
+ let idx = self.index()?;
+ let selection = idx.fetch(&seq_set, seq_type.is_uid())?;
// 3. Filter the selection based on the ID / UID / Flags
+ let (kept_idx, to_fetch) = crit.filter_on_idx(&selection);
- // 4. If needed, filter the selection based on the metadata
- let _need_meta = crit.need_meta();
+ // 4. Fetch additional info about the emails
+ let query_scope = crit.query_scope();
+ let uuids = to_fetch.iter().map(|midx| midx.uuid).collect::<Vec<_>>();
+ let query_result = self.0.query(&uuids, query_scope).fetch().await?;
// 5. If needed, filter the selection based on the body
- let _need_body = crit.need_body();
+ let kept_query = crit.filter_on_query(&to_fetch, &query_result)?;
// 6. Format the result according to the client's taste:
// either return UID or ID.
+ let final_selection = kept_idx.into_iter().chain(kept_query.into_iter());
let selection_fmt = match uid {
- true => selection.into_iter().map(|in_idx| in_idx.uid).collect(),
- _ => selection.into_iter().map(|in_idx| in_idx.i).collect(),
+ true => final_selection.map(|in_idx| in_idx.uid).collect(),
+ _ => final_selection.map(|in_idx| in_idx.i).collect(),
};
Ok(vec![Body::Data(Data::Search(selection_fmt))])
}
// ----
- fn index<'a>(&'a self) -> Index<'a> {
- Index(&self.0.snapshot)
+ /// @FIXME index should be stored for longer than a single request
+ /// Instead they should be tied to the FrozenMailbox refresh
+ /// It's not trivial to refactor the code to do that, so we are doing
+ /// some useless computation for now...
+ fn index<'a>(&'a self) -> Result<Index<'a>> {
+ Index::new(&self.0.snapshot)
}
/// Produce an OK [UIDVALIDITY _] message corresponding to `known_state`
@@ -513,7 +526,7 @@ mod tests {
content: rfc822.to_vec(),
};
- let mv = MailView::new(&qr, mail_in_idx)?;
+ let mv = MailView::new(&qr, &mail_in_idx)?;
let (res_body, _seen) = mv.filter(&ap)?;
let fattr = match res_body {
diff --git a/src/imap/mime_view.rs b/src/imap/mime_view.rs
index 1f36c47..cf6c751 100644
--- a/src/imap/mime_view.rs
+++ b/src/imap/mime_view.rs
@@ -12,7 +12,7 @@ use eml_codec::{
header, mime, mime::r#type::Deductible, part::composite, part::discrete, part::AnyPart,
};
-use crate::imap::imf_view::message_envelope;
+use crate::imap::imf_view::ImfView;
pub enum BodySection<'a> {
Full(Cow<'a, [u8]>),
@@ -164,8 +164,23 @@ impl<'a> SubsettedSection<'a> {
/// Used for current MIME inspection
///
/// See NodeMime for recursive logic
-struct SelectedMime<'a>(&'a AnyPart<'a>);
+pub struct SelectedMime<'a>(pub &'a AnyPart<'a>);
impl<'a> SelectedMime<'a> {
+    /// Raw value of the first well-formed header whose name equals `to_match_ext`,
+    /// compared ASCII case-insensitively. Malformed header fields are ignored.
+    ///
+    /// Returns `None` when no such header exists in the selected MIME part.
+    pub fn header_value(&'a self, to_match_ext: &[u8]) -> Option<&'a [u8]> {
+        self.eml_mime()
+            .kv
+            .iter()
+            .filter_map(|field| match field {
+                header::Field::Good(header::Kv2(k, v)) => Some((k, v)),
+                _ => None,
+            })
+            // Compare in place instead of allocating a lowercased copy of every header name.
+            .find(|(k, _)| k.eq_ignore_ascii_case(to_match_ext))
+            .map(|(_, v)| v)
+            .copied()
+    }
+
/// The subsetted fetch section basically tells us the
/// extraction logic to apply on our selected MIME.
/// This function acts as a router for these logic.
@@ -200,6 +215,13 @@ impl<'a> SelectedMime<'a> {
Ok(ExtractedFull(bytes.to_vec().into()))
}
+    /// MIME headers to inspect for the selected part: for a message, those of its
+    /// child part; for any other part, its own.
+    fn eml_mime(&self) -> &eml_codec::mime::NaiveMIME<'_> {
+        if let AnyPart::Msg(msg) = &self.0 {
+            return msg.child.mime();
+        }
+        self.0.mime()
+    }
+
/// The [...] HEADER.FIELDS, and HEADER.FIELDS.NOT part
/// specifiers refer to the [RFC-2822] header of the message or of
/// an encapsulated [MIME-IMT] MESSAGE/RFC822 message.
@@ -231,10 +253,7 @@ impl<'a> SelectedMime<'a> {
.collect::<HashSet<_>>();
// Extract MIME headers
- let mime = match &self.0 {
- AnyPart::Msg(msg) => msg.child.mime(),
- other => other.mime(),
- };
+ let mime = self.eml_mime();
// Filter our MIME headers based on the field index
// 1. Keep only the correctly formatted headers
@@ -347,7 +366,7 @@ impl<'a> NodeMsg<'a> {
body: FetchBody {
basic,
specific: SpecificFields::Message {
- envelope: Box::new(message_envelope(&self.1.imf)),
+ envelope: Box::new(ImfView(&self.1.imf).message_envelope()),
body_structure: Box::new(NodeMime(&self.1.child).structure()?),
number_of_lines: nol(self.1.raw_part),
},
diff --git a/src/imap/search.rs b/src/imap/search.rs
index b3c6b05..22afd0c 100644
--- a/src/imap/search.rs
+++ b/src/imap/search.rs
@@ -1,7 +1,13 @@
+use std::num::NonZeroU32;
+
+use anyhow::Result;
use imap_codec::imap_types::core::NonEmptyVec;
use imap_codec::imap_types::search::SearchKey;
use imap_codec::imap_types::sequence::{SeqOrUid, Sequence, SequenceSet};
-use std::num::NonZeroU32;
+
+use crate::imap::index::MailIndex;
+use crate::imap::mail_view::MailView;
+use crate::mail::query::{QueryResult, QueryScope};
pub enum SeqType {
Undefined,
@@ -54,6 +60,10 @@ impl<'a> Criteria<'a> {
tracing::debug!(
"using AND in a search request is slow: no intersection is performed"
);
+ // As we perform no intersection, we don't care if we mix UIDs and IDs.
+ // We only keep the smallest range, be it ID or UID, depending on
+ // which one has the fewest items. This is an approximation as UID ranges
+ // can have holes while ID ones can't.
search_list
.as_ref()
.iter()
@@ -72,35 +82,227 @@ impl<'a> Criteria<'a> {
/// Not really clever as we can have cases where we filter out
/// the email before needing to inspect its meta.
/// But for now we are seeking the most basic/stupid algorithm.
- pub fn need_meta(&self) -> bool {
+ pub fn query_scope(&self) -> QueryScope {
use SearchKey::*;
match self.0 {
+ // Combinators
+ And(and_list) => and_list
+ .as_ref()
+ .iter()
+ .fold(QueryScope::Index, |prev, sk| {
+ prev.union(&Criteria(sk).query_scope())
+ }),
+ Not(inner) => Criteria(inner).query_scope(),
+ Or(left, right) => Criteria(left)
+ .query_scope()
+ .union(&Criteria(right).query_scope()),
+ All => QueryScope::Index,
+
// IMF Headers
Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_)
- | Subject(_) | To(_) => true,
+ | Subject(_) | To(_) => QueryScope::Partial,
// Internal Date is also stored in MailMeta
- Before(_) | On(_) | Since(_) => true,
+ Before(_) | On(_) | Since(_) => QueryScope::Partial,
// Message size is also stored in MailMeta
- Larger(_) | Smaller(_) => true,
- And(and_list) => and_list.as_ref().iter().any(|sk| Criteria(sk).need_meta()),
- Not(inner) => Criteria(inner).need_meta(),
- Or(left, right) => Criteria(left).need_meta() || Criteria(right).need_meta(),
- _ => false,
+ Larger(_) | Smaller(_) => QueryScope::Partial,
+ // Text and Body require that we fetch the full content!
+ Text(_) | Body(_) => QueryScope::Full,
+
+ _ => QueryScope::Index,
+ }
+ }
+
+    /// First filtering pass, based on the index only.
+    ///
+    /// Returns two lists, both in input order: the mails we know for sure we want
+    /// to keep, then the mails that are not discarded yet but need further
+    /// investigation by fetching some remote data.
+    pub fn filter_on_idx<'b>(
+        &self,
+        midx_list: &[&'b MailIndex<'b>],
+    ) -> (Vec<&'b MailIndex<'b>>, Vec<&'b MailIndex<'b>>) {
+        let mut to_keep = Vec::new();
+        let mut to_fetch = Vec::new();
+
+        for &midx in midx_list {
+            let decision = self.is_keep_on_idx(midx);
+            if !decision.is_keep() {
+                continue;
+            }
+
+            if matches!(decision, PartialDecision::Keep) {
+                to_keep.push(midx);
+            } else {
+                to_fetch.push(midx);
+            }
+        }
+
+        (to_keep, to_fetch)
+    }
+
+    /// Second filtering pass, based on the fetched data.
+    ///
+    /// `midx_list` and `query_result` are paired position by position (extra
+    /// entries on either side are ignored), every pair is turned into a
+    /// `MailView`, and only the descriptors accepted by `is_keep_on_query` are returned.
+    ///
+    /// # Errors
+    /// Fails, before any filtering takes place, if one of the views cannot be built.
+    pub fn filter_on_query<'b>(
+        &self,
+        midx_list: &[&'b MailIndex<'b>],
+        query_result: &'b Vec<QueryResult<'b>>,
+    ) -> Result<Vec<&'b MailIndex<'b>>> {
+        // Phase 1: build every view, bailing out on the first failure.
+        let mut views = Vec::new();
+        for (midx, qr) in midx_list.iter().zip(query_result.iter()) {
+            views.push(MailView::new(qr, midx)?);
+        }
+
+        // Phase 2: keep the descriptors whose view satisfies the criteria.
+        let mut kept = Vec::new();
+        for mail_view in views {
+            if self.is_keep_on_query(&mail_view) {
+                kept.push(mail_view.in_idx);
+            }
+        }
+        Ok(kept)
+    }
+
+ // ----
+
+ /// Here we are doing a partial filtering: we do not have access
+ /// to the headers or to the body, so every time we encounter a rule
+ /// based on them, we need to keep it.
+ ///
+ /// @TODO Could be optimized on a per-email basis by also returning the QueryScope
+ /// when more information is needed!
+ fn is_keep_on_idx(&self, midx: &MailIndex) -> PartialDecision {
+ use SearchKey::*;
+ match self.0 {
+ // Combinator logic
+ And(expr_list) => expr_list
+ .as_ref()
+ .iter()
+ .fold(PartialDecision::Keep, |acc, cur| {
+ acc.and(&Criteria(cur).is_keep_on_idx(midx))
+ }),
+ Or(left, right) => {
+ let left_decision = Criteria(left).is_keep_on_idx(midx);
+ let right_decision = Criteria(right).is_keep_on_idx(midx);
+ left_decision.or(&right_decision)
+ }
+ Not(expr) => Criteria(expr).is_keep_on_idx(midx).not(),
+ All => PartialDecision::Keep,
+
+ // Sequence logic
+ maybe_seq if is_sk_seq(maybe_seq) => is_keep_seq(maybe_seq, midx).into(),
+ maybe_flag if is_sk_flag(maybe_flag) => is_keep_flag(maybe_flag, midx).into(),
+
+ // All the stuff we can't evaluate yet
+ Bcc(_) | Cc(_) | From(_) | Header(..) | SentBefore(_) | SentOn(_) | SentSince(_)
+ | Subject(_) | To(_) | Before(_) | On(_) | Since(_) | Larger(_) | Smaller(_)
+ | Text(_) | Body(_) => PartialDecision::Postpone,
+
+ unknown => {
+ tracing::error!("Unknown filter {:?}", unknown);
+ PartialDecision::Discard
+ }
}
}
- pub fn need_body(&self) -> bool {
+ /// @TODO we evaluate the same logic twice. The correct way would be, on each pass,
+ /// to simplify the search query, by removing the elements that were already checked.
+ /// For example if we have AND(OR(seqid(X), body(Y)), body(X)), we can't keep for sure
+ /// the email, as body(x) might be false. So we need to check it. But as seqid(x) is true,
+ /// we could simplify the request to just body(x) and truncate the first OR. Today, we are
+ /// not doing that, and thus we reevaluate everything.
+ fn is_keep_on_query(&self, mail_view: &MailView) -> bool {
use SearchKey::*;
match self.0 {
- Text(_) | Body(_) => true,
- And(and_list) => and_list.as_ref().iter().any(|sk| Criteria(sk).need_body()),
- Not(inner) => Criteria(inner).need_body(),
- Or(left, right) => Criteria(left).need_body() || Criteria(right).need_body(),
- _ => false,
+ // Combinator logic
+ And(expr_list) => expr_list
+ .as_ref()
+ .iter()
+ .all(|cur| Criteria(cur).is_keep_on_query(mail_view)),
+ Or(left, right) => {
+ Criteria(left).is_keep_on_query(mail_view)
+ || Criteria(right).is_keep_on_query(mail_view)
+ }
+ Not(expr) => !Criteria(expr).is_keep_on_query(mail_view),
+ All => true,
+
+ // Reevaluating our previous logic...
+ maybe_seq if is_sk_seq(maybe_seq) => is_keep_seq(maybe_seq, &mail_view.in_idx),
+ maybe_flag if is_sk_flag(maybe_flag) => is_keep_flag(maybe_flag, &mail_view.in_idx),
+
+ // Filter on mail meta
+ Before(search_naive) => match mail_view.stored_naive_date() {
+ Ok(msg_naive) => &msg_naive < search_naive.as_ref(),
+ _ => false,
+ },
+ On(search_naive) => match mail_view.stored_naive_date() {
+ Ok(msg_naive) => &msg_naive == search_naive.as_ref(),
+ _ => false,
+ },
+ Since(search_naive) => match mail_view.stored_naive_date() {
+ Ok(msg_naive) => &msg_naive > search_naive.as_ref(),
+ _ => false,
+ },
+
+ // Message size is also stored in MailMeta
+ Larger(size_ref) => {
+ mail_view
+ .query_result
+ .metadata()
+ .expect("metadata were fetched")
+ .rfc822_size
+ > *size_ref as usize
+ }
+ Smaller(size_ref) => {
+ mail_view
+ .query_result
+ .metadata()
+ .expect("metadata were fetched")
+ .rfc822_size
+ < *size_ref as usize
+ }
+
+ // Filter on well-known headers
+ Bcc(txt) => mail_view.is_header_contains_pattern(&b"bcc"[..], txt.as_ref()),
+ Cc(txt) => mail_view.is_header_contains_pattern(&b"cc"[..], txt.as_ref()),
+ From(txt) => mail_view.is_header_contains_pattern(&b"from"[..], txt.as_ref()),
+ Subject(txt) => mail_view.is_header_contains_pattern(&b"subject"[..], txt.as_ref()),
+ To(txt) => mail_view.is_header_contains_pattern(&b"to"[..], txt.as_ref()),
+ Header(hdr, txt) => mail_view.is_header_contains_pattern(hdr.as_ref(), txt.as_ref()),
+
+ // Filter on Date header
+ SentBefore(search_naive) => mail_view
+ .imf()
+ .map(|imf| imf.naive_date().ok())
+ .flatten()
+ .map(|msg_naive| &msg_naive < search_naive.as_ref())
+ .unwrap_or(false),
+ SentOn(search_naive) => mail_view
+ .imf()
+ .map(|imf| imf.naive_date().ok())
+ .flatten()
+ .map(|msg_naive| &msg_naive == search_naive.as_ref())
+ .unwrap_or(false),
+ SentSince(search_naive) => mail_view
+ .imf()
+ .map(|imf| imf.naive_date().ok())
+ .flatten()
+ .map(|msg_naive| &msg_naive > search_naive.as_ref())
+ .unwrap_or(false),
+
+ // Filter on the full content of the email
+ Text(txt) => mail_view
+ .content
+ .as_msg()
+ .map(|msg| {
+ msg.raw_part
+ .windows(txt.as_ref().len())
+ .any(|win| win == txt.as_ref())
+ })
+ .unwrap_or(false),
+ Body(txt) => mail_view
+ .content
+ .as_msg()
+ .map(|msg| {
+ msg.raw_body
+ .windows(txt.as_ref().len())
+ .any(|win| win == txt.as_ref())
+ })
+ .unwrap_or(false),
+
+ unknown => {
+ tracing::error!("Unknown filter {:?}", unknown);
+ false
+ }
}
}
}
+// ---- Sequence things ----
fn sequence_set_all() -> SequenceSet {
SequenceSet::from(Sequence::Range(
SeqOrUid::Value(NonZeroU32::MIN),
@@ -128,3 +330,131 @@ fn approx_sequence_size(seq: &Sequence) -> u64 {
}
}
}
+
+// --- Partial decision things ----
+
+/// Outcome of evaluating a search key when only part of the email is known.
+enum PartialDecision {
+    /// The email matches.
+    Keep,
+    /// The email does not match.
+    Discard,
+    /// Not decidable yet; more data is required.
+    Postpone,
+}
+impl From<bool> for PartialDecision {
+    /// `true` becomes `Keep`, `false` becomes `Discard`.
+    fn from(x: bool) -> Self {
+        if x {
+            PartialDecision::Keep
+        } else {
+            PartialDecision::Discard
+        }
+    }
+}
+impl PartialDecision {
+    /// Logical negation; an undecided value stays undecided.
+    fn not(&self) -> Self {
+        match self {
+            Self::Postpone => Self::Postpone,
+            Self::Discard => Self::Keep,
+            Self::Keep => Self::Discard,
+        }
+    }
+
+    /// Logical disjunction: `Keep` wins, then `Postpone`, then `Discard`.
+    fn or(&self, other: &Self) -> Self {
+        if matches!(self, Self::Keep) || matches!(other, Self::Keep) {
+            Self::Keep
+        } else if matches!(self, Self::Postpone) || matches!(other, Self::Postpone) {
+            Self::Postpone
+        } else {
+            Self::Discard
+        }
+    }
+
+    /// Logical conjunction: `Discard` wins, then `Postpone`, then `Keep`.
+    fn and(&self, other: &Self) -> Self {
+        if matches!(self, Self::Discard) || matches!(other, Self::Discard) {
+            Self::Discard
+        } else if matches!(self, Self::Postpone) || matches!(other, Self::Postpone) {
+            Self::Postpone
+        } else {
+            Self::Keep
+        }
+    }
+
+    /// Whether the email must stay in the candidate set, i.e. it was not discarded.
+    fn is_keep(&self) -> bool {
+        match self {
+            Self::Discard => false,
+            Self::Keep | Self::Postpone => true,
+        }
+    }
+}
+
+// ----- Search Key things ---
+/// Tells whether `sk` is one of the flag-based search keys.
+fn is_sk_flag(sk: &SearchKey) -> bool {
+    use SearchKey::*;
+    matches!(
+        sk,
+        Answered
+            | Deleted
+            | Draft
+            | Flagged
+            | Keyword(..)
+            | New
+            | Old
+            | Recent
+            | Seen
+            | Unanswered
+            | Undeleted
+            | Undraft
+            | Unflagged
+            | Unkeyword(..)
+            | Unseen
+    )
+}
+
+/// Evaluates a flag-based search key against the flags of an index entry.
+///
+/// Returns `true` when the email referenced by `midx` satisfies `sk`.
+///
+/// # Panics
+///
+/// Panics if `sk` is not a flag key; callers are expected to check it with
+/// `is_sk_flag` first.
+fn is_keep_flag(sk: &SearchKey, midx: &MailIndex) -> bool {
+    use SearchKey::*;
+    match sk {
+        // Flag must be present
+        Answered => midx.is_flag_set("\\Answered"),
+        Deleted => midx.is_flag_set("\\Deleted"),
+        Draft => midx.is_flag_set("\\Draft"),
+        Flagged => midx.is_flag_set("\\Flagged"),
+        Recent => midx.is_flag_set("\\Recent"),
+        Seen => midx.is_flag_set("\\Seen"),
+        Keyword(kw) => midx.is_flag_set(kw.inner()),
+
+        // Flag must be absent
+        // (UNANSWERED previously tested \Recent, which made it behave like OLD.)
+        Unanswered => !midx.is_flag_set("\\Answered"),
+        Undeleted => !midx.is_flag_set("\\Deleted"),
+        Undraft => !midx.is_flag_set("\\Draft"),
+        Unflagged => !midx.is_flag_set("\\Flagged"),
+        Unseen => !midx.is_flag_set("\\Seen"),
+        Unkeyword(kw) => !midx.is_flag_set(kw.inner()),
+
+        // Derived keys: NEW is "recent and not seen", OLD is "not recent"
+        New => midx.is_flag_set("\\Recent") && !midx.is_flag_set("\\Seen"),
+        Old => !midx.is_flag_set("\\Recent"),
+
+        // Not flag logic
+        _ => unreachable!(),
+    }
+}
+
+/// Tells whether `sk` selects emails by sequence number or by UID.
+fn is_sk_seq(sk: &SearchKey) -> bool {
+    use SearchKey::*;
+    matches!(sk, SequenceSet(..) | Uid(..))
+}
+/// Evaluates a sequence-set or UID-set search key against an index entry.
+///
+/// Returns `true` as soon as one element of the set contains the email.
+///
+/// # Panics
+///
+/// Panics if `sk` is neither `SequenceSet` nor `Uid`; callers are expected to
+/// check it with `is_sk_seq` first.
+fn is_keep_seq(sk: &SearchKey, midx: &MailIndex) -> bool {
+    use SearchKey::*;
+    match sk {
+        SequenceSet(by_position) => {
+            for range in by_position.0.as_ref().iter() {
+                if midx.is_in_sequence_i(range) {
+                    return true;
+                }
+            }
+            false
+        }
+        Uid(by_uid) => {
+            for range in by_uid.0.as_ref().iter() {
+                if midx.is_in_sequence_uid(range) {
+                    return true;
+                }
+            }
+            false
+        }
+        _ => unreachable!(),
+    }
+}