From 558e32fbd27be9a81144571b4baf318293be1344 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Mon, 8 Jan 2024 11:13:13 +0100 Subject: UID sequence are now correctly fetched --- src/imap/index.rs | 156 +++++++++++++++++++--------------- src/imap/mail_view.rs | 4 +- src/imap/mailbox_view.rs | 23 +++-- src/imap/search.rs | 12 +-- src/mail/snapshot.rs | 2 - tests/instrumentation/mbox-to-imap.py | 2 + 6 files changed, 114 insertions(+), 85 deletions(-) diff --git a/src/imap/index.rs b/src/imap/index.rs index 8ec3cca..9adf8b2 100644 --- a/src/imap/index.rs +++ b/src/imap/index.rs @@ -1,93 +1,113 @@ use std::num::NonZeroU32; -use anyhow::{anyhow, bail, Result}; +use anyhow::{anyhow, Context, Result}; use imap_codec::imap_types::sequence::{self, SeqOrUid, Sequence, SequenceSet}; use crate::mail::uidindex::{ImapUid, UidIndex}; use crate::mail::unique_ident::UniqueIdent; -pub struct Index<'a>(pub &'a UidIndex); +pub struct Index<'a> { + pub imap_index: Vec>, + pub internal: &'a UidIndex, +} impl<'a> Index<'a> { - pub fn fetch( - self: &Index<'a>, - sequence_set: &SequenceSet, - by_uid: bool, - ) -> Result>> { - let mail_vec = self - .0 + pub fn new(internal: &'a UidIndex) -> Result { + let imap_index = internal .idx_by_uid .iter() - .map(|(uid, uuid)| (*uid, *uuid)) - .collect::>(); + .enumerate() + .map(|(i_enum, (&uid, &uuid))| { + let flags = internal.table.get(&uuid).ok_or(anyhow!("mail is missing from index"))?.1.as_ref(); + let i_int: u32 = (i_enum + 1).try_into()?; + let i: NonZeroU32 = i_int.try_into()?; - let mut mails = vec![]; + Ok(MailIndex { i, uid, uuid, flags }) + }) + .collect::>>()?; - if by_uid { - if mail_vec.is_empty() { - return Ok(vec![]); - } - let iter_strat = sequence::Strategy::Naive { - largest: mail_vec.last().unwrap().0, - }; + Ok(Self { imap_index, internal }) + } - let mut i = 0; - for uid in sequence_set.iter(iter_strat) { - while mail_vec.get(i).map(|mail| mail.0 < uid).unwrap_or(false) { - i += 1; - } - if let Some(mail) = mail_vec.get(i) { - if mail.0 == uid { - mails.push(MailIndex { - i: NonZeroU32::try_from(i as u32 + 1).unwrap(), - uid: mail.0, - uuid: mail.1, - flags: self - .0 - .table - .get(&mail.1) - .ok_or(anyhow!("mail is missing from index"))? - .1 - .as_ref(), - }); - } - } else { - break; - } - } - } else { - if mail_vec.is_empty() { - bail!("No such message (mailbox is empty)"); - } + pub fn last(&'a self) -> Option<&'a MailIndex<'a>> { + self.imap_index.last() + } - let iter_strat = sequence::Strategy::Naive { - largest: NonZeroU32::try_from((mail_vec.len()) as u32).unwrap(), + /// Fetch mail descriptors based on a sequence of UID + /// + /// Complexity analysis: + /// - Sort is O(n * log n) where n is the number of uid generated by the sequence + /// - Finding the starting point in the index O(log m) where m is the size of the mailbox + /// While n =< m, it's not clear if the difference is big or not. + /// + /// For now, the algorithm tries to be fast for small values of n, + /// as it is what is expected by clients. + /// + /// So we assume for our implementation that : n << m. + /// It's not true for full mailbox searches for example... + pub fn fetch_on_uid(&'a self, sequence_set: &SequenceSet) -> Vec<&'a MailIndex<'a>> { + if self.imap_index.is_empty() { + return vec![]; + } + let iter_strat = sequence::Strategy::Naive { + largest: self.last().expect("imap index is not empty").uid, + }; + let mut unroll_seq = sequence_set.iter(iter_strat).collect::>(); + unroll_seq.sort(); + + let start_seq = match unroll_seq.iter().next() { + Some(elem) => elem, + None => return vec![], + }; + + // Quickly jump to the right point in the mailbox vector O(log m) instead + // of iterating one by one O(m). Works only because both unroll_seq & imap_index are sorted per uid. + let mut imap_idx = { + let start_idx = self.imap_index.partition_point(|mail_idx| &mail_idx.uid < start_seq); + &self.imap_index[start_idx..] + }; + println!("win: {:?}", imap_idx.iter().map(|midx| midx.uid).collect::>()); + + let mut acc = vec![]; + for wanted_uid in unroll_seq.iter() { + // Slide the window forward as long as its first element is lower than our wanted uid. + let start_idx = match imap_idx.iter().position(|midx| &midx.uid >= wanted_uid) { + Some(v) => v, + None => break, }; + imap_idx = &imap_idx[start_idx..]; - for i in sequence_set.iter(iter_strat) { - if let Some(mail) = mail_vec.get(i.get() as usize - 1) { - mails.push(MailIndex { - i, - uid: mail.0, - uuid: mail.1, - flags: self - .0 - .table - .get(&mail.1) - .ok_or(anyhow!("mail is missing from index"))? - .1 - .as_ref(), - }); - } else { - bail!("No such mail: {}", i); - } + // If the beginning of our new window is the uid we want, we collect it + if &imap_idx[0].uid == wanted_uid { + acc.push(&imap_idx[0]); } } - Ok(mails) + acc + } + + pub fn fetch_on_id(&'a self, sequence_set: &SequenceSet) -> Result>> { + let iter_strat = sequence::Strategy::Naive { + largest: self.last().context("The mailbox is empty")?.uid, + }; + sequence_set + .iter(iter_strat) + .map(|wanted_id| self.imap_index.get((wanted_id.get() as usize) - 1).ok_or(anyhow!("Mail not found"))) + .collect::>>() + } + + pub fn fetch( + self: &'a Index<'a>, + sequence_set: &SequenceSet, + by_uid: bool, + ) -> Result>> { + match by_uid { + true => Ok(self.fetch_on_uid(sequence_set)), + _ => self.fetch_on_id(sequence_set), + } } } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct MailIndex<'a> { pub i: NonZeroU32, pub uid: ImapUid, diff --git a/src/imap/mail_view.rs b/src/imap/mail_view.rs index 2c8723e..8dd68b5 100644 --- a/src/imap/mail_view.rs +++ b/src/imap/mail_view.rs @@ -26,13 +26,13 @@ use crate::imap::mime_view; use crate::imap::response::Body; pub struct MailView<'a> { - pub in_idx: MailIndex<'a>, + pub in_idx: &'a MailIndex<'a>, pub query_result: &'a QueryResult<'a>, pub content: FetchedMail<'a>, } impl<'a> MailView<'a> { - pub fn new(query_result: &'a QueryResult<'a>, in_idx: MailIndex<'a>) -> Result> { + pub fn new(query_result: &'a QueryResult<'a>, in_idx: &'a MailIndex<'a>) -> Result> { Ok(Self { in_idx, query_result, diff --git a/src/imap/mailbox_view.rs b/src/imap/mailbox_view.rs index 2841b7b..62ea6d2 100644 --- a/src/imap/mailbox_view.rs +++ b/src/imap/mailbox_view.rs @@ -146,7 +146,8 @@ impl MailboxView { let flags = flags.iter().map(|x| x.to_string()).collect::>(); - let mails = self.index().fetch(sequence_set, *is_uid_store)?; + let idx = self.index()?; + let mails = idx.fetch(sequence_set, *is_uid_store)?; for mi in mails.iter() { match kind { StoreType::Add => { @@ -189,7 +190,8 @@ impl MailboxView { to: Arc, is_uid_copy: &bool, ) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>)> { - let mails = self.index().fetch(sequence_set, *is_uid_copy)?; + let idx = self.index()?; + let mails = idx.fetch(sequence_set, *is_uid_copy)?; let mut new_uuids = vec![]; for mi in mails.iter() { @@ -216,7 +218,8 @@ impl MailboxView { to: Arc, is_uid_copy: &bool, ) -> Result<(ImapUidvalidity, Vec<(ImapUid, ImapUid)>, Vec>)> { - let mails = self.index().fetch(sequence_set, *is_uid_copy)?; + let idx = self.index()?; + let mails = idx.fetch(sequence_set, *is_uid_copy)?; for mi in mails.iter() { to.move_from(&self.0.mailbox, mi.uuid).await?; @@ -254,7 +257,8 @@ impl MailboxView { true => QueryScope::Full, _ => QueryScope::Partial, }; - let mail_idx_list = self.index().fetch(sequence_set, *is_uid_fetch)?; + let idx = self.index()?; + let mail_idx_list = idx.fetch(sequence_set, *is_uid_fetch)?; // [2/6] Fetch the emails let uuids = mail_idx_list @@ -316,7 +320,8 @@ impl MailboxView { let (seq_set, seq_type) = crit.to_sequence_set(); // 2. Get the selection - let selection = self.index().fetch(&seq_set, seq_type.is_uid())?; + let idx = self.index()?; + let selection = idx.fetch(&seq_set, seq_type.is_uid())?; // 3. Filter the selection based on the ID / UID / Flags let (kept_idx, to_fetch) = crit.filter_on_idx(&selection); @@ -341,8 +346,12 @@ impl MailboxView { } // ---- - fn index<'a>(&'a self) -> Index<'a> { - Index(&self.0.snapshot) + /// @FIXME index should be stored for longer than a single request + /// Instead they should be tied to the FrozenMailbox refresh + /// It's not trivial to refactor the code to do that, so we are doing + /// some useless computation for now... + fn index<'a>(&'a self) -> Result> { + Index::new(&self.0.snapshot) } /// Produce an OK [UIDVALIDITY _] message corresponding to `known_state` diff --git a/src/imap/search.rs b/src/imap/search.rs index 2b0b34b..22afd0c 100644 --- a/src/imap/search.rs +++ b/src/imap/search.rs @@ -117,13 +117,13 @@ impl<'a> Criteria<'a> { /// fetching some remote data pub fn filter_on_idx<'b>( &self, - midx_list: &[MailIndex<'b>], - ) -> (Vec>, Vec>) { + midx_list: &[&'b MailIndex<'b>], + ) -> (Vec<&'b MailIndex<'b>>, Vec<&'b MailIndex<'b>>) { let (p1, p2): (Vec<_>, Vec<_>) = midx_list .iter() .map(|x| (x, self.is_keep_on_idx(x))) .filter(|(_midx, decision)| decision.is_keep()) - .map(|(midx, decision)| ((*midx).clone(), decision)) + .map(|(midx, decision)| (*midx, decision)) .partition(|(_midx, decision)| matches!(decision, PartialDecision::Keep)); let to_keep = p1.into_iter().map(|(v, _)| v).collect(); @@ -133,13 +133,13 @@ impl<'a> Criteria<'a> { pub fn filter_on_query<'b>( &self, - midx_list: &[MailIndex<'b>], + midx_list: &[&'b MailIndex<'b>], query_result: &'b Vec>, - ) -> Result>> { + ) -> Result>> { Ok(midx_list .iter() .zip(query_result.iter()) - .map(|(midx, qr)| MailView::new(qr, midx.clone())) + .map(|(midx, qr)| MailView::new(qr, midx)) .collect::, _>>()? .into_iter() .filter(|mail_view| self.is_keep_on_query(mail_view)) diff --git a/src/mail/snapshot.rs b/src/mail/snapshot.rs index 0834f09..ed756b5 100644 --- a/src/mail/snapshot.rs +++ b/src/mail/snapshot.rs @@ -11,8 +11,6 @@ use super::unique_ident::UniqueIdent; /// state that is desynchronized with the real mailbox state. /// It's up to the user to choose when their snapshot must be updated /// to give useful information to their clients -/// -/// pub struct FrozenMailbox { pub mailbox: Arc, pub snapshot: UidIndex, diff --git a/tests/instrumentation/mbox-to-imap.py b/tests/instrumentation/mbox-to-imap.py index 63ff391..79e8b41 100644 --- a/tests/instrumentation/mbox-to-imap.py +++ b/tests/instrumentation/mbox-to-imap.py @@ -25,6 +25,8 @@ if args.tls: else: imap = IMAP4 + +print(args) with imap(host=args.host, port=args.port) as M: print(M.login(args.user, args.password)) print(M.select(args.mailbox)) -- cgit v1.2.3