diff options
author | Quentin <quentin@dufour.io> | 2024-01-08 10:39:26 +0000 |
---|---|---|
committer | Quentin <quentin@dufour.io> | 2024-01-08 10:39:26 +0000 |
commit | d7788e29a8a64550e9b274001ff3fb9a7bf3473b (patch) | |
tree | e43a11753472f1917ce4aa6ddba24ae3a513bd50 /src/imap/index.rs | |
parent | 152d5b7604337fe19a7aea7fc37b3d4615ca7393 (diff) | |
parent | 42a54b2c500294c594f3efdd25db28c18f5ac238 (diff) | |
download | aerogramme-d7788e29a8a64550e9b274001ff3fb9a7bf3473b.tar.gz aerogramme-d7788e29a8a64550e9b274001ff3fb9a7bf3473b.zip |
Merge pull request 'Implement search' (#61) from feat/search into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/aerogramme/pulls/61
Diffstat (limited to 'src/imap/index.rs')
-rw-r--r-- | src/imap/index.rs | 222 |
1 files changed, 154 insertions, 68 deletions
diff --git a/src/imap/index.rs b/src/imap/index.rs index 01dd2ef..3ca5562 100644 --- a/src/imap/index.rs +++ b/src/imap/index.rs @@ -1,95 +1,181 @@ use std::num::NonZeroU32; -use anyhow::{anyhow, bail, Result}; -use imap_codec::imap_types::sequence::{self, SequenceSet}; +use anyhow::{anyhow, Context, Result}; +use imap_codec::imap_types::sequence::{self, SeqOrUid, Sequence, SequenceSet}; use crate::mail::uidindex::{ImapUid, UidIndex}; use crate::mail::unique_ident::UniqueIdent; -pub struct Index<'a>(pub &'a UidIndex); +pub struct Index<'a> { + pub imap_index: Vec<MailIndex<'a>>, + pub internal: &'a UidIndex, +} impl<'a> Index<'a> { - pub fn fetch( - self: &Index<'a>, - sequence_set: &SequenceSet, - by_uid: bool, - ) -> Result<Vec<MailIndex<'a>>> { - let mail_vec = self - .0 + pub fn new(internal: &'a UidIndex) -> Result<Self> { + let imap_index = internal .idx_by_uid .iter() - .map(|(uid, uuid)| (*uid, *uuid)) - .collect::<Vec<_>>(); + .enumerate() + .map(|(i_enum, (&uid, &uuid))| { + let flags = internal + .table + .get(&uuid) + .ok_or(anyhow!("mail is missing from index"))? + .1 + .as_ref(); + let i_int: u32 = (i_enum + 1).try_into()?; + let i: NonZeroU32 = i_int.try_into()?; - let mut mails = vec![]; + Ok(MailIndex { + i, + uid, + uuid, + flags, + }) + }) + .collect::<Result<Vec<_>>>()?; - if by_uid { - if mail_vec.is_empty() { - return Ok(vec![]); - } - let iter_strat = sequence::Strategy::Naive { - largest: mail_vec.last().unwrap().0, - }; + Ok(Self { + imap_index, + internal, + }) + } - let mut i = 0; - for uid in sequence_set.iter(iter_strat) { - while mail_vec.get(i).map(|mail| mail.0 < uid).unwrap_or(false) { - i += 1; - } - if let Some(mail) = mail_vec.get(i) { - if mail.0 == uid { - mails.push(MailIndex { - i: NonZeroU32::try_from(i as u32 + 1).unwrap(), - uid: mail.0, - uuid: mail.1, - flags: self - .0 - .table - .get(&mail.1) - .ok_or(anyhow!("mail is missing from index"))? - .1 - .as_ref(), - }); - } - } else { - break; - } - } - } else { - if mail_vec.is_empty() { - bail!("No such message (mailbox is empty)"); - } + pub fn last(&'a self) -> Option<&'a MailIndex<'a>> { + self.imap_index.last() + } + + /// Fetch mail descriptors based on a sequence of UID + /// + /// Complexity analysis: + /// - Sort is O(n * log n) where n is the number of uid generated by the sequence + /// - Finding the starting point in the index O(log m) where m is the size of the mailbox + /// While n =< m, it's not clear if the difference is big or not. + /// + /// For now, the algorithm tries to be fast for small values of n, + /// as it is what is expected by clients. + /// + /// So we assume for our implementation that : n << m. + /// It's not true for full mailbox searches for example... + pub fn fetch_on_uid(&'a self, sequence_set: &SequenceSet) -> Vec<&'a MailIndex<'a>> { + if self.imap_index.is_empty() { + return vec![]; + } + let iter_strat = sequence::Strategy::Naive { + largest: self.last().expect("imap index is not empty").uid, + }; + let mut unroll_seq = sequence_set.iter(iter_strat).collect::<Vec<_>>(); + unroll_seq.sort(); + + let start_seq = match unroll_seq.iter().next() { + Some(elem) => elem, + None => return vec![], + }; + + // Quickly jump to the right point in the mailbox vector O(log m) instead + // of iterating one by one O(m). Works only because both unroll_seq & imap_index are sorted per uid. + let mut imap_idx = { + let start_idx = self + .imap_index + .partition_point(|mail_idx| &mail_idx.uid < start_seq); + &self.imap_index[start_idx..] + }; + println!( + "win: {:?}", + imap_idx.iter().map(|midx| midx.uid).collect::<Vec<_>>() + ); - let iter_strat = sequence::Strategy::Naive { - largest: NonZeroU32::try_from((mail_vec.len()) as u32).unwrap(), + let mut acc = vec![]; + for wanted_uid in unroll_seq.iter() { + // Slide the window forward as long as its first element is lower than our wanted uid. + let start_idx = match imap_idx.iter().position(|midx| &midx.uid >= wanted_uid) { + Some(v) => v, + None => break, }; + imap_idx = &imap_idx[start_idx..]; - for i in sequence_set.iter(iter_strat) { - if let Some(mail) = mail_vec.get(i.get() as usize - 1) { - mails.push(MailIndex { - i, - uid: mail.0, - uuid: mail.1, - flags: self - .0 - .table - .get(&mail.1) - .ok_or(anyhow!("mail is missing from index"))? - .1 - .as_ref(), - }); - } else { - bail!("No such mail: {}", i); - } + // If the beginning of our new window is the uid we want, we collect it + if &imap_idx[0].uid == wanted_uid { + acc.push(&imap_idx[0]); } } - Ok(mails) + acc + } + + pub fn fetch_on_id(&'a self, sequence_set: &SequenceSet) -> Result<Vec<&'a MailIndex<'a>>> { + let iter_strat = sequence::Strategy::Naive { + largest: self.last().context("The mailbox is empty")?.uid, + }; + sequence_set + .iter(iter_strat) + .map(|wanted_id| { + self.imap_index + .get((wanted_id.get() as usize) - 1) + .ok_or(anyhow!("Mail not found")) + }) + .collect::<Result<Vec<_>>>() + } + + pub fn fetch( + self: &'a Index<'a>, + sequence_set: &SequenceSet, + by_uid: bool, + ) -> Result<Vec<&'a MailIndex<'a>>> { + match by_uid { + true => Ok(self.fetch_on_uid(sequence_set)), + _ => self.fetch_on_id(sequence_set), + } } } +#[derive(Clone, Debug)] pub struct MailIndex<'a> { pub i: NonZeroU32, pub uid: ImapUid, pub uuid: UniqueIdent, pub flags: &'a Vec<String>, } + +impl<'a> MailIndex<'a> { + // The following functions are used to implement the SEARCH command + pub fn is_in_sequence_i(&self, seq: &Sequence) -> bool { + match seq { + Sequence::Single(SeqOrUid::Asterisk) => true, + Sequence::Single(SeqOrUid::Value(target)) => target == &self.i, + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x)) + | Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.i, + Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => { + if x1 < x2 { + x1 <= &self.i && &self.i <= x2 + } else { + x1 >= &self.i && &self.i >= x2 + } + } + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true, + } + } + + pub fn is_in_sequence_uid(&self, seq: &Sequence) -> bool { + match seq { + Sequence::Single(SeqOrUid::Asterisk) => true, + Sequence::Single(SeqOrUid::Value(target)) => target == &self.uid, + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x)) + | Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.uid, + Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => { + if x1 < x2 { + x1 <= &self.uid && &self.uid <= x2 + } else { + x1 >= &self.uid && &self.uid >= x2 + } + } + Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true, + } + } + + pub fn is_flag_set(&self, flag: &str) -> bool { + self.flags + .iter() + .any(|candidate| candidate.as_str() == flag) + } +} |