Merge pull request 'Implement search' (#61) from feat/search into main

Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/aerogramme/pulls/61
author: Quentin <quentin@dufour.io> 2024-01-08 10:39:26 +0000
committer: Quentin <quentin@dufour.io> 2024-01-08 10:39:26 +0000
commit: d7788e29a8a64550e9b274001ff3fb9a7bf3473b (patch)
tree: e43a11753472f1917ce4aa6ddba24ae3a513bd50 /src/imap/index.rs
parent: 152d5b7604337fe19a7aea7fc37b3d4615ca7393 (diff)
parent: 42a54b2c500294c594f3efdd25db28c18f5ac238 (diff)
download: aerogramme-d7788e29a8a64550e9b274001ff3fb9a7bf3473b.tar.gz
aerogramme-d7788e29a8a64550e9b274001ff3fb9a7bf3473b.zip
1 files changed, 154 insertions, 68 deletions
diff --git a/src/imap/index.rs b/src/imap/index.rs
index 01dd2ef..3ca5562 100644
--- a/src/imap/index.rs
+++ b/src/imap/index.rs
@@ -1,95 +1,181 @@
 use std::num::NonZeroU32;
 
-use anyhow::{anyhow, bail, Result};
-use imap_codec::imap_types::sequence::{self, SequenceSet};
+use anyhow::{anyhow, Context, Result};
+use imap_codec::imap_types::sequence::{self, SeqOrUid, Sequence, SequenceSet};
 
 use crate::mail::uidindex::{ImapUid, UidIndex};
 use crate::mail::unique_ident::UniqueIdent;
 
-pub struct Index<'a>(pub &'a UidIndex);
+pub struct Index<'a> {
+    pub imap_index: Vec<MailIndex<'a>>,
+    pub internal: &'a UidIndex,
+}
 impl<'a> Index<'a> {
-    pub fn fetch(
-        self: &Index<'a>,
-        sequence_set: &SequenceSet,
-        by_uid: bool,
-    ) -> Result<Vec<MailIndex<'a>>> {
-        let mail_vec = self
-            .0
+    pub fn new(internal: &'a UidIndex) -> Result<Self> {
+        let imap_index = internal
             .idx_by_uid
             .iter()
-            .map(|(uid, uuid)| (*uid, *uuid))
-            .collect::<Vec<_>>();
+            .enumerate()
+            .map(|(i_enum, (&uid, &uuid))| {
+                let flags = internal
+                    .table
+                    .get(&uuid)
+                    .ok_or(anyhow!("mail is missing from index"))?
+                    .1
+                    .as_ref();
+                let i_int: u32 = (i_enum + 1).try_into()?;
+                let i: NonZeroU32 = i_int.try_into()?;
 
-        let mut mails = vec![];
+                Ok(MailIndex {
+                    i,
+                    uid,
+                    uuid,
+                    flags,
+                })
+            })
+            .collect::<Result<Vec<_>>>()?;
 
-        if by_uid {
-            if mail_vec.is_empty() {
-                return Ok(vec![]);
-            }
-            let iter_strat = sequence::Strategy::Naive {
-                largest: mail_vec.last().unwrap().0,
-            };
+        Ok(Self {
+            imap_index,
+            internal,
+        })
+    }
 
-            let mut i = 0;
-            for uid in sequence_set.iter(iter_strat) {
-                while mail_vec.get(i).map(|mail| mail.0 < uid).unwrap_or(false) {
-                    i += 1;
-                }
-                if let Some(mail) = mail_vec.get(i) {
-                    if mail.0 == uid {
-                        mails.push(MailIndex {
-                            i: NonZeroU32::try_from(i as u32 + 1).unwrap(),
-                            uid: mail.0,
-                            uuid: mail.1,
-                            flags: self
-                                .0
-                                .table
-                                .get(&mail.1)
-                                .ok_or(anyhow!("mail is missing from index"))?
-                                .1
-                                .as_ref(),
-                        });
-                    }
-                } else {
-                    break;
-                }
-            }
-        } else {
-            if mail_vec.is_empty() {
-                bail!("No such message (mailbox is empty)");
-            }
+    pub fn last(&'a self) -> Option<&'a MailIndex<'a>> {
+        self.imap_index.last()
+    }
+
+    /// Fetch mail descriptors based on a sequence of UID
+    ///
+    /// Complexity analysis:
+    ///  - Sort is O(n * log n) where n is the number of uid generated by the sequence
+    ///  - Finding the starting point in the index O(log m) where m is the size of the mailbox
+    /// While n =< m, it's not clear if the difference is big or not.
+    ///
+    /// For now, the algorithm tries to be fast for small values of n,
+    /// as it is what is expected by clients.
+    ///
+    /// So we assume for our implementation that : n << m.
+    /// It's not true for full mailbox searches for example...
+    pub fn fetch_on_uid(&'a self, sequence_set: &SequenceSet) -> Vec<&'a MailIndex<'a>> {
+        if self.imap_index.is_empty() {
+            return vec![];
+        }
+        let iter_strat = sequence::Strategy::Naive {
+            largest: self.last().expect("imap index is not empty").uid,
+        };
+        let mut unroll_seq = sequence_set.iter(iter_strat).collect::<Vec<_>>();
+        unroll_seq.sort();
+
+        let start_seq = match unroll_seq.iter().next() {
+            Some(elem) => elem,
+            None => return vec![],
+        };
+
+        // Quickly jump to the right point in the mailbox vector O(log m) instead
+        // of iterating one by one O(m). Works only because both unroll_seq & imap_index are sorted per uid.
+        let mut imap_idx = {
+            let start_idx = self
+                .imap_index
+                .partition_point(|mail_idx| &mail_idx.uid < start_seq);
+            &self.imap_index[start_idx..]
+        };
+        println!(
+            "win: {:?}",
+            imap_idx.iter().map(|midx| midx.uid).collect::<Vec<_>>()
+        );
 
-            let iter_strat = sequence::Strategy::Naive {
-                largest: NonZeroU32::try_from((mail_vec.len()) as u32).unwrap(),
+        let mut acc = vec![];
+        for wanted_uid in unroll_seq.iter() {
+            // Slide the window forward as long as its first element is lower than our wanted uid.
+            let start_idx = match imap_idx.iter().position(|midx| &midx.uid >= wanted_uid) {
+                Some(v) => v,
+                None => break,
             };
+            imap_idx = &imap_idx[start_idx..];
 
-            for i in sequence_set.iter(iter_strat) {
-                if let Some(mail) = mail_vec.get(i.get() as usize - 1) {
-                    mails.push(MailIndex {
-                        i,
-                        uid: mail.0,
-                        uuid: mail.1,
-                        flags: self
-                            .0
-                            .table
-                            .get(&mail.1)
-                            .ok_or(anyhow!("mail is missing from index"))?
-                            .1
-                            .as_ref(),
-                    });
-                } else {
-                    bail!("No such mail: {}", i);
-                }
+            // If the beginning of our new window is the uid we want, we collect it
+            if &imap_idx[0].uid == wanted_uid {
+                acc.push(&imap_idx[0]);
             }
         }
 
-        Ok(mails)
+        acc
+    }
+
+    pub fn fetch_on_id(&'a self, sequence_set: &SequenceSet) -> Result<Vec<&'a MailIndex<'a>>> {
+        let iter_strat = sequence::Strategy::Naive {
+            largest: self.last().context("The mailbox is empty")?.uid,
+        };
+        sequence_set
+            .iter(iter_strat)
+            .map(|wanted_id| {
+                self.imap_index
+                    .get((wanted_id.get() as usize) - 1)
+                    .ok_or(anyhow!("Mail not found"))
+            })
+            .collect::<Result<Vec<_>>>()
+    }
+
+    pub fn fetch(
+        self: &'a Index<'a>,
+        sequence_set: &SequenceSet,
+        by_uid: bool,
+    ) -> Result<Vec<&'a MailIndex<'a>>> {
+        match by_uid {
+            true => Ok(self.fetch_on_uid(sequence_set)),
+            _ => self.fetch_on_id(sequence_set),
+        }
     }
 }
 
+#[derive(Clone, Debug)]
 pub struct MailIndex<'a> {
     pub i: NonZeroU32,
     pub uid: ImapUid,
     pub uuid: UniqueIdent,
     pub flags: &'a Vec<String>,
 }
+
+impl<'a> MailIndex<'a> {
+    // The following functions are used to implement the SEARCH command
+    pub fn is_in_sequence_i(&self, seq: &Sequence) -> bool {
+        match seq {
+            Sequence::Single(SeqOrUid::Asterisk) => true,
+            Sequence::Single(SeqOrUid::Value(target)) => target == &self.i,
+            Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x))
+            | Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.i,
+            Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => {
+                if x1 < x2 {
+                    x1 <= &self.i && &self.i <= x2
+                } else {
+                    x1 >= &self.i && &self.i >= x2
+                }
+            }
+            Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true,
+        }
+    }
+
+    pub fn is_in_sequence_uid(&self, seq: &Sequence) -> bool {
+        match seq {
+            Sequence::Single(SeqOrUid::Asterisk) => true,
+            Sequence::Single(SeqOrUid::Value(target)) => target == &self.uid,
+            Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Value(x))
+            | Sequence::Range(SeqOrUid::Value(x), SeqOrUid::Asterisk) => x <= &self.uid,
+            Sequence::Range(SeqOrUid::Value(x1), SeqOrUid::Value(x2)) => {
+                if x1 < x2 {
+                    x1 <= &self.uid && &self.uid <= x2
+                } else {
+                    x1 >= &self.uid && &self.uid >= x2
+                }
+            }
+            Sequence::Range(SeqOrUid::Asterisk, SeqOrUid::Asterisk) => true,
+        }
+    }
+
+    pub fn is_flag_set(&self, flag: &str) -> bool {
+        self.flags
+            .iter()
+            .any(|candidate| candidate.as_str() == flag)
+    }
+}
author	Quentin <quentin@dufour.io>	2024-01-08 10:39:26 +0000
committer	Quentin <quentin@dufour.io>	2024-01-08 10:39:26 +0000
commit	d7788e29a8a64550e9b274001ff3fb9a7bf3473b (patch)
tree	e43a11753472f1917ce4aa6ddba24ae3a513bd50 /src/imap/index.rs
parent	152d5b7604337fe19a7aea7fc37b3d4615ca7393 (diff)
parent	42a54b2c500294c594f3efdd25db28c18f5ac238 (diff)
download	aerogramme-d7788e29a8a64550e9b274001ff3fb9a7bf3473b.tar.gz aerogramme-d7788e29a8a64550e9b274001ff3fb9a7bf3473b.zip