Merge pull request 'Aerogramme refactoring' (#57) from feat/more-imap-qol into main

Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/aerogramme/pulls/57
author: Quentin <quentin@dufour.io> 2024-01-06 10:38:37 +0000
committer: Quentin <quentin@dufour.io> 2024-01-06 10:38:37 +0000
commit: 44ca458c5cf666246e472dea9be70b745a130e8c (patch)
tree: b78cca747e5c2bc004cb93b93536623f7abb6ef5 /src/imap/mime_view.rs
parent: bcf6de83419b405fea95b740869f98d43586ea7c (diff)
parent: 53dbf82cbce3cb17cbcffd09558677faf8702f54 (diff)
download: aerogramme-44ca458c5cf666246e472dea9be70b745a130e8c.tar.gz
aerogramme-44ca458c5cf666246e472dea9be70b745a130e8c.zip
1 files changed, 538 insertions, 0 deletions
diff --git a/src/imap/mime_view.rs b/src/imap/mime_view.rs
new file mode 100644
index 0000000..1f36c47
--- /dev/null
+++ b/src/imap/mime_view.rs
@@ -0,0 +1,538 @@
+use std::borrow::Cow;
+use std::collections::HashSet;
+use std::num::NonZeroU32;
+
+use anyhow::{anyhow, bail, Result};
+
+use imap_codec::imap_types::body::{BasicFields, Body as FetchBody, BodyStructure, SpecificFields};
+use imap_codec::imap_types::core::{AString, IString, NString, NonEmptyVec};
+use imap_codec::imap_types::fetch::{Part as FetchPart, Section as FetchSection};
+
+use eml_codec::{
+    header, mime, mime::r#type::Deductible, part::composite, part::discrete, part::AnyPart,
+};
+
+use crate::imap::imf_view::message_envelope;
+
+pub enum BodySection<'a> {
+    Full(Cow<'a, [u8]>),
+    Slice {
+        body: Cow<'a, [u8]>,
+        origin_octet: u32,
+    },
+}
+
+/// Logic for BODY[<section>]<<partial>>
+/// Works in 3 times:
+///  1. Find the section (RootMime::subset)
+///  2. Apply the extraction logic (SelectedMime::extract), like TEXT, HEADERS, etc.
+///  3. Keep only the given subset provided by partial
+///
+/// Example of message sections:
+///
+/// ```
+///    HEADER     ([RFC-2822] header of the message)
+///    TEXT       ([RFC-2822] text body of the message) MULTIPART/MIXED
+///    1          TEXT/PLAIN
+///    2          APPLICATION/OCTET-STREAM
+///    3          MESSAGE/RFC822
+///    3.HEADER   ([RFC-2822] header of the message)
+///    3.TEXT     ([RFC-2822] text body of the message) MULTIPART/MIXED
+///    3.1        TEXT/PLAIN
+///    3.2        APPLICATION/OCTET-STREAM
+///    4          MULTIPART/MIXED
+///    4.1        IMAGE/GIF
+///    4.1.MIME   ([MIME-IMB] header for the IMAGE/GIF)
+///    4.2        MESSAGE/RFC822
+///    4.2.HEADER ([RFC-2822] header of the message)
+///    4.2.TEXT   ([RFC-2822] text body of the message) MULTIPART/MIXED
+///    4.2.1      TEXT/PLAIN
+///    4.2.2      MULTIPART/ALTERNATIVE
+///    4.2.2.1    TEXT/PLAIN
+///    4.2.2.2    TEXT/RICHTEXT
+/// ```
+pub fn body_ext<'a>(
+    part: &'a AnyPart<'a>,
+    section: &'a Option<FetchSection<'a>>,
+    partial: &'a Option<(u32, NonZeroU32)>,
+) -> Result<BodySection<'a>> {
+    let root_mime = NodeMime(part);
+    let (extractor, path) = SubsettedSection::from(section);
+    let selected_mime = root_mime.subset(path)?;
+    let extracted_full = selected_mime.extract(&extractor)?;
+    Ok(extracted_full.to_body_section(partial))
+}
+
+/// Logic for BODY and BODYSTRUCTURE
+///
+/// ```raw
+/// b fetch 29878:29879 (BODY)
+/// * 29878 FETCH (BODY (("text" "plain" ("charset" "utf-8") NIL NIL "quoted-printable" 3264 82)("text" "html" ("charset" "utf-8") NIL NIL "quoted-printable" 31834 643) "alternative"))
+/// * 29879 FETCH (BODY ("text" "html" ("charset" "us-ascii") NIL NIL "7bit" 4107 131))
+///                                   ^^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^ ^^^^^^ ^^^^ ^^^
+///                                   |                      |   |   |      |    | number of lines
+///                                   |                      |   |   |      | size
+///                                   |                      |   |   | content transfer encoding
+///                                   |                      |   | description
+///                                   |                      | id
+///                                   | parameter list
+/// b OK Fetch completed (0.001 + 0.000 secs).
+/// ```
+pub fn bodystructure(part: &AnyPart) -> Result<BodyStructure<'static>> {
+    NodeMime(part).structure()
+}
+
+/// NodeMime
+///
+/// Used for recursive logic on MIME.
+/// See SelectedMime for inspection.
+struct NodeMime<'a>(&'a AnyPart<'a>);
+impl<'a> NodeMime<'a> {
+    /// A MIME object is a tree of elements.
+    /// The path indicates which element must be picked.
+    /// This function returns the picked element as the new view
+    fn subset(self, path: Option<&'a FetchPart>) -> Result<SelectedMime<'a>> {
+        match path {
+            None => Ok(SelectedMime(self.0)),
+            Some(v) => self.rec_subset(v.0.as_ref()),
+        }
+    }
+
+    fn rec_subset(self, path: &'a [NonZeroU32]) -> Result<SelectedMime> {
+        if path.is_empty() {
+            Ok(SelectedMime(self.0))
+        } else {
+            match self.0 {
+                AnyPart::Mult(x) => {
+                    let next = Self(x.children
+                        .get(path[0].get() as usize - 1)
+                        .ok_or(anyhow!("Unable to resolve subpath {:?}, current multipart has only {} elements", path, x.children.len()))?);
+                    next.rec_subset(&path[1..])
+                },
+                AnyPart::Msg(x) => {
+                    let next = Self(x.child.as_ref());
+                    next.rec_subset(path)
+                },
+                _ => bail!("You tried to access a subpart on an atomic part (text or binary). Unresolved subpath {:?}", path),
+            }
+        }
+    }
+
+    fn structure(&self) -> Result<BodyStructure<'static>> {
+        match self.0 {
+            AnyPart::Txt(x) => NodeTxt(self, x).structure(),
+            AnyPart::Bin(x) => NodeBin(self, x).structure(),
+            AnyPart::Mult(x) => NodeMult(self, x).structure(),
+            AnyPart::Msg(x) => NodeMsg(self, x).structure(),
+        }
+    }
+}
+
+//----------------------------------------------------------
+
+/// A FetchSection must be handled in 2 times:
+///  - First we must extract the MIME part
+///  - Then we must process it as desired
+/// The given struct mixes both work, so
+/// we separate this work here.
+enum SubsettedSection<'a> {
+    Part,
+    Header,
+    HeaderFields(&'a NonEmptyVec<AString<'a>>),
+    HeaderFieldsNot(&'a NonEmptyVec<AString<'a>>),
+    Text,
+    Mime,
+}
+impl<'a> SubsettedSection<'a> {
+    fn from(section: &'a Option<FetchSection>) -> (Self, Option<&'a FetchPart>) {
+        match section {
+            Some(FetchSection::Text(maybe_part)) => (Self::Text, maybe_part.as_ref()),
+            Some(FetchSection::Header(maybe_part)) => (Self::Header, maybe_part.as_ref()),
+            Some(FetchSection::HeaderFields(maybe_part, fields)) => {
+                (Self::HeaderFields(fields), maybe_part.as_ref())
+            }
+            Some(FetchSection::HeaderFieldsNot(maybe_part, fields)) => {
+                (Self::HeaderFieldsNot(fields), maybe_part.as_ref())
+            }
+            Some(FetchSection::Mime(part)) => (Self::Mime, Some(part)),
+            Some(FetchSection::Part(part)) => (Self::Part, Some(part)),
+            None => (Self::Part, None),
+        }
+    }
+}
+
+/// Used for current MIME inspection
+///
+/// See NodeMime for recursive logic
+struct SelectedMime<'a>(&'a AnyPart<'a>);
+impl<'a> SelectedMime<'a> {
+    /// The subsetted fetch section basically tells us the
+    /// extraction logic to apply on our selected MIME.
+    /// This function acts as a router for these logic.
+    fn extract(&self, extractor: &SubsettedSection<'a>) -> Result<ExtractedFull<'a>> {
+        match extractor {
+            SubsettedSection::Text => self.text(),
+            SubsettedSection::Header => self.header(),
+            SubsettedSection::HeaderFields(fields) => self.header_fields(fields, false),
+            SubsettedSection::HeaderFieldsNot(fields) => self.header_fields(fields, true),
+            SubsettedSection::Part => self.part(),
+            SubsettedSection::Mime => self.mime(),
+        }
+    }
+
+    fn mime(&self) -> Result<ExtractedFull<'a>> {
+        let bytes = match &self.0 {
+            AnyPart::Txt(p) => p.mime.fields.raw,
+            AnyPart::Bin(p) => p.mime.fields.raw,
+            AnyPart::Msg(p) => p.child.mime().raw,
+            AnyPart::Mult(p) => p.mime.fields.raw,
+        };
+        Ok(ExtractedFull(bytes.into()))
+    }
+
+    fn part(&self) -> Result<ExtractedFull<'a>> {
+        let bytes = match &self.0 {
+            AnyPart::Txt(p) => p.body,
+            AnyPart::Bin(p) => p.body,
+            AnyPart::Msg(p) => p.raw_part,
+            AnyPart::Mult(_) => bail!("Multipart part has no body"),
+        };
+        Ok(ExtractedFull(bytes.to_vec().into()))
+    }
+
+    /// The [...] HEADER.FIELDS, and HEADER.FIELDS.NOT part
+    /// specifiers refer to the [RFC-2822] header of the message or of
+    /// an encapsulated [MIME-IMT] MESSAGE/RFC822 message.
+    /// HEADER.FIELDS and HEADER.FIELDS.NOT are followed by a list of
+    /// field-name (as defined in [RFC-2822]) names, and return a
+    /// subset of the header.  The subset returned by HEADER.FIELDS
+    /// contains only those header fields with a field-name that
+    /// matches one of the names in the list; similarly, the subset
+    /// returned by HEADER.FIELDS.NOT contains only the header fields
+    /// with a non-matching field-name.  The field-matching is
+    /// case-insensitive but otherwise exact.
+    fn header_fields(
+        &self,
+        fields: &'a NonEmptyVec<AString<'a>>,
+        invert: bool,
+    ) -> Result<ExtractedFull<'a>> {
+        // Build a lowercase ascii hashset with the fields to fetch
+        let index = fields
+            .as_ref()
+            .iter()
+            .map(|x| {
+                match x {
+                    AString::Atom(a) => a.inner().as_bytes(),
+                    AString::String(IString::Literal(l)) => l.as_ref(),
+                    AString::String(IString::Quoted(q)) => q.inner().as_bytes(),
+                }
+                .to_ascii_lowercase()
+            })
+            .collect::<HashSet<_>>();
+
+        // Extract MIME headers
+        let mime = match &self.0 {
+            AnyPart::Msg(msg) => msg.child.mime(),
+            other => other.mime(),
+        };
+
+        // Filter our MIME headers based on the field index
+        // 1. Keep only the correctly formatted headers
+        // 2. Keep only based on the index presence or absence
+        // 3. Reduce as a byte vector
+        let buffer = mime
+            .kv
+            .iter()
+            .filter_map(|field| match field {
+                header::Field::Good(header::Kv2(k, v)) => Some((k, v)),
+                _ => None,
+            })
+            .filter(|(k, _)| index.contains(&k.to_ascii_lowercase()) ^ invert)
+            .fold(vec![], |mut acc, (k, v)| {
+                acc.extend(*k);
+                acc.extend(b": ");
+                acc.extend(*v);
+                acc.extend(b"\r\n");
+                acc
+            });
+
+        Ok(ExtractedFull(buffer.into()))
+    }
+
+    /// The HEADER [...] part specifiers refer to the [RFC-2822] header of the message or of
+    /// an encapsulated [MIME-IMT] MESSAGE/RFC822 message.
+    /// ```raw
+    /// HEADER     ([RFC-2822] header of the message)
+    /// ```
+    fn header(&self) -> Result<ExtractedFull<'a>> {
+        let msg = self
+            .0
+            .as_message()
+            .ok_or(anyhow!("Selected part must be a message/rfc822"))?;
+        Ok(ExtractedFull(msg.raw_headers.into()))
+    }
+
+    /// The TEXT part specifier refers to the text body of the message, omitting the [RFC-2822] header.
+    fn text(&self) -> Result<ExtractedFull<'a>> {
+        let msg = self
+            .0
+            .as_message()
+            .ok_or(anyhow!("Selected part must be a message/rfc822"))?;
+        Ok(ExtractedFull(msg.raw_body.into()))
+    }
+
+    // ------------
+
+    /// Basic field of a MIME part that is
+    /// common to all parts
+    fn basic_fields(&self) -> Result<BasicFields<'static>> {
+        let sz = match self.0 {
+            AnyPart::Txt(x) => x.body.len(),
+            AnyPart::Bin(x) => x.body.len(),
+            AnyPart::Msg(x) => x.raw_part.len(),
+            AnyPart::Mult(_) => 0,
+        };
+        let m = self.0.mime();
+        let parameter_list = m
+            .ctype
+            .as_ref()
+            .map(|x| {
+                x.params
+                    .iter()
+                    .map(|p| {
+                        (
+                            IString::try_from(String::from_utf8_lossy(p.name).to_string()),
+                            IString::try_from(p.value.to_string()),
+                        )
+                    })
+                    .filter(|(k, v)| k.is_ok() && v.is_ok())
+                    .map(|(k, v)| (k.unwrap(), v.unwrap()))
+                    .collect()
+            })
+            .unwrap_or(vec![]);
+
+        Ok(BasicFields {
+            parameter_list,
+            id: NString(
+                m.id.as_ref()
+                    .and_then(|ci| IString::try_from(ci.to_string()).ok()),
+            ),
+            description: NString(
+                m.description
+                    .as_ref()
+                    .and_then(|cd| IString::try_from(cd.to_string()).ok()),
+            ),
+            content_transfer_encoding: match m.transfer_encoding {
+                mime::mechanism::Mechanism::_8Bit => unchecked_istring("8bit"),
+                mime::mechanism::Mechanism::Binary => unchecked_istring("binary"),
+                mime::mechanism::Mechanism::QuotedPrintable => {
+                    unchecked_istring("quoted-printable")
+                }
+                mime::mechanism::Mechanism::Base64 => unchecked_istring("base64"),
+                _ => unchecked_istring("7bit"),
+            },
+            // @FIXME we can't compute the size of the message currently...
+            size: u32::try_from(sz)?,
+        })
+    }
+}
+
+// ---------------------------
+struct NodeMsg<'a>(&'a NodeMime<'a>, &'a composite::Message<'a>);
+impl<'a> NodeMsg<'a> {
+    fn structure(&self) -> Result<BodyStructure<'static>> {
+        let basic = SelectedMime(self.0 .0).basic_fields()?;
+
+        Ok(BodyStructure::Single {
+            body: FetchBody {
+                basic,
+                specific: SpecificFields::Message {
+                    envelope: Box::new(message_envelope(&self.1.imf)),
+                    body_structure: Box::new(NodeMime(&self.1.child).structure()?),
+                    number_of_lines: nol(self.1.raw_part),
+                },
+            },
+            extension_data: None,
+        })
+    }
+}
+struct NodeMult<'a>(&'a NodeMime<'a>, &'a composite::Multipart<'a>);
+impl<'a> NodeMult<'a> {
+    fn structure(&self) -> Result<BodyStructure<'static>> {
+        let itype = &self.1.mime.interpreted_type;
+        let subtype = IString::try_from(itype.subtype.to_string())
+            .unwrap_or(unchecked_istring("alternative"));
+
+        let inner_bodies = self
+            .1
+            .children
+            .iter()
+            .filter_map(|inner| NodeMime(&inner).structure().ok())
+            .collect::<Vec<_>>();
+
+        NonEmptyVec::validate(&inner_bodies)?;
+        let bodies = NonEmptyVec::unvalidated(inner_bodies);
+
+        Ok(BodyStructure::Multi {
+            bodies,
+            subtype,
+            extension_data: None,
+            /*Some(MultipartExtensionData {
+            parameter_list: vec![],
+            disposition: None,
+            language: None,
+            location: None,
+            extension: vec![],
+            })*/
+        })
+    }
+}
+struct NodeTxt<'a>(&'a NodeMime<'a>, &'a discrete::Text<'a>);
+impl<'a> NodeTxt<'a> {
+    fn structure(&self) -> Result<BodyStructure<'static>> {
+        let mut basic = SelectedMime(self.0 .0).basic_fields()?;
+
+        // Get the interpreted content type, set it
+        let itype = match &self.1.mime.interpreted_type {
+            Deductible::Inferred(v) | Deductible::Explicit(v) => v,
+        };
+        let subtype =
+            IString::try_from(itype.subtype.to_string()).unwrap_or(unchecked_istring("plain"));
+
+        // Add charset to the list of parameters if we know it has been inferred as it will be
+        // missing from the parsed content.
+        if let Deductible::Inferred(charset) = &itype.charset {
+            basic.parameter_list.push((
+                unchecked_istring("charset"),
+                IString::try_from(charset.to_string()).unwrap_or(unchecked_istring("us-ascii")),
+            ));
+        }
+
+        Ok(BodyStructure::Single {
+            body: FetchBody {
+                basic,
+                specific: SpecificFields::Text {
+                    subtype,
+                    number_of_lines: nol(self.1.body),
+                },
+            },
+            extension_data: None,
+        })
+    }
+}
+
+struct NodeBin<'a>(&'a NodeMime<'a>, &'a discrete::Binary<'a>);
+impl<'a> NodeBin<'a> {
+    fn structure(&self) -> Result<BodyStructure<'static>> {
+        let basic = SelectedMime(self.0 .0).basic_fields()?;
+
+        let default = mime::r#type::NaiveType {
+            main: &b"application"[..],
+            sub: &b"octet-stream"[..],
+            params: vec![],
+        };
+        let ct = self.1.mime.fields.ctype.as_ref().unwrap_or(&default);
+
+        let r#type = IString::try_from(String::from_utf8_lossy(ct.main).to_string()).or(Err(
+            anyhow!("Unable to build IString from given Content-Type type given"),
+        ))?;
+
+        let subtype = IString::try_from(String::from_utf8_lossy(ct.sub).to_string()).or(Err(
+            anyhow!("Unable to build IString from given Content-Type subtype given"),
+        ))?;
+
+        Ok(BodyStructure::Single {
+            body: FetchBody {
+                basic,
+                specific: SpecificFields::Basic { r#type, subtype },
+            },
+            extension_data: None,
+        })
+    }
+}
+
+// ---------------------------
+
+struct ExtractedFull<'a>(Cow<'a, [u8]>);
+impl<'a> ExtractedFull<'a> {
+    /// It is possible to fetch a substring of the designated text.
+    /// This is done by appending an open angle bracket ("<"), the
+    /// octet position of the first desired octet, a period, the
+    /// maximum number of octets desired, and a close angle bracket
+    /// (">") to the part specifier.  If the starting octet is beyond
+    /// the end of the text, an empty string is returned.
+    ///
+    /// Any partial fetch that attempts to read beyond the end of the
+    /// text is truncated as appropriate.  A partial fetch that starts
+    /// at octet 0 is returned as a partial fetch, even if this
+    /// truncation happened.
+    ///
+    /// Note: This means that BODY[]<0.2048> of a 1500-octet message
+    /// will return BODY[]<0> with a literal of size 1500, not
+    /// BODY[].
+    ///
+    /// Note: A substring fetch of a HEADER.FIELDS or
+    /// HEADER.FIELDS.NOT part specifier is calculated after
+    /// subsetting the header.
+    fn to_body_section(self, partial: &'_ Option<(u32, NonZeroU32)>) -> BodySection<'a> {
+        match partial {
+            Some((begin, len)) => self.partialize(*begin, *len),
+            None => BodySection::Full(self.0),
+        }
+    }
+
+    fn partialize(self, begin: u32, len: NonZeroU32) -> BodySection<'a> {
+        // Asked range is starting after the end of the content,
+        // returning an empty buffer
+        if begin as usize > self.0.len() {
+            return BodySection::Slice {
+                body: Cow::Borrowed(&[][..]),
+                origin_octet: begin,
+            };
+        }
+
+        // Asked range is ending after the end of the content,
+        // slice only the beginning of the buffer
+        if (begin + len.get()) as usize >= self.0.len() {
+            return BodySection::Slice {
+                body: match self.0 {
+                    Cow::Borrowed(body) => Cow::Borrowed(&body[begin as usize..]),
+                    Cow::Owned(body) => Cow::Owned(body[begin as usize..].to_vec()),
+                },
+                origin_octet: begin,
+            };
+        }
+
+        // Range is included inside the considered content,
+        // this is the "happy case"
+        BodySection::Slice {
+            body: match self.0 {
+                Cow::Borrowed(body) => {
+                    Cow::Borrowed(&body[begin as usize..(begin + len.get()) as usize])
+                }
+                Cow::Owned(body) => {
+                    Cow::Owned(body[begin as usize..(begin + len.get()) as usize].to_vec())
+                }
+            },
+            origin_octet: begin,
+        }
+    }
+}
+
+/// ---- LEGACY
+
+/// s is set to static to ensure that only compile time values
+/// checked by developpers are passed.
+fn unchecked_istring(s: &'static str) -> IString {
+    IString::try_from(s).expect("this value is expected to be a valid imap-codec::IString")
+}
+
+// Number Of Lines
+fn nol(input: &[u8]) -> u32 {
+    input
+        .iter()
+        .filter(|x| **x == b'\n')
+        .count()
+        .try_into()
+        .unwrap_or(0)
+}
author	Quentin <quentin@dufour.io>	2024-01-06 10:38:37 +0000
committer	Quentin <quentin@dufour.io>	2024-01-06 10:38:37 +0000
commit	44ca458c5cf666246e472dea9be70b745a130e8c (patch)
tree	b78cca747e5c2bc004cb93b93536623f7abb6ef5 /src/imap/mime_view.rs
parent	bcf6de83419b405fea95b740869f98d43586ea7c (diff)
parent	53dbf82cbce3cb17cbcffd09558677faf8702f54 (diff)
download	aerogramme-44ca458c5cf666246e472dea9be70b745a130e8c.tar.gz aerogramme-44ca458c5cf666246e472dea9be70b745a130e8c.zip