From 11462f80c4ae25696c7436ed7aacb92074d7e911 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Fri, 8 Mar 2024 09:55:33 +0100 Subject: Re-enable proto --- aero-proto/src/imap/mime_view.rs | 582 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 582 insertions(+) create mode 100644 aero-proto/src/imap/mime_view.rs (limited to 'aero-proto/src/imap/mime_view.rs') diff --git a/aero-proto/src/imap/mime_view.rs b/aero-proto/src/imap/mime_view.rs new file mode 100644 index 0000000..720f20a --- /dev/null +++ b/aero-proto/src/imap/mime_view.rs @@ -0,0 +1,582 @@ +use std::borrow::Cow; +use std::collections::HashSet; +use std::num::NonZeroU32; + +use anyhow::{anyhow, bail, Result}; + +use imap_codec::imap_types::body::{ + BasicFields, Body as FetchBody, BodyStructure, MultiPartExtensionData, SinglePartExtensionData, + SpecificFields, +}; +use imap_codec::imap_types::core::{AString, IString, NString, Vec1}; +use imap_codec::imap_types::fetch::{Part as FetchPart, Section as FetchSection}; + +use eml_codec::{ + header, mime, mime::r#type::Deductible, part::composite, part::discrete, part::AnyPart, +}; + +use crate::imap::imf_view::ImfView; + +pub enum BodySection<'a> { + Full(Cow<'a, [u8]>), + Slice { + body: Cow<'a, [u8]>, + origin_octet: u32, + }, +} + +/// Logic for BODY[
]<> +/// Works in 3 times: +/// 1. Find the section (RootMime::subset) +/// 2. Apply the extraction logic (SelectedMime::extract), like TEXT, HEADERS, etc. +/// 3. Keep only the given subset provided by partial +/// +/// Example of message sections: +/// +/// ``` +/// HEADER ([RFC-2822] header of the message) +/// TEXT ([RFC-2822] text body of the message) MULTIPART/MIXED +/// 1 TEXT/PLAIN +/// 2 APPLICATION/OCTET-STREAM +/// 3 MESSAGE/RFC822 +/// 3.HEADER ([RFC-2822] header of the message) +/// 3.TEXT ([RFC-2822] text body of the message) MULTIPART/MIXED +/// 3.1 TEXT/PLAIN +/// 3.2 APPLICATION/OCTET-STREAM +/// 4 MULTIPART/MIXED +/// 4.1 IMAGE/GIF +/// 4.1.MIME ([MIME-IMB] header for the IMAGE/GIF) +/// 4.2 MESSAGE/RFC822 +/// 4.2.HEADER ([RFC-2822] header of the message) +/// 4.2.TEXT ([RFC-2822] text body of the message) MULTIPART/MIXED +/// 4.2.1 TEXT/PLAIN +/// 4.2.2 MULTIPART/ALTERNATIVE +/// 4.2.2.1 TEXT/PLAIN +/// 4.2.2.2 TEXT/RICHTEXT +/// ``` +pub fn body_ext<'a>( + part: &'a AnyPart<'a>, + section: &'a Option>, + partial: &'a Option<(u32, NonZeroU32)>, +) -> Result> { + let root_mime = NodeMime(part); + let (extractor, path) = SubsettedSection::from(section); + let selected_mime = root_mime.subset(path)?; + let extracted_full = selected_mime.extract(&extractor)?; + Ok(extracted_full.to_body_section(partial)) +} + +/// Logic for BODY and BODYSTRUCTURE +/// +/// ```raw +/// b fetch 29878:29879 (BODY) +/// * 29878 FETCH (BODY (("text" "plain" ("charset" "utf-8") NIL NIL "quoted-printable" 3264 82)("text" "html" ("charset" "utf-8") NIL NIL "quoted-printable" 31834 643) "alternative")) +/// * 29879 FETCH (BODY ("text" "html" ("charset" "us-ascii") NIL NIL "7bit" 4107 131)) +/// ^^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^ ^^^^^^ ^^^^ ^^^ +/// | | | | | | number of lines +/// | | | | | size +/// | | | | content transfer encoding +/// | | | description +/// | | id +/// | parameter list +/// b OK Fetch completed (0.001 + 0.000 secs). +/// ``` +pub fn bodystructure(part: &AnyPart, is_ext: bool) -> Result> { + NodeMime(part).structure(is_ext) +} + +/// NodeMime +/// +/// Used for recursive logic on MIME. +/// See SelectedMime for inspection. +struct NodeMime<'a>(&'a AnyPart<'a>); +impl<'a> NodeMime<'a> { + /// A MIME object is a tree of elements. + /// The path indicates which element must be picked. + /// This function returns the picked element as the new view + fn subset(self, path: Option<&'a FetchPart>) -> Result> { + match path { + None => Ok(SelectedMime(self.0)), + Some(v) => self.rec_subset(v.0.as_ref()), + } + } + + fn rec_subset(self, path: &'a [NonZeroU32]) -> Result { + if path.is_empty() { + Ok(SelectedMime(self.0)) + } else { + match self.0 { + AnyPart::Mult(x) => { + let next = Self(x.children + .get(path[0].get() as usize - 1) + .ok_or(anyhow!("Unable to resolve subpath {:?}, current multipart has only {} elements", path, x.children.len()))?); + next.rec_subset(&path[1..]) + }, + AnyPart::Msg(x) => { + let next = Self(x.child.as_ref()); + next.rec_subset(path) + }, + _ => bail!("You tried to access a subpart on an atomic part (text or binary). Unresolved subpath {:?}", path), + } + } + } + + fn structure(&self, is_ext: bool) -> Result> { + match self.0 { + AnyPart::Txt(x) => NodeTxt(self, x).structure(is_ext), + AnyPart::Bin(x) => NodeBin(self, x).structure(is_ext), + AnyPart::Mult(x) => NodeMult(self, x).structure(is_ext), + AnyPart::Msg(x) => NodeMsg(self, x).structure(is_ext), + } + } +} + +//---------------------------------------------------------- + +/// A FetchSection must be handled in 2 times: +/// - First we must extract the MIME part +/// - Then we must process it as desired +/// The given struct mixes both work, so +/// we separate this work here. +enum SubsettedSection<'a> { + Part, + Header, + HeaderFields(&'a Vec1>), + HeaderFieldsNot(&'a Vec1>), + Text, + Mime, +} +impl<'a> SubsettedSection<'a> { + fn from(section: &'a Option) -> (Self, Option<&'a FetchPart>) { + match section { + Some(FetchSection::Text(maybe_part)) => (Self::Text, maybe_part.as_ref()), + Some(FetchSection::Header(maybe_part)) => (Self::Header, maybe_part.as_ref()), + Some(FetchSection::HeaderFields(maybe_part, fields)) => { + (Self::HeaderFields(fields), maybe_part.as_ref()) + } + Some(FetchSection::HeaderFieldsNot(maybe_part, fields)) => { + (Self::HeaderFieldsNot(fields), maybe_part.as_ref()) + } + Some(FetchSection::Mime(part)) => (Self::Mime, Some(part)), + Some(FetchSection::Part(part)) => (Self::Part, Some(part)), + None => (Self::Part, None), + } + } +} + +/// Used for current MIME inspection +/// +/// See NodeMime for recursive logic +pub struct SelectedMime<'a>(pub &'a AnyPart<'a>); +impl<'a> SelectedMime<'a> { + pub fn header_value(&'a self, to_match_ext: &[u8]) -> Option<&'a [u8]> { + let to_match = to_match_ext.to_ascii_lowercase(); + + self.eml_mime() + .kv + .iter() + .filter_map(|field| match field { + header::Field::Good(header::Kv2(k, v)) => Some((k, v)), + _ => None, + }) + .find(|(k, _)| k.to_ascii_lowercase() == to_match) + .map(|(_, v)| v) + .copied() + } + + /// The subsetted fetch section basically tells us the + /// extraction logic to apply on our selected MIME. + /// This function acts as a router for these logic. + fn extract(&self, extractor: &SubsettedSection<'a>) -> Result> { + match extractor { + SubsettedSection::Text => self.text(), + SubsettedSection::Header => self.header(), + SubsettedSection::HeaderFields(fields) => self.header_fields(fields, false), + SubsettedSection::HeaderFieldsNot(fields) => self.header_fields(fields, true), + SubsettedSection::Part => self.part(), + SubsettedSection::Mime => self.mime(), + } + } + + fn mime(&self) -> Result> { + let bytes = match &self.0 { + AnyPart::Txt(p) => p.mime.fields.raw, + AnyPart::Bin(p) => p.mime.fields.raw, + AnyPart::Msg(p) => p.child.mime().raw, + AnyPart::Mult(p) => p.mime.fields.raw, + }; + Ok(ExtractedFull(bytes.into())) + } + + fn part(&self) -> Result> { + let bytes = match &self.0 { + AnyPart::Txt(p) => p.body, + AnyPart::Bin(p) => p.body, + AnyPart::Msg(p) => p.raw_part, + AnyPart::Mult(_) => bail!("Multipart part has no body"), + }; + Ok(ExtractedFull(bytes.to_vec().into())) + } + + fn eml_mime(&self) -> &eml_codec::mime::NaiveMIME<'_> { + match &self.0 { + AnyPart::Msg(msg) => msg.child.mime(), + other => other.mime(), + } + } + + /// The [...] HEADER.FIELDS, and HEADER.FIELDS.NOT part + /// specifiers refer to the [RFC-2822] header of the message or of + /// an encapsulated [MIME-IMT] MESSAGE/RFC822 message. + /// HEADER.FIELDS and HEADER.FIELDS.NOT are followed by a list of + /// field-name (as defined in [RFC-2822]) names, and return a + /// subset of the header. The subset returned by HEADER.FIELDS + /// contains only those header fields with a field-name that + /// matches one of the names in the list; similarly, the subset + /// returned by HEADER.FIELDS.NOT contains only the header fields + /// with a non-matching field-name. The field-matching is + /// case-insensitive but otherwise exact. + fn header_fields( + &self, + fields: &'a Vec1>, + invert: bool, + ) -> Result> { + // Build a lowercase ascii hashset with the fields to fetch + let index = fields + .as_ref() + .iter() + .map(|x| { + match x { + AString::Atom(a) => a.inner().as_bytes(), + AString::String(IString::Literal(l)) => l.as_ref(), + AString::String(IString::Quoted(q)) => q.inner().as_bytes(), + } + .to_ascii_lowercase() + }) + .collect::>(); + + // Extract MIME headers + let mime = self.eml_mime(); + + // Filter our MIME headers based on the field index + // 1. Keep only the correctly formatted headers + // 2. Keep only based on the index presence or absence + // 3. Reduce as a byte vector + let buffer = mime + .kv + .iter() + .filter_map(|field| match field { + header::Field::Good(header::Kv2(k, v)) => Some((k, v)), + _ => None, + }) + .filter(|(k, _)| index.contains(&k.to_ascii_lowercase()) ^ invert) + .fold(vec![], |mut acc, (k, v)| { + acc.extend(*k); + acc.extend(b": "); + acc.extend(*v); + acc.extend(b"\r\n"); + acc + }); + + Ok(ExtractedFull(buffer.into())) + } + + /// The HEADER [...] part specifiers refer to the [RFC-2822] header of the message or of + /// an encapsulated [MIME-IMT] MESSAGE/RFC822 message. + /// ```raw + /// HEADER ([RFC-2822] header of the message) + /// ``` + fn header(&self) -> Result> { + let msg = self + .0 + .as_message() + .ok_or(anyhow!("Selected part must be a message/rfc822"))?; + Ok(ExtractedFull(msg.raw_headers.into())) + } + + /// The TEXT part specifier refers to the text body of the message, omitting the [RFC-2822] header. + fn text(&self) -> Result> { + let msg = self + .0 + .as_message() + .ok_or(anyhow!("Selected part must be a message/rfc822"))?; + Ok(ExtractedFull(msg.raw_body.into())) + } + + // ------------ + + /// Basic field of a MIME part that is + /// common to all parts + fn basic_fields(&self) -> Result> { + let sz = match self.0 { + AnyPart::Txt(x) => x.body.len(), + AnyPart::Bin(x) => x.body.len(), + AnyPart::Msg(x) => x.raw_part.len(), + AnyPart::Mult(_) => 0, + }; + let m = self.0.mime(); + let parameter_list = m + .ctype + .as_ref() + .map(|x| { + x.params + .iter() + .map(|p| { + ( + IString::try_from(String::from_utf8_lossy(p.name).to_string()), + IString::try_from(p.value.to_string()), + ) + }) + .filter(|(k, v)| k.is_ok() && v.is_ok()) + .map(|(k, v)| (k.unwrap(), v.unwrap())) + .collect() + }) + .unwrap_or(vec![]); + + Ok(BasicFields { + parameter_list, + id: NString( + m.id.as_ref() + .and_then(|ci| IString::try_from(ci.to_string()).ok()), + ), + description: NString( + m.description + .as_ref() + .and_then(|cd| IString::try_from(cd.to_string()).ok()), + ), + content_transfer_encoding: match m.transfer_encoding { + mime::mechanism::Mechanism::_8Bit => unchecked_istring("8bit"), + mime::mechanism::Mechanism::Binary => unchecked_istring("binary"), + mime::mechanism::Mechanism::QuotedPrintable => { + unchecked_istring("quoted-printable") + } + mime::mechanism::Mechanism::Base64 => unchecked_istring("base64"), + _ => unchecked_istring("7bit"), + }, + // @FIXME we can't compute the size of the message currently... + size: u32::try_from(sz)?, + }) + } +} + +// --------------------------- +struct NodeMsg<'a>(&'a NodeMime<'a>, &'a composite::Message<'a>); +impl<'a> NodeMsg<'a> { + fn structure(&self, is_ext: bool) -> Result> { + let basic = SelectedMime(self.0 .0).basic_fields()?; + + Ok(BodyStructure::Single { + body: FetchBody { + basic, + specific: SpecificFields::Message { + envelope: Box::new(ImfView(&self.1.imf).message_envelope()), + body_structure: Box::new(NodeMime(&self.1.child).structure(is_ext)?), + number_of_lines: nol(self.1.raw_part), + }, + }, + extension_data: match is_ext { + true => Some(SinglePartExtensionData { + md5: NString(None), + tail: None, + }), + _ => None, + }, + }) + } +} + +#[allow(dead_code)] +struct NodeMult<'a>(&'a NodeMime<'a>, &'a composite::Multipart<'a>); +impl<'a> NodeMult<'a> { + fn structure(&self, is_ext: bool) -> Result> { + let itype = &self.1.mime.interpreted_type; + let subtype = IString::try_from(itype.subtype.to_string()) + .unwrap_or(unchecked_istring("alternative")); + + let inner_bodies = self + .1 + .children + .iter() + .filter_map(|inner| NodeMime(&inner).structure(is_ext).ok()) + .collect::>(); + + Vec1::validate(&inner_bodies)?; + let bodies = Vec1::unvalidated(inner_bodies); + + Ok(BodyStructure::Multi { + bodies, + subtype, + extension_data: match is_ext { + true => Some(MultiPartExtensionData { + parameter_list: vec![( + IString::try_from("boundary").unwrap(), + IString::try_from(self.1.mime.interpreted_type.boundary.to_string())?, + )], + tail: None, + }), + _ => None, + }, + }) + } +} +struct NodeTxt<'a>(&'a NodeMime<'a>, &'a discrete::Text<'a>); +impl<'a> NodeTxt<'a> { + fn structure(&self, is_ext: bool) -> Result> { + let mut basic = SelectedMime(self.0 .0).basic_fields()?; + + // Get the interpreted content type, set it + let itype = match &self.1.mime.interpreted_type { + Deductible::Inferred(v) | Deductible::Explicit(v) => v, + }; + let subtype = + IString::try_from(itype.subtype.to_string()).unwrap_or(unchecked_istring("plain")); + + // Add charset to the list of parameters if we know it has been inferred as it will be + // missing from the parsed content. + if let Deductible::Inferred(charset) = &itype.charset { + basic.parameter_list.push(( + unchecked_istring("charset"), + IString::try_from(charset.to_string()).unwrap_or(unchecked_istring("us-ascii")), + )); + } + + Ok(BodyStructure::Single { + body: FetchBody { + basic, + specific: SpecificFields::Text { + subtype, + number_of_lines: nol(self.1.body), + }, + }, + extension_data: match is_ext { + true => Some(SinglePartExtensionData { + md5: NString(None), + tail: None, + }), + _ => None, + }, + }) + } +} + +struct NodeBin<'a>(&'a NodeMime<'a>, &'a discrete::Binary<'a>); +impl<'a> NodeBin<'a> { + fn structure(&self, is_ext: bool) -> Result> { + let basic = SelectedMime(self.0 .0).basic_fields()?; + + let default = mime::r#type::NaiveType { + main: &b"application"[..], + sub: &b"octet-stream"[..], + params: vec![], + }; + let ct = self.1.mime.fields.ctype.as_ref().unwrap_or(&default); + + let r#type = IString::try_from(String::from_utf8_lossy(ct.main).to_string()).or(Err( + anyhow!("Unable to build IString from given Content-Type type given"), + ))?; + + let subtype = IString::try_from(String::from_utf8_lossy(ct.sub).to_string()).or(Err( + anyhow!("Unable to build IString from given Content-Type subtype given"), + ))?; + + Ok(BodyStructure::Single { + body: FetchBody { + basic, + specific: SpecificFields::Basic { r#type, subtype }, + }, + extension_data: match is_ext { + true => Some(SinglePartExtensionData { + md5: NString(None), + tail: None, + }), + _ => None, + }, + }) + } +} + +// --------------------------- + +struct ExtractedFull<'a>(Cow<'a, [u8]>); +impl<'a> ExtractedFull<'a> { + /// It is possible to fetch a substring of the designated text. + /// This is done by appending an open angle bracket ("<"), the + /// octet position of the first desired octet, a period, the + /// maximum number of octets desired, and a close angle bracket + /// (">") to the part specifier. If the starting octet is beyond + /// the end of the text, an empty string is returned. + /// + /// Any partial fetch that attempts to read beyond the end of the + /// text is truncated as appropriate. A partial fetch that starts + /// at octet 0 is returned as a partial fetch, even if this + /// truncation happened. + /// + /// Note: This means that BODY[]<0.2048> of a 1500-octet message + /// will return BODY[]<0> with a literal of size 1500, not + /// BODY[]. + /// + /// Note: A substring fetch of a HEADER.FIELDS or + /// HEADER.FIELDS.NOT part specifier is calculated after + /// subsetting the header. + fn to_body_section(self, partial: &'_ Option<(u32, NonZeroU32)>) -> BodySection<'a> { + match partial { + Some((begin, len)) => self.partialize(*begin, *len), + None => BodySection::Full(self.0), + } + } + + fn partialize(self, begin: u32, len: NonZeroU32) -> BodySection<'a> { + // Asked range is starting after the end of the content, + // returning an empty buffer + if begin as usize > self.0.len() { + return BodySection::Slice { + body: Cow::Borrowed(&[][..]), + origin_octet: begin, + }; + } + + // Asked range is ending after the end of the content, + // slice only the beginning of the buffer + if (begin + len.get()) as usize >= self.0.len() { + return BodySection::Slice { + body: match self.0 { + Cow::Borrowed(body) => Cow::Borrowed(&body[begin as usize..]), + Cow::Owned(body) => Cow::Owned(body[begin as usize..].to_vec()), + }, + origin_octet: begin, + }; + } + + // Range is included inside the considered content, + // this is the "happy case" + BodySection::Slice { + body: match self.0 { + Cow::Borrowed(body) => { + Cow::Borrowed(&body[begin as usize..(begin + len.get()) as usize]) + } + Cow::Owned(body) => { + Cow::Owned(body[begin as usize..(begin + len.get()) as usize].to_vec()) + } + }, + origin_octet: begin, + } + } +} + +/// ---- LEGACY + +/// s is set to static to ensure that only compile time values +/// checked by developpers are passed. +fn unchecked_istring(s: &'static str) -> IString { + IString::try_from(s).expect("this value is expected to be a valid imap-codec::IString") +} + +// Number Of Lines +fn nol(input: &[u8]) -> u32 { + input + .iter() + .filter(|x| **x == b'\n') + .count() + .try_into() + .unwrap_or(0) +} -- cgit v1.2.3