diff options
author | Trinity Pointard <trinity.pointard@gmail.com> | 2021-11-11 11:26:02 +0100 |
---|---|---|
committer | Gitea <gitea@fake.local> | 2021-11-16 15:41:41 +0100 |
commit | 9c58ec28d3b23accf782c6eb005b7c3966ec6314 (patch) | |
tree | a328628c5fb6c6067dbe06ef7d5af8b5f645d15d | |
parent | cdeb5b4dbb7ed95c7ff19f5d1cccdd69b5104c45 (diff) | |
download | garage-9c58ec28d3b23accf782c6eb005b7c3966ec6314.tar.gz garage-9c58ec28d3b23accf782c6eb005b7c3966ec6314.zip |
add support for vhost-style s3 bucket
-rw-r--r-- | Cargo.lock | 2 | ||||
-rw-r--r-- | src/api/Cargo.toml | 1 | ||||
-rw-r--r-- | src/api/api_server.rs | 110 | ||||
-rw-r--r-- | src/api/helpers.rs | 114 | ||||
-rw-r--r-- | src/api/lib.rs | 1 | ||||
-rw-r--r-- | src/util/config.rs | 3 | ||||
-rw-r--r-- | src/web/Cargo.toml | 1 | ||||
-rw-r--r-- | src/web/web_server.rs | 114 |
8 files changed, 224 insertions, 122 deletions
@@ -426,6 +426,7 @@ dependencies = [ "http-range", "httpdate 0.3.2", "hyper", + "idna", "log", "md-5", "percent-encoding", @@ -539,7 +540,6 @@ dependencies = [ "garage_util", "http", "hyper", - "idna", "log", "percent-encoding", ] diff --git a/src/api/Cargo.toml b/src/api/Cargo.toml index ebbe7c0d..f06c67e4 100644 --- a/src/api/Cargo.toml +++ b/src/api/Cargo.toml @@ -24,6 +24,7 @@ crypto-mac = "0.10" err-derive = "0.3" hex = "0.4" hmac = "0.10" +idna = "0.2" log = "0.4" md-5 = "0.9" sha2 = "0.9" diff --git a/src/api/api_server.rs b/src/api/api_server.rs index d51b5a28..2217be1a 100644 --- a/src/api/api_server.rs +++ b/src/api/api_server.rs @@ -3,6 +3,7 @@ use std::net::SocketAddr; use std::sync::Arc; use futures::future::Future; +use hyper::header; use hyper::server::conn::AddrStream; use hyper::service::{make_service_fn, service_fn}; use hyper::{Body, Method, Request, Response, Server}; @@ -14,6 +15,7 @@ use garage_model::garage::Garage; use crate::error::*; use crate::signature::check_signature; +use crate::helpers::*; use crate::s3_bucket::*; use crate::s3_copy::*; use crate::s3_delete::*; @@ -86,7 +88,20 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon return handle_list_buckets(&api_key); } - let (bucket, key) = parse_bucket_key(&path)?; + let authority = req + .headers() + .get(header::HOST) + .ok_or_else(|| Error::BadRequest("HOST header required".to_owned()))? + .to_str()?; + + // Get bucket + let host = authority_to_host(authority)?; + + let (bucket, key) = parse_bucket_key( + &path, + Some(&host), + garage.config.s3_api.root_domain.as_deref(), + )?; let allowed = match req.method() { &Method::HEAD | &Method::GET => api_key.allow_read(bucket), _ => api_key.allow_write(bucket), @@ -137,7 +152,7 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon let copy_source = req.headers().get("x-amz-copy-source").unwrap().to_str()?; let copy_source = percent_encoding::percent_decode_str(copy_source).decode_utf8()?; - let (source_bucket, source_key) = parse_bucket_key(©_source)?; + let (source_bucket, source_key) = parse_bucket_key(©_source, None, None)?; if !api_key.allow_read(source_bucket) { return Err(Error::Forbidden(format!( "Reading from bucket {} not allowed for this key", @@ -249,9 +264,23 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon /// /// S3 internally manages only buckets and keys. This function splits /// an HTTP path to get the corresponding bucket name and key. -fn parse_bucket_key(path: &str) -> Result<(&str, Option<&str>), Error> { +fn parse_bucket_key<'a>( + path: &'a str, + host: Option<&'a str>, + root: Option<&str>, +) -> Result<(&'a str, Option<&'a str>), Error> { let path = path.trim_start_matches('/'); + if host.and(root).is_some() { + if let Some(bucket) = host_to_bucket(host.unwrap(), root.unwrap()) { + if !path.is_empty() { + return Ok((bucket, Some(path))); + } else { + return Ok((bucket, None)); + } + } + } + let (bucket, key) = match path.find('/') { Some(i) => { let key = &path[i + 1..]; @@ -275,7 +304,7 @@ mod tests { #[test] fn parse_bucket_containing_a_key() -> Result<(), Error> { - let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg")?; + let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg", None, None)?; assert_eq!(bucket, "my_bucket"); assert_eq!(key.expect("key must be set"), "a/super/file.jpg"); Ok(()) @@ -283,10 +312,10 @@ mod tests { #[test] fn parse_bucket_containing_no_key() -> Result<(), Error> { - let (bucket, key) = parse_bucket_key("/my_bucket/")?; + let (bucket, key) = parse_bucket_key("/my_bucket/", None, None)?; assert_eq!(bucket, "my_bucket"); assert!(key.is_none()); - let (bucket, key) = parse_bucket_key("/my_bucket")?; + let (bucket, key) = parse_bucket_key("/my_bucket", None, None)?; assert_eq!(bucket, "my_bucket"); assert!(key.is_none()); Ok(()) @@ -294,11 +323,74 @@ mod tests { #[test] fn parse_bucket_containing_no_bucket() { - let parsed = parse_bucket_key(""); + let parsed = parse_bucket_key("", None, None); assert!(parsed.is_err()); - let parsed = parse_bucket_key("/"); + let parsed = parse_bucket_key("/", None, None); assert!(parsed.is_err()); - let parsed = parse_bucket_key("////"); + let parsed = parse_bucket_key("////", None, None); assert!(parsed.is_err()); } + + #[test] + fn parse_bucket_with_vhost_and_key() -> Result<(), Error> { + let (bucket, key) = parse_bucket_key( + "/a/super/file.jpg", + Some("my-bucket.garage.tld"), + Some("garage.tld"), + )?; + assert_eq!(bucket, "my-bucket"); + assert_eq!(key.expect("key must be set"), "a/super/file.jpg"); + + let (bucket, key) = parse_bucket_key( + "/my_bucket/a/super/file.jpg", + Some("not-garage.tld"), + Some("garage.tld"), + )?; + assert_eq!(bucket, "my_bucket"); + assert_eq!(key.expect("key must be set"), "a/super/file.jpg"); + Ok(()) + } + + #[test] + fn parse_bucket_with_vhost_no_key() -> Result<(), Error> { + let (bucket, key) = parse_bucket_key("", Some("my-bucket.garage.tld"), Some("garage.tld"))?; + assert_eq!(bucket, "my-bucket"); + assert!(key.is_none()); + let (bucket, key) = + parse_bucket_key("/", Some("my-bucket.garage.tld"), Some("garage.tld"))?; + assert_eq!(bucket, "my-bucket"); + assert!(key.is_none()); + Ok(()) + } + + #[test] + fn parse_bucket_missmatch_vhost() { + let test_vec = [ + "/my_bucket/a/super/file.jpg", + "/my_bucket/", + "/my_bucket", + "", + "/", + "////", + ]; + let eq = |l, r| match (l, r) { + (Ok(l), Ok(r)) => l == r, + (Err(_), Err(_)) => true, + _ => false, + }; + for test in test_vec { + assert!(eq( + parse_bucket_key(test, None, None), + parse_bucket_key(test, Some("bucket.garage.tld"), None) + )); + assert!(eq( + parse_bucket_key(test, None, None), + parse_bucket_key(test, None, Some("garage.tld")) + )); + assert!(eq( + parse_bucket_key(test, None, None), + parse_bucket_key(test, Some("not-garage.tld"), Some("garage.tld")) + )); + } + } } diff --git a/src/api/helpers.rs b/src/api/helpers.rs new file mode 100644 index 00000000..9ba32537 --- /dev/null +++ b/src/api/helpers.rs @@ -0,0 +1,114 @@ +use crate::Error; +use idna::domain_to_unicode; + +/// Host to bucket +/// +/// Convert a host, like "bucket.garage-site.tld" or "john.doe.com" +/// to the corresponding bucket, resp. "bucket" and "john.doe.com" +/// considering that ".garage-site.tld" is the "root domain". +/// This behavior has been chosen to follow AWS S3 semantic. +pub fn host_to_bucket<'a>(host: &'a str, root: &str) -> Option<&'a str> { + let root = root.trim_start_matches('.'); + let label_root = root.chars().filter(|c| c == &'.').count() + 1; + let root = root.rsplit('.'); + let mut host = host.rsplitn(label_root + 1, '.'); + for root_part in root { + let host_part = host.next()?; + if root_part != host_part { + return None; + } + } + host.next() +} + +/// Extract host from the authority section given by the HTTP host header +/// +/// The HTTP host contains both a host and a port. +/// Extracting the port is more complex than just finding the colon (:) symbol due to IPv6 +/// We do not use the collect pattern as there is no way in std rust to collect over a stack allocated value +/// check here: https://docs.rs/collect_slice/1.2.0/collect_slice/ +pub fn authority_to_host(authority: &str) -> Result<String, Error> { + let mut iter = authority.chars().enumerate(); + let (_, first_char) = iter + .next() + .ok_or_else(|| Error::BadRequest("Authority is empty".to_string()))?; + + let split = match first_char { + '[' => { + let mut iter = iter.skip_while(|(_, c)| c != &']'); + match iter.next() { + Some((_, ']')) => iter.next(), + _ => { + return Err(Error::BadRequest(format!( + "Authority {} has an illegal format", + authority + ))) + } + } + } + _ => iter.find(|(_, c)| *c == ':'), + }; + + let authority = match split { + Some((i, ':')) => Ok(&authority[..i]), + None => Ok(authority), + Some((_, _)) => Err(Error::BadRequest(format!( + "Authority {} has an illegal format", + authority + ))), + }; + authority.map(|h| domain_to_unicode(h).0) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn authority_to_host_with_port() -> Result<(), Error> { + let domain = authority_to_host("[::1]:3902")?; + assert_eq!(domain, "[::1]"); + let domain2 = authority_to_host("garage.tld:65200")?; + assert_eq!(domain2, "garage.tld"); + let domain3 = authority_to_host("127.0.0.1:80")?; + assert_eq!(domain3, "127.0.0.1"); + Ok(()) + } + + #[test] + fn authority_to_host_without_port() -> Result<(), Error> { + let domain = authority_to_host("[::1]")?; + assert_eq!(domain, "[::1]"); + let domain2 = authority_to_host("garage.tld")?; + assert_eq!(domain2, "garage.tld"); + let domain3 = authority_to_host("127.0.0.1")?; + assert_eq!(domain3, "127.0.0.1"); + assert!(authority_to_host("[").is_err()); + assert!(authority_to_host("[hello").is_err()); + Ok(()) + } + + #[test] + fn host_to_bucket_test() { + assert_eq!( + host_to_bucket("john.doe.garage.tld", ".garage.tld").unwrap(), + "john.doe" + ); + + assert_eq!( + host_to_bucket("john.doe.garage.tld", "garage.tld").unwrap(), + "john.doe" + ); + + assert_eq!(host_to_bucket("john.doe.com", "garage.tld"), None); + + assert_eq!(host_to_bucket("john.doe.com", ".garage.tld"), None); + + assert_eq!(host_to_bucket("garage.tld", "garage.tld"), None); + + assert_eq!(host_to_bucket("garage.tld", ".garage.tld"), None); + + assert_eq!(host_to_bucket("not-garage.tld", "garage.tld"), None); + assert_eq!(host_to_bucket("not-garage.tld", ".garage.tld"), None); + } +} diff --git a/src/api/lib.rs b/src/api/lib.rs index 9a23c231..ab8e3dd1 100644 --- a/src/api/lib.rs +++ b/src/api/lib.rs @@ -12,6 +12,7 @@ pub use api_server::run_api_server; mod signature; +pub mod helpers; mod s3_bucket; mod s3_copy; mod s3_delete; diff --git a/src/util/config.rs b/src/util/config.rs index 0e4c76ae..33802012 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -68,6 +68,9 @@ pub struct ApiConfig { pub api_bind_addr: SocketAddr, /// S3 region to use pub s3_region: String, + /// Suffix to remove from domain name to find bucket. If None, + /// vhost-style S3 request are disabled + pub root_domain: Option<String>, } /// Configuration for serving files as normal web server diff --git a/src/web/Cargo.toml b/src/web/Cargo.toml index f5b40370..634ce282 100644 --- a/src/web/Cargo.toml +++ b/src/web/Cargo.toml @@ -19,7 +19,6 @@ garage_util = { version = "0.4.0", path = "../util" } garage_table = { version = "0.4.0", path = "../table" } err-derive = "0.3" -idna = "0.2" log = "0.4" percent-encoding = "2.1.0" diff --git a/src/web/web_server.rs b/src/web/web_server.rs index bff9e71c..e9c5039d 100644 --- a/src/web/web_server.rs +++ b/src/web/web_server.rs @@ -9,9 +9,8 @@ use hyper::{ Body, Method, Request, Response, Server, }; -use idna::domain_to_unicode; - use crate::error::*; +use garage_api::helpers::{authority_to_host, host_to_bucket}; use garage_api::s3_get::{handle_get, handle_head}; use garage_model::bucket_table::*; use garage_model::garage::Garage; @@ -75,9 +74,9 @@ async fn serve_file(garage: Arc<Garage>, req: Request<Body>) -> Result<Response< .to_str()?; // Get bucket - let (host, _) = domain_to_unicode(authority_to_host(authority)?); + let host = authority_to_host(authority)?; let root = &garage.config.s3_web.root_domain; - let bucket = host_to_bucket(&host, root); + let bucket = host_to_bucket(&host, root).unwrap_or(&host); // Check bucket is exposed as a website let bucket_desc = garage @@ -108,65 +107,6 @@ async fn serve_file(garage: Arc<Garage>, req: Request<Body>) -> Result<Response< Ok(res) } -/// Extract host from the authority section given by the HTTP host header -/// -/// The HTTP host contains both a host and a port. -/// Extracting the port is more complex than just finding the colon (:) symbol due to IPv6 -/// We do not use the collect pattern as there is no way in std rust to collect over a stack allocated value -/// check here: https://docs.rs/collect_slice/1.2.0/collect_slice/ -fn authority_to_host(authority: &str) -> Result<&str, Error> { - let mut iter = authority.chars().enumerate(); - let (_, first_char) = iter - .next() - .ok_or_else(|| Error::BadRequest("Authority is empty".to_string()))?; - - let split = match first_char { - '[' => { - let mut iter = iter.skip_while(|(_, c)| c != &']'); - match iter.next() { - Some((_, ']')) => iter.next(), - _ => { - return Err(Error::BadRequest(format!( - "Authority {} has an illegal format", - authority - ))) - } - } - } - _ => iter.find(|(_, c)| *c == ':'), - }; - - match split { - Some((i, ':')) => Ok(&authority[..i]), - None => Ok(authority), - Some((_, _)) => Err(Error::BadRequest(format!( - "Authority {} has an illegal format", - authority - ))), - } -} - -/// Host to bucket -/// -/// Convert a host, like "bucket.garage-site.tld" or "john.doe.com" -/// to the corresponding bucket, resp. "bucket" and "john.doe.com" -/// considering that ".garage-site.tld" is the "root domain". -/// This behavior has been chosen to follow AWS S3 semantic. -fn host_to_bucket<'a>(host: &'a str, root: &str) -> &'a str { - if root.len() >= host.len() || !host.ends_with(root) { - return host; - } - - let len_diff = host.len() - root.len(); - let missing_starting_dot = !root.starts_with('.'); - let cursor = if missing_starting_dot { - len_diff - 1 - } else { - len_diff - }; - &host[..cursor] -} - /// Path to key /// /// Convert the provided path to the internal key @@ -201,54 +141,6 @@ mod tests { use super::*; #[test] - fn authority_to_host_with_port() -> Result<(), Error> { - let domain = authority_to_host("[::1]:3902")?; - assert_eq!(domain, "[::1]"); - let domain2 = authority_to_host("garage.tld:65200")?; - assert_eq!(domain2, "garage.tld"); - let domain3 = authority_to_host("127.0.0.1:80")?; - assert_eq!(domain3, "127.0.0.1"); - Ok(()) - } - - #[test] - fn authority_to_host_without_port() -> Result<(), Error> { - let domain = authority_to_host("[::1]")?; - assert_eq!(domain, "[::1]"); - let domain2 = authority_to_host("garage.tld")?; - assert_eq!(domain2, "garage.tld"); - let domain3 = authority_to_host("127.0.0.1")?; - assert_eq!(domain3, "127.0.0.1"); - assert!(authority_to_host("[").is_err()); - assert!(authority_to_host("[hello").is_err()); - Ok(()) - } - - #[test] - fn host_to_bucket_test() { - assert_eq!( - host_to_bucket("john.doe.garage.tld", ".garage.tld"), - "john.doe" - ); - - assert_eq!( - host_to_bucket("john.doe.garage.tld", "garage.tld"), - "john.doe" - ); - - assert_eq!(host_to_bucket("john.doe.com", "garage.tld"), "john.doe.com"); - - assert_eq!( - host_to_bucket("john.doe.com", ".garage.tld"), - "john.doe.com" - ); - - assert_eq!(host_to_bucket("garage.tld", "garage.tld"), "garage.tld"); - - assert_eq!(host_to_bucket("garage.tld", ".garage.tld"), "garage.tld"); - } - - #[test] fn path_to_key_test() -> Result<(), Error> { assert_eq!(path_to_key("/file%20.jpg", "index.html")?, "file .jpg"); assert_eq!(path_to_key("/%20t/", "index.html")?, " t/index.html"); |