aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Trinity Pointard <trinity.pointard@gmail.com> 2021-11-11 11:26:02 +0100
committer Gitea <gitea@fake.local> 2021-11-16 15:41:41 +0100
commit 9c58ec28d3b23accf782c6eb005b7c3966ec6314 (patch)
tree a328628c5fb6c6067dbe06ef7d5af8b5f645d15d
parent cdeb5b4dbb7ed95c7ff19f5d1cccdd69b5104c45 (diff)
download garage-9c58ec28d3b23accf782c6eb005b7c3966ec6314.tar.gz
garage-9c58ec28d3b23accf782c6eb005b7c3966ec6314.zip
add support for vhost-style s3 bucket
-rw-r--r-- Cargo.lock 2
-rw-r--r-- src/api/Cargo.toml 1
-rw-r--r-- src/api/api_server.rs 110
-rw-r--r-- src/api/helpers.rs 114
-rw-r--r-- src/api/lib.rs 1
-rw-r--r-- src/util/config.rs 3
-rw-r--r-- src/web/Cargo.toml 1
-rw-r--r-- src/web/web_server.rs 114
8 files changed, 224 insertions, 122 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 66cf79c2..58a28ab3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -426,6 +426,7 @@ dependencies = [
"http-range",
"httpdate 0.3.2",
"hyper",
+ "idna",
"log",
"md-5",
"percent-encoding",
@@ -539,7 +540,6 @@ dependencies = [
"garage_util",
"http",
"hyper",
- "idna",
"log",
"percent-encoding",
]
diff --git a/src/api/Cargo.toml b/src/api/Cargo.toml
index ebbe7c0d..f06c67e4 100644
--- a/src/api/Cargo.toml
+++ b/src/api/Cargo.toml
@@ -24,6 +24,7 @@ crypto-mac = "0.10"
err-derive = "0.3"
hex = "0.4"
hmac = "0.10"
+idna = "0.2"
log = "0.4"
md-5 = "0.9"
sha2 = "0.9"
diff --git a/src/api/api_server.rs b/src/api/api_server.rs
index d51b5a28..2217be1a 100644
--- a/src/api/api_server.rs
+++ b/src/api/api_server.rs
@@ -3,6 +3,7 @@ use std::net::SocketAddr;
use std::sync::Arc;
use futures::future::Future;
+use hyper::header;
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server};
@@ -14,6 +15,7 @@ use garage_model::garage::Garage;
use crate::error::*;
use crate::signature::check_signature;
+use crate::helpers::*;
use crate::s3_bucket::*;
use crate::s3_copy::*;
use crate::s3_delete::*;
@@ -86,7 +88,20 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon
return handle_list_buckets(&api_key);
}
- let (bucket, key) = parse_bucket_key(&path)?;
+ let authority = req
+ .headers()
+ .get(header::HOST)
+ .ok_or_else(|| Error::BadRequest("HOST header required".to_owned()))?
+ .to_str()?;
+
+ // Get bucket
+ let host = authority_to_host(authority)?;
+
+ let (bucket, key) = parse_bucket_key(
+ &path,
+ Some(&host),
+ garage.config.s3_api.root_domain.as_deref(),
+ )?;
let allowed = match req.method() {
&Method::HEAD | &Method::GET => api_key.allow_read(bucket),
_ => api_key.allow_write(bucket),
@@ -137,7 +152,7 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon
let copy_source = req.headers().get("x-amz-copy-source").unwrap().to_str()?;
let copy_source =
percent_encoding::percent_decode_str(copy_source).decode_utf8()?;
- let (source_bucket, source_key) = parse_bucket_key(&copy_source)?;
+ let (source_bucket, source_key) = parse_bucket_key(&copy_source, None, None)?;
if !api_key.allow_read(source_bucket) {
return Err(Error::Forbidden(format!(
"Reading from bucket {} not allowed for this key",
@@ -249,9 +264,23 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon
///
/// S3 internally manages only buckets and keys. This function splits
/// an HTTP path to get the corresponding bucket name and key.
-fn parse_bucket_key(path: &str) -> Result<(&str, Option<&str>), Error> {
+fn parse_bucket_key<'a>(
+ path: &'a str,
+ host: Option<&'a str>,
+ root: Option<&str>,
+) -> Result<(&'a str, Option<&'a str>), Error> {
let path = path.trim_start_matches('/');
+ if host.and(root).is_some() {
+ if let Some(bucket) = host_to_bucket(host.unwrap(), root.unwrap()) {
+ if !path.is_empty() {
+ return Ok((bucket, Some(path)));
+ } else {
+ return Ok((bucket, None));
+ }
+ }
+ }
+
let (bucket, key) = match path.find('/') {
Some(i) => {
let key = &path[i + 1..];
@@ -275,7 +304,7 @@ mod tests {
#[test]
fn parse_bucket_containing_a_key() -> Result<(), Error> {
- let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg")?;
+ let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg", None, None)?;
assert_eq!(bucket, "my_bucket");
assert_eq!(key.expect("key must be set"), "a/super/file.jpg");
Ok(())
@@ -283,10 +312,10 @@ mod tests {
#[test]
fn parse_bucket_containing_no_key() -> Result<(), Error> {
- let (bucket, key) = parse_bucket_key("/my_bucket/")?;
+ let (bucket, key) = parse_bucket_key("/my_bucket/", None, None)?;
assert_eq!(bucket, "my_bucket");
assert!(key.is_none());
- let (bucket, key) = parse_bucket_key("/my_bucket")?;
+ let (bucket, key) = parse_bucket_key("/my_bucket", None, None)?;
assert_eq!(bucket, "my_bucket");
assert!(key.is_none());
Ok(())
@@ -294,11 +323,74 @@ mod tests {
#[test]
fn parse_bucket_containing_no_bucket() {
- let parsed = parse_bucket_key("");
+ let parsed = parse_bucket_key("", None, None);
assert!(parsed.is_err());
- let parsed = parse_bucket_key("/");
+ let parsed = parse_bucket_key("/", None, None);
assert!(parsed.is_err());
- let parsed = parse_bucket_key("////");
+ let parsed = parse_bucket_key("////", None, None);
assert!(parsed.is_err());
}
+
+ #[test]
+ fn parse_bucket_with_vhost_and_key() -> Result<(), Error> {
+ let (bucket, key) = parse_bucket_key(
+ "/a/super/file.jpg",
+ Some("my-bucket.garage.tld"),
+ Some("garage.tld"),
+ )?;
+ assert_eq!(bucket, "my-bucket");
+ assert_eq!(key.expect("key must be set"), "a/super/file.jpg");
+
+ let (bucket, key) = parse_bucket_key(
+ "/my_bucket/a/super/file.jpg",
+ Some("not-garage.tld"),
+ Some("garage.tld"),
+ )?;
+ assert_eq!(bucket, "my_bucket");
+ assert_eq!(key.expect("key must be set"), "a/super/file.jpg");
+ Ok(())
+ }
+
+ #[test]
+ fn parse_bucket_with_vhost_no_key() -> Result<(), Error> {
+ let (bucket, key) = parse_bucket_key("", Some("my-bucket.garage.tld"), Some("garage.tld"))?;
+ assert_eq!(bucket, "my-bucket");
+ assert!(key.is_none());
+ let (bucket, key) =
+ parse_bucket_key("/", Some("my-bucket.garage.tld"), Some("garage.tld"))?;
+ assert_eq!(bucket, "my-bucket");
+ assert!(key.is_none());
+ Ok(())
+ }
+
+ #[test]
+ fn parse_bucket_missmatch_vhost() {
+ let test_vec = [
+ "/my_bucket/a/super/file.jpg",
+ "/my_bucket/",
+ "/my_bucket",
+ "",
+ "/",
+ "////",
+ ];
+ let eq = |l, r| match (l, r) {
+ (Ok(l), Ok(r)) => l == r,
+ (Err(_), Err(_)) => true,
+ _ => false,
+ };
+ for test in test_vec {
+ assert!(eq(
+ parse_bucket_key(test, None, None),
+ parse_bucket_key(test, Some("bucket.garage.tld"), None)
+ ));
+ assert!(eq(
+ parse_bucket_key(test, None, None),
+ parse_bucket_key(test, None, Some("garage.tld"))
+ ));
+ assert!(eq(
+ parse_bucket_key(test, None, None),
+ parse_bucket_key(test, Some("not-garage.tld"), Some("garage.tld"))
+ ));
+ }
+ }
}
diff --git a/src/api/helpers.rs b/src/api/helpers.rs
new file mode 100644
index 00000000..9ba32537
--- /dev/null
+++ b/src/api/helpers.rs
@@ -0,0 +1,114 @@
+use crate::Error;
+use idna::domain_to_unicode;
+
+/// Host to bucket
+///
+/// Convert a host, like "bucket.garage-site.tld", to the corresponding bucket
+/// ("bucket"), considering that ".garage-site.tld" is the "root domain". Returns
+/// `None` when the host is not under the root domain (e.g. "john.doe.com").
+/// This behavior has been chosen to follow AWS S3 semantics.
+pub fn host_to_bucket<'a>(host: &'a str, root: &str) -> Option<&'a str> {
+ let root = root.trim_start_matches('.');
+ let label_root = root.chars().filter(|c| c == &'.').count() + 1;
+ let root = root.rsplit('.');
+ let mut host = host.rsplitn(label_root + 1, '.');
+ for root_part in root {
+ let host_part = host.next()?;
+ if root_part != host_part {
+ return None;
+ }
+ }
+ host.next()
+}
+
+/// Extract host from the authority section given by the HTTP host header
+///
+/// The HTTP host contains a host and an optional port; the host is returned in Unicode (IDNA) form.
+/// Extracting the port is more complex than just finding the colon (:) symbol due to IPv6.
+/// We do not use the collect pattern as there is no way in std rust to collect over a stack allocated value
+/// check here: https://docs.rs/collect_slice/1.2.0/collect_slice/
+pub fn authority_to_host(authority: &str) -> Result<String, Error> {
+ let mut iter = authority.chars().enumerate();
+ let (_, first_char) = iter
+ .next()
+ .ok_or_else(|| Error::BadRequest("Authority is empty".to_string()))?;
+
+ let split = match first_char {
+ '[' => {
+ let mut iter = iter.skip_while(|(_, c)| c != &']');
+ match iter.next() {
+ Some((_, ']')) => iter.next(),
+ _ => {
+ return Err(Error::BadRequest(format!(
+ "Authority {} has an illegal format",
+ authority
+ )))
+ }
+ }
+ }
+ _ => iter.find(|(_, c)| *c == ':'),
+ };
+
+ let authority = match split {
+ Some((i, ':')) => Ok(&authority[..i]),
+ None => Ok(authority),
+ Some((_, _)) => Err(Error::BadRequest(format!(
+ "Authority {} has an illegal format",
+ authority
+ ))),
+ };
+ authority.map(|h| domain_to_unicode(h).0)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn authority_to_host_with_port() -> Result<(), Error> {
+ let domain = authority_to_host("[::1]:3902")?;
+ assert_eq!(domain, "[::1]");
+ let domain2 = authority_to_host("garage.tld:65200")?;
+ assert_eq!(domain2, "garage.tld");
+ let domain3 = authority_to_host("127.0.0.1:80")?;
+ assert_eq!(domain3, "127.0.0.1");
+ Ok(())
+ }
+
+ #[test]
+ fn authority_to_host_without_port() -> Result<(), Error> {
+ let domain = authority_to_host("[::1]")?;
+ assert_eq!(domain, "[::1]");
+ let domain2 = authority_to_host("garage.tld")?;
+ assert_eq!(domain2, "garage.tld");
+ let domain3 = authority_to_host("127.0.0.1")?;
+ assert_eq!(domain3, "127.0.0.1");
+ assert!(authority_to_host("[").is_err());
+ assert!(authority_to_host("[hello").is_err());
+ Ok(())
+ }
+
+ #[test]
+ fn host_to_bucket_test() {
+ assert_eq!(
+ host_to_bucket("john.doe.garage.tld", ".garage.tld").unwrap(),
+ "john.doe"
+ );
+
+ assert_eq!(
+ host_to_bucket("john.doe.garage.tld", "garage.tld").unwrap(),
+ "john.doe"
+ );
+
+ assert_eq!(host_to_bucket("john.doe.com", "garage.tld"), None);
+
+ assert_eq!(host_to_bucket("john.doe.com", ".garage.tld"), None);
+
+ assert_eq!(host_to_bucket("garage.tld", "garage.tld"), None);
+
+ assert_eq!(host_to_bucket("garage.tld", ".garage.tld"), None);
+
+ assert_eq!(host_to_bucket("not-garage.tld", "garage.tld"), None);
+ assert_eq!(host_to_bucket("not-garage.tld", ".garage.tld"), None);
+ }
+}
diff --git a/src/api/lib.rs b/src/api/lib.rs
index 9a23c231..ab8e3dd1 100644
--- a/src/api/lib.rs
+++ b/src/api/lib.rs
@@ -12,6 +12,7 @@ pub use api_server::run_api_server;
mod signature;
+pub mod helpers;
mod s3_bucket;
mod s3_copy;
mod s3_delete;
diff --git a/src/util/config.rs b/src/util/config.rs
index 0e4c76ae..33802012 100644
--- a/src/util/config.rs
+++ b/src/util/config.rs
@@ -68,6 +68,9 @@ pub struct ApiConfig {
pub api_bind_addr: SocketAddr,
/// S3 region to use
pub s3_region: String,
+ /// Suffix to remove from domain name to find bucket. If None,
+ /// vhost-style S3 requests are disabled
+ pub root_domain: Option<String>,
}
/// Configuration for serving files as normal web server
diff --git a/src/web/Cargo.toml b/src/web/Cargo.toml
index f5b40370..634ce282 100644
--- a/src/web/Cargo.toml
+++ b/src/web/Cargo.toml
@@ -19,7 +19,6 @@ garage_util = { version = "0.4.0", path = "../util" }
garage_table = { version = "0.4.0", path = "../table" }
err-derive = "0.3"
-idna = "0.2"
log = "0.4"
percent-encoding = "2.1.0"
diff --git a/src/web/web_server.rs b/src/web/web_server.rs
index bff9e71c..e9c5039d 100644
--- a/src/web/web_server.rs
+++ b/src/web/web_server.rs
@@ -9,9 +9,8 @@ use hyper::{
Body, Method, Request, Response, Server,
};
-use idna::domain_to_unicode;
-
use crate::error::*;
+use garage_api::helpers::{authority_to_host, host_to_bucket};
use garage_api::s3_get::{handle_get, handle_head};
use garage_model::bucket_table::*;
use garage_model::garage::Garage;
@@ -75,9 +74,9 @@ async fn serve_file(garage: Arc<Garage>, req: Request<Body>) -> Result<Response<
.to_str()?;
// Get bucket
- let (host, _) = domain_to_unicode(authority_to_host(authority)?);
+ let host = authority_to_host(authority)?;
let root = &garage.config.s3_web.root_domain;
- let bucket = host_to_bucket(&host, root);
+ let bucket = host_to_bucket(&host, root).unwrap_or(&host);
// Check bucket is exposed as a website
let bucket_desc = garage
@@ -108,65 +107,6 @@ async fn serve_file(garage: Arc<Garage>, req: Request<Body>) -> Result<Response<
Ok(res)
}
-/// Extract host from the authority section given by the HTTP host header
-///
-/// The HTTP host contains both a host and a port.
-/// Extracting the port is more complex than just finding the colon (:) symbol due to IPv6
-/// We do not use the collect pattern as there is no way in std rust to collect over a stack allocated value
-/// check here: https://docs.rs/collect_slice/1.2.0/collect_slice/
-fn authority_to_host(authority: &str) -> Result<&str, Error> {
- let mut iter = authority.chars().enumerate();
- let (_, first_char) = iter
- .next()
- .ok_or_else(|| Error::BadRequest("Authority is empty".to_string()))?;
-
- let split = match first_char {
- '[' => {
- let mut iter = iter.skip_while(|(_, c)| c != &']');
- match iter.next() {
- Some((_, ']')) => iter.next(),
- _ => {
- return Err(Error::BadRequest(format!(
- "Authority {} has an illegal format",
- authority
- )))
- }
- }
- }
- _ => iter.find(|(_, c)| *c == ':'),
- };
-
- match split {
- Some((i, ':')) => Ok(&authority[..i]),
- None => Ok(authority),
- Some((_, _)) => Err(Error::BadRequest(format!(
- "Authority {} has an illegal format",
- authority
- ))),
- }
-}
-
-/// Host to bucket
-///
-/// Convert a host, like "bucket.garage-site.tld" or "john.doe.com"
-/// to the corresponding bucket, resp. "bucket" and "john.doe.com"
-/// considering that ".garage-site.tld" is the "root domain".
-/// This behavior has been chosen to follow AWS S3 semantic.
-fn host_to_bucket<'a>(host: &'a str, root: &str) -> &'a str {
- if root.len() >= host.len() || !host.ends_with(root) {
- return host;
- }
-
- let len_diff = host.len() - root.len();
- let missing_starting_dot = !root.starts_with('.');
- let cursor = if missing_starting_dot {
- len_diff - 1
- } else {
- len_diff
- };
- &host[..cursor]
-}
-
/// Path to key
///
/// Convert the provided path to the internal key
@@ -201,54 +141,6 @@ mod tests {
use super::*;
#[test]
- fn authority_to_host_with_port() -> Result<(), Error> {
- let domain = authority_to_host("[::1]:3902")?;
- assert_eq!(domain, "[::1]");
- let domain2 = authority_to_host("garage.tld:65200")?;
- assert_eq!(domain2, "garage.tld");
- let domain3 = authority_to_host("127.0.0.1:80")?;
- assert_eq!(domain3, "127.0.0.1");
- Ok(())
- }
-
- #[test]
- fn authority_to_host_without_port() -> Result<(), Error> {
- let domain = authority_to_host("[::1]")?;
- assert_eq!(domain, "[::1]");
- let domain2 = authority_to_host("garage.tld")?;
- assert_eq!(domain2, "garage.tld");
- let domain3 = authority_to_host("127.0.0.1")?;
- assert_eq!(domain3, "127.0.0.1");
- assert!(authority_to_host("[").is_err());
- assert!(authority_to_host("[hello").is_err());
- Ok(())
- }
-
- #[test]
- fn host_to_bucket_test() {
- assert_eq!(
- host_to_bucket("john.doe.garage.tld", ".garage.tld"),
- "john.doe"
- );
-
- assert_eq!(
- host_to_bucket("john.doe.garage.tld", "garage.tld"),
- "john.doe"
- );
-
- assert_eq!(host_to_bucket("john.doe.com", "garage.tld"), "john.doe.com");
-
- assert_eq!(
- host_to_bucket("john.doe.com", ".garage.tld"),
- "john.doe.com"
- );
-
- assert_eq!(host_to_bucket("garage.tld", "garage.tld"), "garage.tld");
-
- assert_eq!(host_to_bucket("garage.tld", ".garage.tld"), "garage.tld");
- }
-
- #[test]
fn path_to_key_test() -> Result<(), Error> {
assert_eq!(path_to_key("/file%20.jpg", "index.html")?, "file .jpg");
assert_eq!(path_to_key("/%20t/", "index.html")?, " t/index.html");