aboutsummaryrefslogtreecommitdiff
path: root/src/web
diff options
context:
space:
mode:
authorAlex <alex@adnab.me>2021-01-15 17:49:49 +0100
committerAlex <alex@adnab.me>2021-01-15 17:49:49 +0100
commit97494b4a190cb16986b743ba1446968a6c180c6e (patch)
tree81edd9c3fab766e83ba8d8bba962aa68e1b911c3 /src/web
parent8956db2a81c3700b62c5208cff2ca4b0b124c328 (diff)
parent851893a3f299da9eeb0ef3c745be1f30164fd6cf (diff)
downloadgarage-97494b4a190cb16986b743ba1446968a6c180c6e.tar.gz
garage-97494b4a190cb16986b743ba1446968a6c180c6e.zip
Merge pull request 'Support website publishing' (#7) from feature/website into master
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/7
Diffstat (limited to 'src/web')
-rw-r--r--src/web/Cargo.toml28
-rw-r--r--src/web/error.rs39
-rw-r--r--src/web/lib.rs6
-rw-r--r--src/web/web_server.rs260
4 files changed, 333 insertions, 0 deletions
diff --git a/src/web/Cargo.toml b/src/web/Cargo.toml
new file mode 100644
index 00000000..751b9ace
--- /dev/null
+++ b/src/web/Cargo.toml
@@ -0,0 +1,28 @@
+[package]
+name = "garage_web"
+version = "0.1.0"
+authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"]
+edition = "2018"
+license = "GPL-3.0"
+description = "S3-like website endpoint crate for the Garage object store"
+repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
+
+[lib]
+path = "lib.rs"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+garage_util = { version = "0.1", path = "../util" }
+garage_table = { version = "0.1.1", path = "../table" }
+garage_model = { version = "0.1.1", path = "../model" }
+garage_api = { version = "0.1.1", path = "../api" }
+
+err-derive = "0.2.3"
+log = "0.4"
+futures = "0.3"
+http = "0.2"
+hyper = "0.13"
+percent-encoding = "2.1.0"
+roxmltree = "0.11"
+idna = "0.2"
diff --git a/src/web/error.rs b/src/web/error.rs
new file mode 100644
index 00000000..14bc3b75
--- /dev/null
+++ b/src/web/error.rs
@@ -0,0 +1,39 @@
+use err_derive::Error;
+use hyper::StatusCode;
+
+use garage_util::error::Error as GarageError;
+
+#[derive(Debug, Error)]
+pub enum Error {
+ #[error(display = "API error: {}", _0)]
+ ApiError(#[error(source)] garage_api::error::Error),
+
+ // Category: internal error
+ #[error(display = "Internal error: {}", _0)]
+ InternalError(#[error(source)] GarageError),
+
+ #[error(display = "Not found")]
+ NotFound,
+
+ // Category: bad request
+ #[error(display = "Invalid UTF-8: {}", _0)]
+ InvalidUTF8(#[error(source)] std::str::Utf8Error),
+
+ #[error(display = "Invalid header value: {}", _0)]
+ InvalidHeader(#[error(source)] hyper::header::ToStrError),
+
+ #[error(display = "Bad request: {}", _0)]
+ BadRequest(String),
+}
+
+impl Error {
+ pub fn http_status_code(&self) -> StatusCode {
+ match self {
+ Error::NotFound => StatusCode::NOT_FOUND,
+ Error::ApiError(e) => e.http_status_code(),
+ Error::InternalError(GarageError::RPC(_)) => StatusCode::SERVICE_UNAVAILABLE,
+ Error::InternalError(_) => StatusCode::INTERNAL_SERVER_ERROR,
+ _ => StatusCode::BAD_REQUEST,
+ }
+ }
+}
diff --git a/src/web/lib.rs b/src/web/lib.rs
new file mode 100644
index 00000000..f28937b9
--- /dev/null
+++ b/src/web/lib.rs
@@ -0,0 +1,6 @@
+#[macro_use]
+extern crate log;
+
+pub mod error;
+
+pub mod web_server;
diff --git a/src/web/web_server.rs b/src/web/web_server.rs
new file mode 100644
index 00000000..24d111a9
--- /dev/null
+++ b/src/web/web_server.rs
@@ -0,0 +1,260 @@
+use std::{borrow::Cow, convert::Infallible, net::SocketAddr, sync::Arc};
+
+use futures::future::Future;
+
+use hyper::{
+ header::HOST,
+ server::conn::AddrStream,
+ service::{make_service_fn, service_fn},
+ Body, Method, Request, Response, Server,
+};
+
+use idna::domain_to_unicode;
+
+use crate::error::*;
+use garage_api::s3_get::{handle_get, handle_head};
+use garage_model::bucket_table::*;
+use garage_model::garage::Garage;
+use garage_table::*;
+use garage_util::error::Error as GarageError;
+
+pub async fn run_web_server(
+ garage: Arc<Garage>,
+ shutdown_signal: impl Future<Output = ()>,
+) -> Result<(), GarageError> {
+ let addr = &garage.config.s3_web.bind_addr;
+
+ let service = make_service_fn(|conn: &AddrStream| {
+ let garage = garage.clone();
+ let client_addr = conn.remote_addr();
+ async move {
+ Ok::<_, Error>(service_fn(move |req: Request<Body>| {
+ let garage = garage.clone();
+ handle_request(garage, req, client_addr)
+ }))
+ }
+ });
+
+ let server = Server::bind(&addr).serve(service);
+ let graceful = server.with_graceful_shutdown(shutdown_signal);
+ info!("Web server listening on http://{}", addr);
+
+ graceful.await?;
+ Ok(())
+}
+
+async fn handle_request(
+ garage: Arc<Garage>,
+ req: Request<Body>,
+ addr: SocketAddr,
+) -> Result<Response<Body>, Infallible> {
+ info!("{} {} {}", addr, req.method(), req.uri());
+ let res = serve_file(garage, req).await;
+ match &res {
+ Ok(r) => debug!("{} {:?}", r.status(), r.headers()),
+ Err(e) => warn!("Response: error {}, {}", e.http_status_code(), e),
+ }
+
+ Ok(res.unwrap_or_else(error_to_res))
+}
+
+fn error_to_res(e: Error) -> Response<Body> {
+ let body: Body = Body::from(format!("{}\n", e));
+ let mut http_error = Response::new(body);
+ *http_error.status_mut() = e.http_status_code();
+ http_error
+}
+
+async fn serve_file(garage: Arc<Garage>, req: Request<Body>) -> Result<Response<Body>, Error> {
+ // Get http authority string (eg. [::1]:3902 or garage.tld:80)
+ let authority = req
+ .headers()
+ .get(HOST)
+ .ok_or(Error::BadRequest(format!("HOST header required")))?
+ .to_str()?;
+
+ // Get bucket
+ let (host, _) = domain_to_unicode(authority_to_host(authority)?);
+ let root = &garage.config.s3_web.root_domain;
+ let bucket = host_to_bucket(&host, root);
+
+ // Check bucket is exposed as a website
+ let bucket_desc = garage
+ .bucket_table
+ .get(&EmptyKey, &bucket.to_string())
+ .await?
+ .filter(|b| !b.is_deleted())
+ .ok_or(Error::NotFound)?;
+
+ match bucket_desc.state.get() {
+ BucketState::Present(params) if *params.website.get() => Ok(()),
+ _ => Err(Error::NotFound),
+ }?;
+
+ // Get path
+ let path = req.uri().path().to_string();
+ let index = &garage.config.s3_web.index;
+ let key = path_to_key(&path, &index)?;
+
+ info!("Selected bucket: \"{}\", selected key: \"{}\"", bucket, key);
+
+ let res = match req.method() {
+ &Method::HEAD => handle_head(garage, &bucket, &key).await?,
+ &Method::GET => handle_get(garage, &req, bucket, &key).await?,
+ _ => return Err(Error::BadRequest(format!("HTTP method not supported"))),
+ };
+
+ Ok(res)
+}
+
+/// Extract host from the authority section given by the HTTP host header
+///
+/// The HTTP host contains both a host and a port.
+/// Extracting the port is more complex than just finding the colon (:) symbol due to IPv6
+/// We do not use the collect pattern as there is no way in std rust to collect over a stack allocated value
+/// check here: https://docs.rs/collect_slice/1.2.0/collect_slice/
+fn authority_to_host(authority: &str) -> Result<&str, Error> {
+ let mut iter = authority.chars().enumerate();
+ let (_, first_char) = iter
+ .next()
+ .ok_or(Error::BadRequest(format!("Authority is empty")))?;
+
+ let split = match first_char {
+ '[' => {
+ let mut iter = iter.skip_while(|(_, c)| c != &']');
+ match iter.next() {
+ Some((_, ']')) => iter.next(),
+ _ => {
+ return Err(Error::BadRequest(format!(
+ "Authority {} has an illegal format",
+ authority
+ )))
+ }
+ }
+ }
+ _ => iter.skip_while(|(_, c)| c != &':').next(),
+ };
+
+ match split {
+ Some((i, ':')) => Ok(&authority[..i]),
+ None => Ok(authority),
+ Some((_, _)) => Err(Error::BadRequest(format!(
+ "Authority {} has an illegal format",
+ authority
+ ))),
+ }
+}
+
+/// Host to bucket
+///
+/// Convert a host, like "bucket.garage-site.tld" or "john.doe.com"
+/// to the corresponding bucket, resp. "bucket" and "john.doe.com"
+/// considering that ".garage-site.tld" is the "root domain".
+/// This behavior has been chosen to follow AWS S3 semantic.
+fn host_to_bucket<'a>(host: &'a str, root: &str) -> &'a str {
+ if root.len() >= host.len() || !host.ends_with(root) {
+ return host;
+ }
+
+ let len_diff = host.len() - root.len();
+ let missing_starting_dot = root.chars().next() != Some('.');
+ let cursor = if missing_starting_dot {
+ len_diff - 1
+ } else {
+ len_diff
+ };
+ &host[..cursor]
+}
+
+/// Path to key
+///
+/// Convert the provided path to the internal key
+/// When a path ends with "/", we append the index name to match traditional web server behavior
+/// which is also AWS S3 behavior.
+fn path_to_key<'a>(path: &'a str, index: &str) -> Result<Cow<'a, str>, Error> {
+ let path_utf8 = percent_encoding::percent_decode_str(&path).decode_utf8()?;
+
+ if path_utf8.chars().next() != Some('/') {
+ return Err(Error::BadRequest(format!(
+ "Path must start with a / (slash)"
+ )));
+ }
+
+ match path_utf8.chars().last() {
+ None => unreachable!(),
+ Some('/') => {
+ let mut key = String::with_capacity(path_utf8.len() + index.len());
+ key.push_str(&path_utf8[1..]);
+ key.push_str(index);
+ Ok(key.into())
+ }
+ Some(_) => match path_utf8 {
+ Cow::Borrowed(pu8) => Ok((&pu8[1..]).into()),
+ Cow::Owned(pu8) => Ok((&pu8[1..]).to_string().into()),
+ },
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn authority_to_host_with_port() -> Result<(), Error> {
+ let domain = authority_to_host("[::1]:3902")?;
+ assert_eq!(domain, "[::1]");
+ let domain2 = authority_to_host("garage.tld:65200")?;
+ assert_eq!(domain2, "garage.tld");
+ let domain3 = authority_to_host("127.0.0.1:80")?;
+ assert_eq!(domain3, "127.0.0.1");
+ Ok(())
+ }
+
+ #[test]
+ fn authority_to_host_without_port() -> Result<(), Error> {
+ let domain = authority_to_host("[::1]")?;
+ assert_eq!(domain, "[::1]");
+ let domain2 = authority_to_host("garage.tld")?;
+ assert_eq!(domain2, "garage.tld");
+ let domain3 = authority_to_host("127.0.0.1")?;
+ assert_eq!(domain3, "127.0.0.1");
+ assert!(authority_to_host("[").is_err());
+ assert!(authority_to_host("[hello").is_err());
+ Ok(())
+ }
+
+ #[test]
+ fn host_to_bucket_test() {
+ assert_eq!(
+ host_to_bucket("john.doe.garage.tld", ".garage.tld"),
+ "john.doe"
+ );
+
+ assert_eq!(
+ host_to_bucket("john.doe.garage.tld", "garage.tld"),
+ "john.doe"
+ );
+
+ assert_eq!(host_to_bucket("john.doe.com", "garage.tld"), "john.doe.com");
+
+ assert_eq!(
+ host_to_bucket("john.doe.com", ".garage.tld"),
+ "john.doe.com"
+ );
+
+ assert_eq!(host_to_bucket("garage.tld", "garage.tld"), "garage.tld");
+
+ assert_eq!(host_to_bucket("garage.tld", ".garage.tld"), "garage.tld");
+ }
+
+ #[test]
+ fn path_to_key_test() -> Result<(), Error> {
+ assert_eq!(path_to_key("/file%20.jpg", "index.html")?, "file .jpg");
+ assert_eq!(path_to_key("/%20t/", "index.html")?, " t/index.html");
+ assert_eq!(path_to_key("/", "index.html")?, "index.html");
+ assert_eq!(path_to_key("/hello", "index.html")?, "hello");
+ assert!(path_to_key("", "index.html").is_err());
+ assert!(path_to_key("i/am/relative", "index.html").is_err());
+ Ok(())
+ }
+}