diff options
-rw-r--r-- | Cargo.lock | 32 | ||||
-rw-r--r-- | Cargo.toml | 1 | ||||
-rw-r--r-- | README.md | 10 | ||||
-rw-r--r-- | config.dev.toml | 4 | ||||
-rwxr-xr-x | script/dev-cluster.sh | 2 | ||||
-rwxr-xr-x | script/dev-env.sh | 15 | ||||
-rw-r--r-- | src/garage/Cargo.toml | 1 | ||||
-rw-r--r-- | src/garage/server.rs | 6 | ||||
-rw-r--r-- | src/util/config.rs | 9 | ||||
-rw-r--r-- | src/web/Cargo.toml | 47 | ||||
-rw-r--r-- | src/web/error.rs | 55 | ||||
-rw-r--r-- | src/web/lib.rs | 6 | ||||
-rw-r--r-- | src/web/web_server.rs | 238 |
13 files changed, 411 insertions, 15 deletions
@@ -425,6 +425,7 @@ dependencies = [ "garage_rpc 0.1.0", "garage_table 0.1.1", "garage_util 0.1.0", + "garage_web", "hex", "log", "pretty_env_logger", @@ -666,6 +667,37 @@ dependencies = [ ] [[package]] +name = "garage_web" +version = "0.1.0" +dependencies = [ + "err-derive", + "futures", + "futures-util", + "garage_api", + "garage_model 0.1.1", + "garage_table 0.1.1", + "garage_util 0.1.0", + "hex", + "http", + "httpdate", + "hyper", + "idna", + "log", + "percent-encoding", + "rand", + "rmp-serde", + "roxmltree", + "rustls", + "serde", + "serde_json", + "sha2", + "sled", + "tokio", + "toml", + "webpki", +] + +[[package]] name = "generator" version = "0.6.21" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -5,6 +5,7 @@ members = [ "src/table", "src/model", "src/api", + "src/web", "src/garage", ] @@ -20,14 +20,14 @@ Our main use case is to provide a distributed storage layer for small-scale self We propose the following quickstart to setup a full dev. environment as quickly as possible: - 1. Setup a rust/cargo environment and install s3cmd. eg. `dnf install rust cargo s3cmd` + 1. Setup a rust/cargo environment and install `awscli`. eg. `dnf install rust cargo awscli` 2. Run `cargo build` to build the project 3. Run `./script/dev-cluster.sh` to launch a test cluster (feel free to read the script) 4. Run `./script/dev-configure.sh` to configure your test cluster with default values (same datacenter, 100 tokens) - 5. Run `./script/dev-bucket.sh` to create a bucket named `éprouvette` and an API key that will be stored in `/tmp/garage.s3` + 5. Run `./script/dev-bucket.sh` to create a bucket named `eprouvette` and an API key that will be stored in `/tmp/garage.s3` 6. Run `source ./script/dev-env.sh` to configure your CLI environment 7. You can use `garage` to manage the cluster. Try `garage --help`. - 8. You can use `s3grg` to add, remove, and delete files. Try `s3grg --help`, `s3grg put /proc/cpuinfo s3://éprouvette/cpuinfo.txt`, `s3grg ls s3://éprouvette`. `s3grg` is a wrapper on `s3cmd` configured with the previously generated API key (the one in `/tmp/garage.s3`). + 8. You can use `s3grg` to add, remove, and delete files. Try `s3grg --help`, `s3grg cp /proc/cpuinfo s3://eprouvette/cpuinfo.txt`, `s3grg ls s3://eprouvette`. `s3grg` is a wrapper on the `aws s3` subcommand configured with the previously generated API key (the one in `/tmp/garage.s3`). Now you should be ready to start hacking on garage! @@ -85,7 +85,9 @@ api_bind_addr = "[::1]:3900" # the S3 API port, HTTP without TLS. Add a reverse s3_region = "garage" # set this to anything. S3 API calls will fail if they are not made against the region set here. [s3_web] -web_bind_addr = "[::1]:3902" +bind_addr = "[::1]:3902" +root_domain = ".garage.tld" +index = "index.html" ``` Build Garage using `cargo build --release`. diff --git a/config.dev.toml b/config.dev.toml index 966bee4b..215bc50c 100644 --- a/config.dev.toml +++ b/config.dev.toml @@ -17,4 +17,6 @@ api_bind_addr = "[::1]:3900" # the S3 API port, HTTP without TLS. Add a reverse s3_region = "garage" # set this to anything. S3 API calls will fail if they are not made against the region set here. [s3_web] -web_bind_addr = "[::1]:3902" +bind_addr = "[::1]:3902" +root_domain = ".garage.tld" +index = "index.html" diff --git a/script/dev-cluster.sh b/script/dev-cluster.sh index cfe9be0d..9ad4b6b4 100755 --- a/script/dev-cluster.sh +++ b/script/dev-cluster.sh @@ -41,6 +41,8 @@ s3_region = "garage" # set this to anything. S3 API calls will fail if they a [s3_web] bind_addr = "127.0.0.$count:3902" +root_domain = ".garage.tld" +index = "index.html" EOF echo -en "$LABEL configuration written to $CONF_PATH\n" diff --git a/script/dev-env.sh b/script/dev-env.sh index f5e71004..a2829c73 100755 --- a/script/dev-env.sh +++ b/script/dev-env.sh @@ -6,14 +6,9 @@ GARAGE_DEBUG="${REPO_FOLDER}/target/debug/" GARAGE_RELEASE="${REPO_FOLDER}/target/release/" PATH="${GARAGE_DEBUG}:${GARAGE_RELEASE}:$PATH" -ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f1` -SECRET_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2` - -alias s3grg="s3cmd \ - --host 127.0.0.1:3900 \ - --host-bucket 127.0.0.1:3900 \ - --access_key=$ACCESS_KEY \ - --secret_key=$SECRET_KEY \ - --region=garage \ - --no-ssl" +export AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1` +export AWS_SECRET_ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2` +export AWS_DEFAULT_REGION='garage' +alias s3grg="aws s3 \ + --endpoint-url http://127.0.0.1:3900" diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index cb16bcd4..39288f40 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -19,6 +19,7 @@ garage_rpc = { version = "0.1", path = "../rpc" } garage_table = { version = "0.1.1", path = "../table" } garage_model = { version = "0.1.1", path = "../model" } garage_api = { version = "0.1.1", path = "../api" } +garage_web = { version = "0.1", path = "../web" } bytes = "0.4" rand = "0.7" diff --git a/src/garage/server.rs b/src/garage/server.rs index 6caea5eb..ec78c067 100644 --- a/src/garage/server.rs +++ b/src/garage/server.rs @@ -11,6 +11,7 @@ use garage_util::error::Error; use garage_api::api_server; use garage_model::garage::Garage; use garage_rpc::rpc_server::RpcServer; +use garage_web::web_server; use crate::admin_rpc::*; @@ -56,6 +57,7 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { info!("Initializing RPC and API servers..."); let run_rpc_server = Arc::new(rpc_server).run(wait_from(watch_cancel.clone())); let api_server = api_server::run_api_server(garage.clone(), wait_from(watch_cancel.clone())); + let web_server = web_server::run_web_server(garage.clone(), wait_from(watch_cancel.clone())); futures::try_join!( garage @@ -78,6 +80,10 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { info!("API server exited"); rv }), + web_server.map(|rv| { + info!("Web server exited"); + rv + }), background.run().map(|rv| { info!("Background runner exited"); Ok(rv) diff --git a/src/util/config.rs b/src/util/config.rs index b985114d..f4c841b7 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -35,6 +35,8 @@ pub struct Config { pub rpc_tls: Option<TlsConfig>, pub s3_api: ApiConfig, + + pub s3_web: WebConfig, } #[derive(Deserialize, Debug, Clone)] @@ -50,6 +52,13 @@ pub struct ApiConfig { pub s3_region: String, } +#[derive(Deserialize, Debug, Clone)] +pub struct WebConfig { + pub bind_addr: SocketAddr, + pub root_domain: String, + pub index: String, +} + fn default_max_concurrent_rpc_requests() -> usize { 12 } diff --git a/src/web/Cargo.toml b/src/web/Cargo.toml new file mode 100644 index 00000000..0d08fdbf --- /dev/null +++ b/src/web/Cargo.toml @@ -0,0 +1,47 @@ +[package] +name = "garage_web" +version = "0.1.0" +authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"] +edition = "2018" +license = "GPL-3.0" +description = "S3-like website endpoint crate for the Garage object store" +repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage" + +[lib] +path = "lib.rs" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +garage_util = { version = "0.1", path = "../util" } +garage_table = { version = "0.1.1", path = "../table" } +garage_model = { version = "0.1.1", path = "../model" } +garage_api = { version = "0.1.1", path = "../api" } + +rand = "0.7" +hex = "0.3" +sha2 = "0.8" +err-derive = "0.2.3" +log = "0.4" + +sled = "0.31" + +toml = "0.5" +rmp-serde = "0.14.3" +serde = { version = "1.0", default-features = false, features = ["derive", "rc"] } +serde_json = "1.0" + +futures = "0.3" +futures-util = "0.3" +tokio = { version = "0.2", default-features = false, features = ["rt-core", "rt-threaded", "io-driver", "net", "tcp", "time", "macros", "sync", "signal", "fs"] } + +http = "0.2" +hyper = "0.13" +percent-encoding = "2.1.0" +rustls = "0.17" +webpki = "0.21" + +roxmltree = "0.11" +idna = "0.2" + +httpdate = "0.3" diff --git a/src/web/error.rs b/src/web/error.rs new file mode 100644 index 00000000..220bacfe --- /dev/null +++ b/src/web/error.rs @@ -0,0 +1,55 @@ +use err_derive::Error; +use hyper::StatusCode; + +use garage_util::error::Error as GarageError; + +#[derive(Debug, Error)] +pub enum Error { + #[error(display = "API error: {}", _0)] + ApiError(#[error(source)] garage_api::error::Error), + + // Category: internal error + #[error(display = "Internal error: {}", _0)] + InternalError(#[error(source)] GarageError), + + #[error(display = "Internal error (Hyper error): {}", _0)] + Hyper(#[error(source)] hyper::Error), + + #[error(display = "Internal error (HTTP error): {}", _0)] + HTTP(#[error(source)] http::Error), + + // Category: cannot process + #[error(display = "Forbidden: {}", _0)] + Forbidden(String), + + #[error(display = "Not found")] + NotFound, + + // Category: bad request + #[error(display = "Invalid UTF-8: {}", _0)] + InvalidUTF8(#[error(source)] std::str::Utf8Error), + + #[error(display = "Invalid XML: {}", _0)] + InvalidXML(#[error(source)] roxmltree::Error), + + #[error(display = "Invalid header value: {}", _0)] + InvalidHeader(#[error(source)] hyper::header::ToStrError), + + #[error(display = "Bad request: {}", _0)] + BadRequest(String), +} + +impl Error { + pub fn http_status_code(&self) -> StatusCode { + match self { + Error::NotFound => StatusCode::NOT_FOUND, + Error::ApiError(e) => e.http_status_code(), + Error::Forbidden(_) => StatusCode::FORBIDDEN, + Error::InternalError(GarageError::RPC(_)) => StatusCode::SERVICE_UNAVAILABLE, + Error::InternalError(_) | Error::Hyper(_) | Error::HTTP(_) => { + StatusCode::INTERNAL_SERVER_ERROR + } + _ => StatusCode::BAD_REQUEST, + } + } +} diff --git a/src/web/lib.rs b/src/web/lib.rs new file mode 100644 index 00000000..f28937b9 --- /dev/null +++ b/src/web/lib.rs @@ -0,0 +1,6 @@ +#[macro_use] +extern crate log; + +pub mod error; + +pub mod web_server; diff --git a/src/web/web_server.rs b/src/web/web_server.rs new file mode 100644 index 00000000..f8a5cd14 --- /dev/null +++ b/src/web/web_server.rs @@ -0,0 +1,238 @@ +use std::{borrow::Cow, convert::Infallible, net::SocketAddr, sync::Arc}; + +use futures::future::Future; + +use hyper::{ + header::HOST, + server::conn::AddrStream, + service::{make_service_fn, service_fn}, + Body, Method, Request, Response, Server, +}; + +use idna::domain_to_unicode; + +use crate::error::*; +use garage_api::s3_get::{handle_get, handle_head}; +use garage_model::garage::Garage; +use garage_util::error::Error as GarageError; + +pub async fn run_web_server( + garage: Arc<Garage>, + shutdown_signal: impl Future<Output = ()>, +) -> Result<(), GarageError> { + let addr = &garage.config.s3_web.bind_addr; + + let service = make_service_fn(|conn: &AddrStream| { + let garage = garage.clone(); + let client_addr = conn.remote_addr(); + async move { + Ok::<_, Error>(service_fn(move |req: Request<Body>| { + let garage = garage.clone(); + handle_request(garage, req, client_addr) + })) + } + }); + + let server = Server::bind(&addr).serve(service); + let graceful = server.with_graceful_shutdown(shutdown_signal); + info!("Web server listening on http://{}", addr); + + graceful.await?; + Ok(()) +} + +async fn handle_request( + garage: Arc<Garage>, + req: Request<Body>, + addr: SocketAddr, +) -> Result<Response<Body>, Infallible> { + info!("{} {} {}", addr, req.method(), req.uri()); + let res = serve_file(garage, req).await; + match &res { + Ok(r) => debug!("{} {:?}", r.status(), r.headers()), + Err(e) => warn!("Response: error {}, {}", e.http_status_code(), e), + } + + Ok(res.unwrap_or_else(error_to_res)) +} + +fn error_to_res(e: Error) -> Response<Body> { + let body: Body = Body::from(format!("{}\n", e)); + let mut http_error = Response::new(body); + *http_error.status_mut() = e.http_status_code(); + http_error +} + +async fn serve_file(garage: Arc<Garage>, req: Request<Body>) -> Result<Response<Body>, Error> { + // Get http authority string (eg. [::1]:3902 or garage.tld:80) + let authority = req + .headers() + .get(HOST) + .ok_or(Error::BadRequest(format!("HOST header required")))? + .to_str()?; + + // Get bucket + let (host, _) = domain_to_unicode(authority_to_host(authority)?); + let root = &garage.config.s3_web.root_domain; + let bucket = host_to_bucket(&host, root); + + // Get path + let path = req.uri().path().to_string(); + let index = &garage.config.s3_web.index; + let key = path_to_key(&path, &index)?; + + info!("Selected bucket: \"{}\", selected key: \"{}\"", bucket, key); + + let res = match req.method() { + &Method::HEAD => handle_head(garage, &bucket, &key).await?, + &Method::GET => handle_get(garage, &req, bucket, &key).await?, + _ => return Err(Error::BadRequest(format!("HTTP method not supported"))), + }; + + Ok(res) +} + +/// Extract host from the authority section given by the HTTP host header +/// +/// The HTTP host contains both a host and a port. +/// Extracting the port is more complex than just finding the colon (:) symbol due to IPv6 +/// We do not use the collect pattern as there is no way in std rust to collect over a stack allocated value +/// check here: https://docs.rs/collect_slice/1.2.0/collect_slice/ +fn authority_to_host(authority: &str) -> Result<&str, Error> { + let mut iter = authority.chars().enumerate(); + let (_, first_char) = iter + .next() + .ok_or(Error::BadRequest(format!("Authority is empty")))?; + + let split = match first_char { + '[' => { + let mut iter = iter.skip_while(|(_, c)| c != &']'); + iter.next().expect("Authority parsing logic error"); + iter.next() + } + _ => iter.skip_while(|(_, c)| c != &':').next(), + }; + + match split { + Some((i, ':')) => Ok(&authority[..i]), + None => Ok(authority), + Some((_, _)) => Err(Error::BadRequest(format!( + "Authority {} has an illegal format", + authority + ))), + } +} + +/// Host to bucket +/// +/// Convert a host, like "bucket.garage-site.tld" or "john.doe.com" +/// to the corresponding bucket, resp. "bucket" and "john.doe.com" +/// considering that ".garage-site.tld" is the "root domain". +/// This behavior has been chosen to follow AWS S3 semantic. +fn host_to_bucket<'a>(host: &'a str, root: &str) -> &'a str { + if root.len() >= host.len() || !host.ends_with(root) { + return host; + } + + let len_diff = host.len() - root.len(); + let missing_starting_dot = root.chars().next() != Some('.'); + let cursor = if missing_starting_dot { + len_diff - 1 + } else { + len_diff + }; + &host[..cursor] +} + +/// Path to key +/// +/// Convert the provided path to the internal key +/// When a path ends with "/", we append the index name to match traditional web server behavior +/// which is also AWS S3 behavior. +fn path_to_key<'a>(path: &'a str, index: &str) -> Result<Cow<'a, str>, Error> { + let path_utf8 = percent_encoding::percent_decode_str(&path).decode_utf8()?; + + if path_utf8.chars().next() != Some('/') { + return Err(Error::BadRequest(format!( + "Path must start with a / (slash)" + ))); + } + + match path_utf8.chars().last() { + None => Err(Error::BadRequest(format!( + "Path must have at least a character" + ))), + Some('/') => { + let mut key = String::with_capacity(path_utf8.len() + index.len()); + key.push_str(&path_utf8[1..]); + key.push_str(index); + Ok(key.into()) + } + Some(_) => match path_utf8 { + Cow::Borrowed(pu8) => Ok((&pu8[1..]).into()), + Cow::Owned(pu8) => Ok((&pu8[1..]).to_string().into()), + }, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn authority_to_host_with_port() -> Result<(), Error> { + let domain = authority_to_host("[::1]:3902")?; + assert_eq!(domain, "[::1]"); + let domain2 = authority_to_host("garage.tld:65200")?; + assert_eq!(domain2, "garage.tld"); + let domain3 = authority_to_host("127.0.0.1:80")?; + assert_eq!(domain3, "127.0.0.1"); + Ok(()) + } + + #[test] + fn authority_to_host_without_port() -> Result<(), Error> { + let domain = authority_to_host("[::1]")?; + assert_eq!(domain, "[::1]"); + let domain2 = authority_to_host("garage.tld")?; + assert_eq!(domain2, "garage.tld"); + let domain3 = authority_to_host("127.0.0.1")?; + assert_eq!(domain3, "127.0.0.1"); + Ok(()) + } + + #[test] + fn host_to_bucket_test() { + assert_eq!( + host_to_bucket("john.doe.garage.tld", ".garage.tld"), + "john.doe" + ); + + assert_eq!( + host_to_bucket("john.doe.garage.tld", "garage.tld"), + "john.doe" + ); + + assert_eq!(host_to_bucket("john.doe.com", "garage.tld"), "john.doe.com"); + + assert_eq!( + host_to_bucket("john.doe.com", ".garage.tld"), + "john.doe.com" + ); + + assert_eq!(host_to_bucket("garage.tld", "garage.tld"), "garage.tld"); + + assert_eq!(host_to_bucket("garage.tld", ".garage.tld"), "garage.tld"); + } + + #[test] + fn path_to_key_test() -> Result<(), Error> { + assert_eq!(path_to_key("/file%20.jpg", "index.html")?, "file .jpg"); + assert_eq!(path_to_key("/%20t/", "index.html")?, " t/index.html"); + assert_eq!(path_to_key("/", "index.html")?, "index.html"); + assert_eq!(path_to_key("/hello", "index.html")?, "hello"); + assert!(path_to_key("", "index.html").is_err()); + assert!(path_to_key("i/am/relative", "index.html").is_err()); + Ok(()) + } +} |