aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlex <alex@adnab.me>2021-01-15 17:49:49 +0100
committerAlex <alex@adnab.me>2021-01-15 17:49:49 +0100
commit97494b4a190cb16986b743ba1446968a6c180c6e (patch)
tree81edd9c3fab766e83ba8d8bba962aa68e1b911c3 /src
parent8956db2a81c3700b62c5208cff2ca4b0b124c328 (diff)
parent851893a3f299da9eeb0ef3c745be1f30164fd6cf (diff)
downloadgarage-97494b4a190cb16986b743ba1446968a6c180c6e.tar.gz
garage-97494b4a190cb16986b743ba1446968a6c180c6e.zip
Merge pull request 'Support website publishing' (#7) from feature/website into master
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/7
Diffstat (limited to 'src')
-rw-r--r--src/garage/Cargo.toml1
-rw-r--r--src/garage/admin_rpc.rs30
-rw-r--r--src/garage/main.rs18
-rw-r--r--src/garage/server.rs6
-rw-r--r--src/model/bucket_table.rs47
-rw-r--r--src/table/crdt.rs6
-rw-r--r--src/util/config.rs9
-rw-r--r--src/web/Cargo.toml28
-rw-r--r--src/web/error.rs39
-rw-r--r--src/web/lib.rs6
-rw-r--r--src/web/web_server.rs260
11 files changed, 438 insertions, 12 deletions
diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml
index cb16bcd4..39288f40 100644
--- a/src/garage/Cargo.toml
+++ b/src/garage/Cargo.toml
@@ -19,6 +19,7 @@ garage_rpc = { version = "0.1", path = "../rpc" }
garage_table = { version = "0.1.1", path = "../table" }
garage_model = { version = "0.1.1", path = "../model" }
garage_api = { version = "0.1.1", path = "../api" }
+garage_web = { version = "0.1", path = "../web" }
bytes = "0.4"
rand = "0.7"
diff --git a/src/garage/admin_rpc.rs b/src/garage/admin_rpc.rs
index a23d3e95..e1981e3a 100644
--- a/src/garage/admin_rpc.rs
+++ b/src/garage/admin_rpc.rs
@@ -89,7 +89,7 @@ impl AdminRpcHandler {
}
bucket
.state
- .update(BucketState::Present(crdt::LWWMap::new()));
+ .update(BucketState::Present(BucketParams::new()));
bucket
}
None => Bucket::new(query.name.clone()),
@@ -155,6 +155,29 @@ impl AdminRpcHandler {
&query.key_id, &query.bucket, allow_read, allow_write
)))
}
+ BucketOperation::Website(query) => {
+ let mut bucket = self.get_existing_bucket(&query.bucket).await?;
+
+ if !(query.allow ^ query.deny) {
+ return Err(Error::Message(format!(
+ "You must specify exactly one flag, either --allow or --deny"
+ )));
+ }
+
+ if let BucketState::Present(state) = bucket.state.get_mut() {
+ state.website.update(query.allow);
+ self.garage.bucket_table.insert(&bucket).await?;
+ let msg = if query.allow {
+ format!("Website access allowed for {}", &query.bucket)
+ } else {
+ format!("Website access denied for {}", &query.bucket)
+ };
+
+ Ok(AdminRPC::Ok(msg.to_string()))
+ } else {
+ unreachable!();
+ }
+ }
}
}
@@ -237,6 +260,7 @@ impl AdminRpcHandler {
.unwrap_or(Err(Error::BadRPC(format!("Key {} does not exist", id))))
}
+ /// Update **bucket table** to inform of the new linked key
async fn update_bucket_key(
&self,
mut bucket: Bucket,
@@ -244,7 +268,8 @@ impl AdminRpcHandler {
allow_read: bool,
allow_write: bool,
) -> Result<(), Error> {
- if let BucketState::Present(ak) = bucket.state.get_mut() {
+ if let BucketState::Present(params) = bucket.state.get_mut() {
+ let ak = &mut params.authorized_keys;
let old_ak = ak.take_and_clear();
ak.merge(&old_ak.update_mutator(
key_id.to_string(),
@@ -262,6 +287,7 @@ impl AdminRpcHandler {
Ok(())
}
+ /// Update **key table** to inform of the new linked bucket
async fn update_key_bucket(
&self,
mut key: Key,
diff --git a/src/garage/main.rs b/src/garage/main.rs
index 1185871f..7c8899a0 100644
--- a/src/garage/main.rs
+++ b/src/garage/main.rs
@@ -141,6 +141,24 @@ pub enum BucketOperation {
/// Allow key to read or write to bucket
#[structopt(name = "deny")]
Deny(PermBucketOpt),
+
+ /// Expose as website or not
+ #[structopt(name = "website")]
+ Website(WebsiteOpt),
+}
+
+#[derive(Serialize, Deserialize, StructOpt, Debug)]
+pub struct WebsiteOpt {
+ /// Create
+ #[structopt(long = "allow")]
+ pub allow: bool,
+
+ /// Delete
+ #[structopt(long = "deny")]
+ pub deny: bool,
+
+ /// Bucket name
+ pub bucket: String,
}
#[derive(Serialize, Deserialize, StructOpt, Debug)]
diff --git a/src/garage/server.rs b/src/garage/server.rs
index 6caea5eb..ec78c067 100644
--- a/src/garage/server.rs
+++ b/src/garage/server.rs
@@ -11,6 +11,7 @@ use garage_util::error::Error;
use garage_api::api_server;
use garage_model::garage::Garage;
use garage_rpc::rpc_server::RpcServer;
+use garage_web::web_server;
use crate::admin_rpc::*;
@@ -56,6 +57,7 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
info!("Initializing RPC and API servers...");
let run_rpc_server = Arc::new(rpc_server).run(wait_from(watch_cancel.clone()));
let api_server = api_server::run_api_server(garage.clone(), wait_from(watch_cancel.clone()));
+ let web_server = web_server::run_web_server(garage.clone(), wait_from(watch_cancel.clone()));
futures::try_join!(
garage
@@ -78,6 +80,10 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
info!("API server exited");
rv
}),
+ web_server.map(|rv| {
+ info!("Web server exited");
+ rv
+ }),
background.run().map(|rv| {
info!("Background runner exited");
Ok(rv)
diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs
index 11f103ff..a101555f 100644
--- a/src/model/bucket_table.rs
+++ b/src/model/bucket_table.rs
@@ -13,6 +13,11 @@ use crate::key_table::PermissionSet;
// We use them to perform migrations.
use model010::bucket_table as prev;
+/// A bucket is a collection of objects
+///
+/// Its parameters are not directly accessible as:
+/// - It must be possible to merge paramaters, hence the use of a LWW CRDT.
+/// - A bucket has 2 states, Present or Deleted and parameters make sense only if present.
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
pub struct Bucket {
// Primary key
@@ -24,27 +29,49 @@ pub struct Bucket {
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
pub enum BucketState {
Deleted,
- Present(crdt::LWWMap<String, PermissionSet>),
+ Present(BucketParams),
}
impl CRDT for BucketState {
fn merge(&mut self, o: &Self) {
match o {
BucketState::Deleted => *self = BucketState::Deleted,
- BucketState::Present(other_ak) => {
- if let BucketState::Present(ak) = self {
- ak.merge(other_ak);
+ BucketState::Present(other_params) => {
+ if let BucketState::Present(params) = self {
+ params.merge(other_params);
}
}
}
}
}
+#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
+pub struct BucketParams {
+ pub authorized_keys: crdt::LWWMap<String, PermissionSet>,
+ pub website: crdt::LWW<bool>,
+}
+
+impl CRDT for BucketParams {
+ fn merge(&mut self, o: &Self) {
+ self.authorized_keys.merge(&o.authorized_keys);
+ self.website.merge(&o.website);
+ }
+}
+
+impl BucketParams {
+ pub fn new() -> Self {
+ BucketParams {
+ authorized_keys: crdt::LWWMap::new(),
+ website: crdt::LWW::new(false),
+ }
+ }
+}
+
impl Bucket {
pub fn new(name: String) -> Self {
Bucket {
name,
- state: crdt::LWW::new(BucketState::Present(crdt::LWWMap::new())),
+ state: crdt::LWW::new(BucketState::Present(BucketParams::new())),
}
}
pub fn is_deleted(&self) -> bool {
@@ -53,7 +80,7 @@ impl Bucket {
pub fn authorized_keys(&self) -> &[(String, u64, PermissionSet)] {
match self.state.get() {
BucketState::Deleted => &[],
- BucketState::Present(ak) => ak.items(),
+ BucketState::Present(state) => state.authorized_keys.items(),
}
}
}
@@ -110,9 +137,15 @@ impl TableSchema for BucketTable {
},
));
}
+
+ let params = BucketParams {
+ authorized_keys: keys,
+ website: crdt::LWW::new(false),
+ };
+
Some(Bucket {
name: old.name,
- state: crdt::LWW::migrate_from_raw(old.timestamp, BucketState::Present(keys)),
+ state: crdt::LWW::migrate_from_raw(old.timestamp, BucketState::Present(params)),
})
}
}
diff --git a/src/table/crdt.rs b/src/table/crdt.rs
index 386e478b..4cba10ce 100644
--- a/src/table/crdt.rs
+++ b/src/table/crdt.rs
@@ -239,7 +239,7 @@ where
///
/// Typically, to update the value associated to a key in the map, you would do the following:
///
- /// ```
+ /// ```ignore
/// let my_update = my_crdt.update_mutator(key_to_modify, new_value);
/// my_crdt.merge(&my_update);
/// ```
@@ -261,7 +261,7 @@ where
/// empty map. This is very usefull to produce in-place a new map that contains only a delta
/// that modifies a certain value:
///
- /// ```
+ /// ```ignore
/// let mut a = get_my_crdt_value();
/// let old_a = a.take_and_clear();
/// a.merge(&old_a.update_mutator(key_to_modify, new_value));
@@ -273,7 +273,7 @@ where
/// but in the case where the map is a field in a struct for instance (as is always the case),
/// this becomes very handy:
///
- /// ```
+ /// ```ignore
/// let mut a = get_my_crdt_value();
/// let old_a_map = a.map_field.take_and_clear();
/// a.map_field.merge(&old_a_map.update_mutator(key_to_modify, new_value));
diff --git a/src/util/config.rs b/src/util/config.rs
index b985114d..f4c841b7 100644
--- a/src/util/config.rs
+++ b/src/util/config.rs
@@ -35,6 +35,8 @@ pub struct Config {
pub rpc_tls: Option<TlsConfig>,
pub s3_api: ApiConfig,
+
+ pub s3_web: WebConfig,
}
#[derive(Deserialize, Debug, Clone)]
@@ -50,6 +52,13 @@ pub struct ApiConfig {
pub s3_region: String,
}
+#[derive(Deserialize, Debug, Clone)]
+pub struct WebConfig {
+ pub bind_addr: SocketAddr,
+ pub root_domain: String,
+ pub index: String,
+}
+
fn default_max_concurrent_rpc_requests() -> usize {
12
}
diff --git a/src/web/Cargo.toml b/src/web/Cargo.toml
new file mode 100644
index 00000000..751b9ace
--- /dev/null
+++ b/src/web/Cargo.toml
@@ -0,0 +1,28 @@
+[package]
+name = "garage_web"
+version = "0.1.0"
+authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"]
+edition = "2018"
+license = "GPL-3.0"
+description = "S3-like website endpoint crate for the Garage object store"
+repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
+
+[lib]
+path = "lib.rs"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+garage_util = { version = "0.1", path = "../util" }
+garage_table = { version = "0.1.1", path = "../table" }
+garage_model = { version = "0.1.1", path = "../model" }
+garage_api = { version = "0.1.1", path = "../api" }
+
+err-derive = "0.2.3"
+log = "0.4"
+futures = "0.3"
+http = "0.2"
+hyper = "0.13"
+percent-encoding = "2.1.0"
+roxmltree = "0.11"
+idna = "0.2"
diff --git a/src/web/error.rs b/src/web/error.rs
new file mode 100644
index 00000000..14bc3b75
--- /dev/null
+++ b/src/web/error.rs
@@ -0,0 +1,39 @@
+use err_derive::Error;
+use hyper::StatusCode;
+
+use garage_util::error::Error as GarageError;
+
+#[derive(Debug, Error)]
+pub enum Error {
+ #[error(display = "API error: {}", _0)]
+ ApiError(#[error(source)] garage_api::error::Error),
+
+ // Category: internal error
+ #[error(display = "Internal error: {}", _0)]
+ InternalError(#[error(source)] GarageError),
+
+ #[error(display = "Not found")]
+ NotFound,
+
+ // Category: bad request
+ #[error(display = "Invalid UTF-8: {}", _0)]
+ InvalidUTF8(#[error(source)] std::str::Utf8Error),
+
+ #[error(display = "Invalid header value: {}", _0)]
+ InvalidHeader(#[error(source)] hyper::header::ToStrError),
+
+ #[error(display = "Bad request: {}", _0)]
+ BadRequest(String),
+}
+
+impl Error {
+ pub fn http_status_code(&self) -> StatusCode {
+ match self {
+ Error::NotFound => StatusCode::NOT_FOUND,
+ Error::ApiError(e) => e.http_status_code(),
+ Error::InternalError(GarageError::RPC(_)) => StatusCode::SERVICE_UNAVAILABLE,
+ Error::InternalError(_) => StatusCode::INTERNAL_SERVER_ERROR,
+ _ => StatusCode::BAD_REQUEST,
+ }
+ }
+}
diff --git a/src/web/lib.rs b/src/web/lib.rs
new file mode 100644
index 00000000..f28937b9
--- /dev/null
+++ b/src/web/lib.rs
@@ -0,0 +1,6 @@
+#[macro_use]
+extern crate log;
+
+pub mod error;
+
+pub mod web_server;
diff --git a/src/web/web_server.rs b/src/web/web_server.rs
new file mode 100644
index 00000000..24d111a9
--- /dev/null
+++ b/src/web/web_server.rs
@@ -0,0 +1,260 @@
+use std::{borrow::Cow, convert::Infallible, net::SocketAddr, sync::Arc};
+
+use futures::future::Future;
+
+use hyper::{
+ header::HOST,
+ server::conn::AddrStream,
+ service::{make_service_fn, service_fn},
+ Body, Method, Request, Response, Server,
+};
+
+use idna::domain_to_unicode;
+
+use crate::error::*;
+use garage_api::s3_get::{handle_get, handle_head};
+use garage_model::bucket_table::*;
+use garage_model::garage::Garage;
+use garage_table::*;
+use garage_util::error::Error as GarageError;
+
+pub async fn run_web_server(
+ garage: Arc<Garage>,
+ shutdown_signal: impl Future<Output = ()>,
+) -> Result<(), GarageError> {
+ let addr = &garage.config.s3_web.bind_addr;
+
+ let service = make_service_fn(|conn: &AddrStream| {
+ let garage = garage.clone();
+ let client_addr = conn.remote_addr();
+ async move {
+ Ok::<_, Error>(service_fn(move |req: Request<Body>| {
+ let garage = garage.clone();
+ handle_request(garage, req, client_addr)
+ }))
+ }
+ });
+
+ let server = Server::bind(&addr).serve(service);
+ let graceful = server.with_graceful_shutdown(shutdown_signal);
+ info!("Web server listening on http://{}", addr);
+
+ graceful.await?;
+ Ok(())
+}
+
+async fn handle_request(
+ garage: Arc<Garage>,
+ req: Request<Body>,
+ addr: SocketAddr,
+) -> Result<Response<Body>, Infallible> {
+ info!("{} {} {}", addr, req.method(), req.uri());
+ let res = serve_file(garage, req).await;
+ match &res {
+ Ok(r) => debug!("{} {:?}", r.status(), r.headers()),
+ Err(e) => warn!("Response: error {}, {}", e.http_status_code(), e),
+ }
+
+ Ok(res.unwrap_or_else(error_to_res))
+}
+
+fn error_to_res(e: Error) -> Response<Body> {
+ let body: Body = Body::from(format!("{}\n", e));
+ let mut http_error = Response::new(body);
+ *http_error.status_mut() = e.http_status_code();
+ http_error
+}
+
+async fn serve_file(garage: Arc<Garage>, req: Request<Body>) -> Result<Response<Body>, Error> {
+ // Get http authority string (eg. [::1]:3902 or garage.tld:80)
+ let authority = req
+ .headers()
+ .get(HOST)
+ .ok_or(Error::BadRequest(format!("HOST header required")))?
+ .to_str()?;
+
+ // Get bucket
+ let (host, _) = domain_to_unicode(authority_to_host(authority)?);
+ let root = &garage.config.s3_web.root_domain;
+ let bucket = host_to_bucket(&host, root);
+
+ // Check bucket is exposed as a website
+ let bucket_desc = garage
+ .bucket_table
+ .get(&EmptyKey, &bucket.to_string())
+ .await?
+ .filter(|b| !b.is_deleted())
+ .ok_or(Error::NotFound)?;
+
+ match bucket_desc.state.get() {
+ BucketState::Present(params) if *params.website.get() => Ok(()),
+ _ => Err(Error::NotFound),
+ }?;
+
+ // Get path
+ let path = req.uri().path().to_string();
+ let index = &garage.config.s3_web.index;
+ let key = path_to_key(&path, &index)?;
+
+ info!("Selected bucket: \"{}\", selected key: \"{}\"", bucket, key);
+
+ let res = match req.method() {
+ &Method::HEAD => handle_head(garage, &bucket, &key).await?,
+ &Method::GET => handle_get(garage, &req, bucket, &key).await?,
+ _ => return Err(Error::BadRequest(format!("HTTP method not supported"))),
+ };
+
+ Ok(res)
+}
+
+/// Extract host from the authority section given by the HTTP host header
+///
+/// The HTTP host contains both a host and a port.
+/// Extracting the port is more complex than just finding the colon (:) symbol due to IPv6
+/// We do not use the collect pattern as there is no way in std rust to collect over a stack allocated value
+/// check here: https://docs.rs/collect_slice/1.2.0/collect_slice/
+fn authority_to_host(authority: &str) -> Result<&str, Error> {
+ let mut iter = authority.chars().enumerate();
+ let (_, first_char) = iter
+ .next()
+ .ok_or(Error::BadRequest(format!("Authority is empty")))?;
+
+ let split = match first_char {
+ '[' => {
+ let mut iter = iter.skip_while(|(_, c)| c != &']');
+ match iter.next() {
+ Some((_, ']')) => iter.next(),
+ _ => {
+ return Err(Error::BadRequest(format!(
+ "Authority {} has an illegal format",
+ authority
+ )))
+ }
+ }
+ }
+ _ => iter.skip_while(|(_, c)| c != &':').next(),
+ };
+
+ match split {
+ Some((i, ':')) => Ok(&authority[..i]),
+ None => Ok(authority),
+ Some((_, _)) => Err(Error::BadRequest(format!(
+ "Authority {} has an illegal format",
+ authority
+ ))),
+ }
+}
+
+/// Host to bucket
+///
+/// Convert a host, like "bucket.garage-site.tld" or "john.doe.com"
+/// to the corresponding bucket, resp. "bucket" and "john.doe.com"
+/// considering that ".garage-site.tld" is the "root domain".
+/// This behavior has been chosen to follow AWS S3 semantic.
+fn host_to_bucket<'a>(host: &'a str, root: &str) -> &'a str {
+ if root.len() >= host.len() || !host.ends_with(root) {
+ return host;
+ }
+
+ let len_diff = host.len() - root.len();
+ let missing_starting_dot = root.chars().next() != Some('.');
+ let cursor = if missing_starting_dot {
+ len_diff - 1
+ } else {
+ len_diff
+ };
+ &host[..cursor]
+}
+
+/// Path to key
+///
+/// Convert the provided path to the internal key
+/// When a path ends with "/", we append the index name to match traditional web server behavior
+/// which is also AWS S3 behavior.
+fn path_to_key<'a>(path: &'a str, index: &str) -> Result<Cow<'a, str>, Error> {
+ let path_utf8 = percent_encoding::percent_decode_str(&path).decode_utf8()?;
+
+ if path_utf8.chars().next() != Some('/') {
+ return Err(Error::BadRequest(format!(
+ "Path must start with a / (slash)"
+ )));
+ }
+
+ match path_utf8.chars().last() {
+ None => unreachable!(),
+ Some('/') => {
+ let mut key = String::with_capacity(path_utf8.len() + index.len());
+ key.push_str(&path_utf8[1..]);
+ key.push_str(index);
+ Ok(key.into())
+ }
+ Some(_) => match path_utf8 {
+ Cow::Borrowed(pu8) => Ok((&pu8[1..]).into()),
+ Cow::Owned(pu8) => Ok((&pu8[1..]).to_string().into()),
+ },
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn authority_to_host_with_port() -> Result<(), Error> {
+ let domain = authority_to_host("[::1]:3902")?;
+ assert_eq!(domain, "[::1]");
+ let domain2 = authority_to_host("garage.tld:65200")?;
+ assert_eq!(domain2, "garage.tld");
+ let domain3 = authority_to_host("127.0.0.1:80")?;
+ assert_eq!(domain3, "127.0.0.1");
+ Ok(())
+ }
+
+ #[test]
+ fn authority_to_host_without_port() -> Result<(), Error> {
+ let domain = authority_to_host("[::1]")?;
+ assert_eq!(domain, "[::1]");
+ let domain2 = authority_to_host("garage.tld")?;
+ assert_eq!(domain2, "garage.tld");
+ let domain3 = authority_to_host("127.0.0.1")?;
+ assert_eq!(domain3, "127.0.0.1");
+ assert!(authority_to_host("[").is_err());
+ assert!(authority_to_host("[hello").is_err());
+ Ok(())
+ }
+
+ #[test]
+ fn host_to_bucket_test() {
+ assert_eq!(
+ host_to_bucket("john.doe.garage.tld", ".garage.tld"),
+ "john.doe"
+ );
+
+ assert_eq!(
+ host_to_bucket("john.doe.garage.tld", "garage.tld"),
+ "john.doe"
+ );
+
+ assert_eq!(host_to_bucket("john.doe.com", "garage.tld"), "john.doe.com");
+
+ assert_eq!(
+ host_to_bucket("john.doe.com", ".garage.tld"),
+ "john.doe.com"
+ );
+
+ assert_eq!(host_to_bucket("garage.tld", "garage.tld"), "garage.tld");
+
+ assert_eq!(host_to_bucket("garage.tld", ".garage.tld"), "garage.tld");
+ }
+
+ #[test]
+ fn path_to_key_test() -> Result<(), Error> {
+ assert_eq!(path_to_key("/file%20.jpg", "index.html")?, "file .jpg");
+ assert_eq!(path_to_key("/%20t/", "index.html")?, " t/index.html");
+ assert_eq!(path_to_key("/", "index.html")?, "index.html");
+ assert_eq!(path_to_key("/hello", "index.html")?, "hello");
+ assert!(path_to_key("", "index.html").is_err());
+ assert!(path_to_key("i/am/relative", "index.html").is_err());
+ Ok(())
+ }
+}