aboutsummaryrefslogtreecommitdiff
path: root/src/api
diff options
context:
space:
mode:
Diffstat (limited to 'src/api')
-rw-r--r--src/api/Cargo.toml5
-rw-r--r--src/api/admin/api_server.rs2
-rw-r--r--src/api/admin/bucket.rs4
-rw-r--r--src/api/admin/cluster.rs176
-rw-r--r--src/api/common_error.rs8
-rw-r--r--src/api/k2v/index.rs13
-rw-r--r--src/api/s3/copy.rs301
-rw-r--r--src/api/s3/encryption.rs595
-rw-r--r--src/api/s3/error.rs6
-rw-r--r--src/api/s3/get.rs216
-rw-r--r--src/api/s3/list.rs5
-rw-r--r--src/api/s3/mod.rs1
-rw-r--r--src/api/s3/multipart.rs56
-rw-r--r--src/api/s3/post_object.rs42
-rw-r--r--src/api/s3/put.rs178
15 files changed, 1299 insertions, 309 deletions
diff --git a/src/api/Cargo.toml b/src/api/Cargo.toml
index 317031a7..bcf6a537 100644
--- a/src/api/Cargo.toml
+++ b/src/api/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "garage_api"
-version = "0.9.3"
+version = "0.10.0"
authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018"
license = "AGPL-3.0"
@@ -21,7 +21,9 @@ garage_net.workspace = true
garage_util.workspace = true
garage_rpc.workspace = true
+aes-gcm.workspace = true
argon2.workspace = true
+async-compression.workspace = true
async-trait.workspace = true
base64.workspace = true
bytes.workspace = true
@@ -41,6 +43,7 @@ futures.workspace = true
futures-util.workspace = true
tokio.workspace = true
tokio-stream.workspace = true
+tokio-util.workspace = true
form_urlencoded.workspace = true
http.workspace = true
diff --git a/src/api/admin/api_server.rs b/src/api/admin/api_server.rs
index 265639c4..0e4565bb 100644
--- a/src/api/admin/api_server.rs
+++ b/src/api/admin/api_server.rs
@@ -276,7 +276,7 @@ impl ApiHandler for AdminApiServer {
Endpoint::GetClusterLayout => handle_get_cluster_layout(&self.garage).await,
Endpoint::UpdateClusterLayout => handle_update_cluster_layout(&self.garage, req).await,
Endpoint::ApplyClusterLayout => handle_apply_cluster_layout(&self.garage, req).await,
- Endpoint::RevertClusterLayout => handle_revert_cluster_layout(&self.garage, req).await,
+ Endpoint::RevertClusterLayout => handle_revert_cluster_layout(&self.garage).await,
// Keys
Endpoint::ListKeys => handle_list_keys(&self.garage).await,
Endpoint::GetKeyInfo {
diff --git a/src/api/admin/bucket.rs b/src/api/admin/bucket.rs
index cfe8a6c4..ac3cba00 100644
--- a/src/api/admin/bucket.rs
+++ b/src/api/admin/bucket.rs
@@ -123,7 +123,7 @@ async fn bucket_info_results(
.table
.get(&bucket_id, &EmptyKey)
.await?
- .map(|x| x.filtered_values(&garage.system.ring.borrow()))
+ .map(|x| x.filtered_values(&garage.system.cluster_layout()))
.unwrap_or_default();
let mpu_counters = garage
@@ -131,7 +131,7 @@ async fn bucket_info_results(
.table
.get(&bucket_id, &EmptyKey)
.await?
- .map(|x| x.filtered_values(&garage.system.ring.borrow()))
+ .map(|x| x.filtered_values(&garage.system.cluster_layout()))
.unwrap_or_default();
let mut relevant_keys = HashMap::new();
diff --git a/src/api/admin/cluster.rs b/src/api/admin/cluster.rs
index 3876c608..8ce6c5ed 100644
--- a/src/api/admin/cluster.rs
+++ b/src/api/admin/cluster.rs
@@ -1,3 +1,4 @@
+use std::collections::HashMap;
use std::net::SocketAddr;
use std::sync::Arc;
@@ -16,25 +17,95 @@ use crate::admin::error::*;
use crate::helpers::{json_ok_response, parse_json_body};
pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response<ResBody>, Error> {
+ let layout = garage.system.cluster_layout();
+ let mut nodes = garage
+ .system
+ .get_known_nodes()
+ .into_iter()
+ .map(|i| {
+ (
+ i.id,
+ NodeResp {
+ id: hex::encode(i.id),
+ addr: Some(i.addr),
+ hostname: i.status.hostname,
+ is_up: i.is_up,
+ last_seen_secs_ago: i.last_seen_secs_ago,
+ data_partition: i
+ .status
+ .data_disk_avail
+ .map(|(avail, total)| FreeSpaceResp {
+ available: avail,
+ total,
+ }),
+ metadata_partition: i.status.meta_disk_avail.map(|(avail, total)| {
+ FreeSpaceResp {
+ available: avail,
+ total,
+ }
+ }),
+ ..Default::default()
+ },
+ )
+ })
+ .collect::<HashMap<_, _>>();
+
+ for (id, _, role) in layout.current().roles.items().iter() {
+ if let layout::NodeRoleV(Some(r)) = role {
+ let role = NodeRoleResp {
+ id: hex::encode(id),
+ zone: r.zone.to_string(),
+ capacity: r.capacity,
+ tags: r.tags.clone(),
+ };
+ match nodes.get_mut(id) {
+ None => {
+ nodes.insert(
+ *id,
+ NodeResp {
+ id: hex::encode(id),
+ role: Some(role),
+ ..Default::default()
+ },
+ );
+ }
+ Some(n) => {
+ if n.role.is_none() {
+ n.role = Some(role);
+ }
+ }
+ }
+ }
+ }
+
+ for ver in layout.versions.iter().rev().skip(1) {
+ for (id, _, role) in ver.roles.items().iter() {
+ if let layout::NodeRoleV(Some(r)) = role {
+ if !nodes.contains_key(id) && r.capacity.is_some() {
+ nodes.insert(
+ *id,
+ NodeResp {
+ id: hex::encode(id),
+ draining: true,
+ ..Default::default()
+ },
+ );
+ }
+ }
+ }
+ }
+
+ let mut nodes = nodes.into_values().collect::<Vec<_>>();
+ nodes.sort_by(|x, y| x.id.cmp(&y.id));
+
let res = GetClusterStatusResponse {
node: hex::encode(garage.system.id),
garage_version: garage_util::version::garage_version(),
garage_features: garage_util::version::garage_features(),
rust_version: garage_util::version::rust_version(),
db_engine: garage.db.engine(),
- known_nodes: garage
- .system
- .get_known_nodes()
- .into_iter()
- .map(|i| KnownNodeResp {
- id: hex::encode(i.id),
- addr: i.addr,
- is_up: i.is_up,
- last_seen_secs_ago: i.last_seen_secs_ago,
- hostname: i.status.hostname,
- })
- .collect(),
- layout: format_cluster_layout(&garage.system.get_cluster_layout()),
+ layout_version: layout.current().version,
+ nodes,
};
Ok(json_ok_response(&res)?)
@@ -85,13 +156,14 @@ pub async fn handle_connect_cluster_nodes(
}
pub async fn handle_get_cluster_layout(garage: &Arc<Garage>) -> Result<Response<ResBody>, Error> {
- let res = format_cluster_layout(&garage.system.get_cluster_layout());
+ let res = format_cluster_layout(&garage.system.cluster_layout());
Ok(json_ok_response(&res)?)
}
-fn format_cluster_layout(layout: &layout::ClusterLayout) -> GetClusterLayoutResponse {
+fn format_cluster_layout(layout: &layout::LayoutHistory) -> GetClusterLayoutResponse {
let roles = layout
+ .current()
.roles
.items()
.iter()
@@ -105,10 +177,12 @@ fn format_cluster_layout(layout: &layout::ClusterLayout) -> GetClusterLayoutResp
.collect::<Vec<_>>();
let staged_role_changes = layout
- .staging_roles
+ .staging
+ .get()
+ .roles
.items()
.iter()
- .filter(|(k, _, v)| layout.roles.get(k) != Some(v))
+ .filter(|(k, _, v)| layout.current().roles.get(k) != Some(v))
.map(|(k, _, v)| match &v.0 {
None => NodeRoleChange {
id: hex::encode(k),
@@ -126,7 +200,7 @@ fn format_cluster_layout(layout: &layout::ClusterLayout) -> GetClusterLayoutResp
.collect::<Vec<_>>();
GetClusterLayoutResponse {
- version: layout.version,
+ version: layout.current().version,
roles,
staged_role_changes,
}
@@ -155,8 +229,8 @@ struct GetClusterStatusResponse {
garage_features: Option<&'static [&'static str]>,
rust_version: &'static str,
db_engine: String,
- known_nodes: Vec<KnownNodeResp>,
- layout: GetClusterLayoutResponse,
+ layout_version: u64,
+ nodes: Vec<NodeResp>,
}
#[derive(Serialize)]
@@ -190,14 +264,27 @@ struct NodeRoleResp {
tags: Vec<String>,
}
-#[derive(Serialize)]
+#[derive(Serialize, Default)]
+#[serde(rename_all = "camelCase")]
+struct FreeSpaceResp {
+ available: u64,
+ total: u64,
+}
+
+#[derive(Serialize, Default)]
#[serde(rename_all = "camelCase")]
-struct KnownNodeResp {
+struct NodeResp {
id: String,
- addr: SocketAddr,
+ role: Option<NodeRoleResp>,
+ addr: Option<SocketAddr>,
+ hostname: Option<String>,
is_up: bool,
last_seen_secs_ago: Option<u64>,
- hostname: String,
+ draining: bool,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ data_partition: Option<FreeSpaceResp>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ metadata_partition: Option<FreeSpaceResp>,
}
// ---- update functions ----
@@ -208,10 +295,10 @@ pub async fn handle_update_cluster_layout(
) -> Result<Response<ResBody>, Error> {
let updates = parse_json_body::<UpdateClusterLayoutRequest, _, Error>(req).await?;
- let mut layout = garage.system.get_cluster_layout();
+ let mut layout = garage.system.cluster_layout().clone();
- let mut roles = layout.roles.clone();
- roles.merge(&layout.staging_roles);
+ let mut roles = layout.current().roles.clone();
+ roles.merge(&layout.staging.get().roles);
for change in updates {
let node = hex::decode(&change.id).ok_or_bad_request("Invalid node identifier")?;
@@ -232,11 +319,17 @@ pub async fn handle_update_cluster_layout(
};
layout
- .staging_roles
+ .staging
+ .get_mut()
+ .roles
.merge(&roles.update_mutator(node, layout::NodeRoleV(new_role)));
}
- garage.system.update_cluster_layout(&layout).await?;
+ garage
+ .system
+ .layout_manager
+ .update_cluster_layout(&layout)
+ .await?;
let res = format_cluster_layout(&layout);
Ok(json_ok_response(&res)?)
@@ -246,12 +339,16 @@ pub async fn handle_apply_cluster_layout(
garage: &Arc<Garage>,
req: Request<IncomingBody>,
) -> Result<Response<ResBody>, Error> {
- let param = parse_json_body::<ApplyRevertLayoutRequest, _, Error>(req).await?;
+ let param = parse_json_body::<ApplyLayoutRequest, _, Error>(req).await?;
- let layout = garage.system.get_cluster_layout();
+ let layout = garage.system.cluster_layout().clone();
let (layout, msg) = layout.apply_staged_changes(Some(param.version))?;
- garage.system.update_cluster_layout(&layout).await?;
+ garage
+ .system
+ .layout_manager
+ .update_cluster_layout(&layout)
+ .await?;
let res = ApplyClusterLayoutResponse {
message: msg,
@@ -262,13 +359,14 @@ pub async fn handle_apply_cluster_layout(
pub async fn handle_revert_cluster_layout(
garage: &Arc<Garage>,
- req: Request<IncomingBody>,
) -> Result<Response<ResBody>, Error> {
- let param = parse_json_body::<ApplyRevertLayoutRequest, _, Error>(req).await?;
-
- let layout = garage.system.get_cluster_layout();
- let layout = layout.revert_staged_changes(Some(param.version))?;
- garage.system.update_cluster_layout(&layout).await?;
+ let layout = garage.system.cluster_layout().clone();
+ let layout = layout.revert_staged_changes()?;
+ garage
+ .system
+ .layout_manager
+ .update_cluster_layout(&layout)
+ .await?;
let res = format_cluster_layout(&layout);
Ok(json_ok_response(&res)?)
@@ -280,7 +378,7 @@ type UpdateClusterLayoutRequest = Vec<NodeRoleChange>;
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
-struct ApplyRevertLayoutRequest {
+struct ApplyLayoutRequest {
version: u64,
}
diff --git a/src/api/common_error.rs b/src/api/common_error.rs
index 4381f227..c47555d4 100644
--- a/src/api/common_error.rs
+++ b/src/api/common_error.rs
@@ -59,9 +59,7 @@ impl CommonError {
pub fn http_status_code(&self) -> StatusCode {
match self {
CommonError::InternalError(
- GarageError::Timeout
- | GarageError::RemoteError(_)
- | GarageError::Quorum(_, _, _, _),
+ GarageError::Timeout | GarageError::RemoteError(_) | GarageError::Quorum(..),
) => StatusCode::SERVICE_UNAVAILABLE,
CommonError::InternalError(_) | CommonError::Hyper(_) | CommonError::Http(_) => {
StatusCode::INTERNAL_SERVER_ERROR
@@ -80,9 +78,7 @@ impl CommonError {
match self {
CommonError::Forbidden(_) => "AccessDenied",
CommonError::InternalError(
- GarageError::Timeout
- | GarageError::RemoteError(_)
- | GarageError::Quorum(_, _, _, _),
+ GarageError::Timeout | GarageError::RemoteError(_) | GarageError::Quorum(..),
) => "ServiceUnavailable",
CommonError::InternalError(_) | CommonError::Hyper(_) | CommonError::Http(_) => {
"InternalError"
diff --git a/src/api/k2v/index.rs b/src/api/k2v/index.rs
index 822bec44..e3397238 100644
--- a/src/api/k2v/index.rs
+++ b/src/api/k2v/index.rs
@@ -1,9 +1,6 @@
-use std::sync::Arc;
-
use hyper::Response;
use serde::Serialize;
-use garage_rpc::ring::Ring;
use garage_table::util::*;
use garage_model::k2v::item_table::{BYTES, CONFLICTS, ENTRIES, VALUES};
@@ -27,7 +24,11 @@ pub async fn handle_read_index(
let reverse = reverse.unwrap_or(false);
- let ring: Arc<Ring> = garage.system.ring.borrow().clone();
+ let node_id_vec = garage
+ .system
+ .cluster_layout()
+ .all_nongateway_nodes()
+ .to_vec();
let (partition_keys, more, next_start) = read_range(
&garage.k2v.counter_table.table,
@@ -36,7 +37,7 @@ pub async fn handle_read_index(
&start,
&end,
limit,
- Some((DeletedFilter::NotDeleted, ring.layout.node_id_vec.clone())),
+ Some((DeletedFilter::NotDeleted, node_id_vec)),
EnumerationOrder::from_reverse(reverse),
)
.await?;
@@ -55,7 +56,7 @@ pub async fn handle_read_index(
partition_keys: partition_keys
.into_iter()
.map(|part| {
- let vals = part.filtered_values(&ring);
+ let vals = part.filtered_values(&garage.system.cluster_layout());
ReadIndexResponseEntry {
pk: part.sk,
entries: *vals.get(&s_entries).unwrap_or(&0),
diff --git a/src/api/s3/copy.rs b/src/api/s3/copy.rs
index 3c2bd483..2b29ec6d 100644
--- a/src/api/s3/copy.rs
+++ b/src/api/s3/copy.rs
@@ -1,7 +1,7 @@
use std::pin::Pin;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
-use futures::{stream, stream::Stream, StreamExt};
+use futures::{stream, stream::Stream, StreamExt, TryStreamExt};
use md5::{Digest as Md5Digest, Md5};
use bytes::Bytes;
@@ -9,9 +9,11 @@ use hyper::{Request, Response};
use serde::Serialize;
use garage_net::bytes_buf::BytesBuf;
+use garage_net::stream::read_stream_to_end;
use garage_rpc::rpc_helper::OrderTag;
use garage_table::*;
use garage_util::data::*;
+use garage_util::error::Error as GarageError;
use garage_util::time::*;
use garage_model::s3::block_ref_table::*;
@@ -21,11 +23,15 @@ use garage_model::s3::version_table::*;
use crate::helpers::*;
use crate::s3::api_server::{ReqBody, ResBody};
+use crate::s3::encryption::EncryptionParams;
use crate::s3::error::*;
+use crate::s3::get::full_object_byte_stream;
use crate::s3::multipart;
-use crate::s3::put::get_headers;
+use crate::s3::put::{get_headers, save_stream, SaveStreamResult};
use crate::s3::xml::{self as s3_xml, xmlns_tag};
+// -------- CopyObject ---------
+
pub async fn handle_copy(
ctx: ReqCtx,
req: &Request<ReqBody>,
@@ -35,38 +41,114 @@ pub async fn handle_copy(
let source_object = get_copy_source(&ctx, req).await?;
- let ReqCtx {
- garage,
- bucket_id: dest_bucket_id,
- ..
- } = ctx;
-
let (source_version, source_version_data, source_version_meta) =
extract_source_info(&source_object)?;
// Check precondition, e.g. x-amz-copy-source-if-match
copy_precondition.check(source_version, &source_version_meta.etag)?;
+ // Determine encryption parameters
+ let (source_encryption, source_object_headers) =
+ EncryptionParams::check_decrypt_for_copy_source(
+ &ctx.garage,
+ req.headers(),
+ &source_version_meta.encryption,
+ )?;
+ let dest_encryption = EncryptionParams::new_from_headers(&ctx.garage, req.headers())?;
+
+ // Determine headers of destination object
+ let dest_object_headers = match req.headers().get("x-amz-metadata-directive") {
+ Some(v) if v == hyper::header::HeaderValue::from_static("REPLACE") => {
+ get_headers(req.headers())?
+ }
+ _ => source_object_headers.into_owned(),
+ };
+
+ // Do actual object copying
+ let res = if EncryptionParams::is_same(&source_encryption, &dest_encryption) {
+ // If source and dest are both unencrypted, or if the encryption keys
+ // are the same, we can just copy the metadata and link blocks of the
+ // old object from the new object.
+ handle_copy_metaonly(
+ ctx,
+ dest_key,
+ dest_object_headers,
+ dest_encryption,
+ source_version,
+ source_version_data,
+ source_version_meta,
+ )
+ .await?
+ } else {
+ // If source and dest encryption use different keys,
+ // we must decrypt content and re-encrypt, so rewrite all data blocks.
+ handle_copy_reencrypt(
+ ctx,
+ dest_key,
+ dest_object_headers,
+ dest_encryption,
+ source_version,
+ source_version_data,
+ source_encryption,
+ )
+ .await?
+ };
+
+ let last_modified = msec_to_rfc3339(res.version_timestamp);
+ let result = CopyObjectResult {
+ last_modified: s3_xml::Value(last_modified),
+ etag: s3_xml::Value(format!("\"{}\"", res.etag)),
+ };
+ let xml = s3_xml::to_xml_with_header(&result)?;
+
+ let mut resp = Response::builder()
+ .header("Content-Type", "application/xml")
+ .header("x-amz-version-id", hex::encode(res.version_uuid))
+ .header(
+ "x-amz-copy-source-version-id",
+ hex::encode(source_version.uuid),
+ );
+ dest_encryption.add_response_headers(&mut resp);
+ Ok(resp.body(string_body(xml))?)
+}
+
+async fn handle_copy_metaonly(
+ ctx: ReqCtx,
+ dest_key: &str,
+ dest_object_headers: ObjectVersionHeaders,
+ dest_encryption: EncryptionParams,
+ source_version: &ObjectVersion,
+ source_version_data: &ObjectVersionData,
+ source_version_meta: &ObjectVersionMeta,
+) -> Result<SaveStreamResult, Error> {
+ let ReqCtx {
+ garage,
+ bucket_id: dest_bucket_id,
+ ..
+ } = ctx;
+
// Generate parameters for copied object
let new_uuid = gen_uuid();
let new_timestamp = now_msec();
- // Implement x-amz-metadata-directive: REPLACE
- let new_meta = match req.headers().get("x-amz-metadata-directive") {
- Some(v) if v == hyper::header::HeaderValue::from_static("REPLACE") => ObjectVersionMeta {
- headers: get_headers(req.headers())?,
- size: source_version_meta.size,
- etag: source_version_meta.etag.clone(),
- },
- _ => source_version_meta.clone(),
+ let new_meta = ObjectVersionMeta {
+ encryption: dest_encryption.encrypt_headers(dest_object_headers)?,
+ size: source_version_meta.size,
+ etag: source_version_meta.etag.clone(),
};
- let etag = new_meta.etag.to_string();
+ let res = SaveStreamResult {
+ version_uuid: new_uuid,
+ version_timestamp: new_timestamp,
+ etag: new_meta.etag.clone(),
+ };
// Save object copy
match source_version_data {
ObjectVersionData::DeleteMarker => unreachable!(),
ObjectVersionData::Inline(_meta, bytes) => {
+ // bytes is either plaintext before&after or encrypted with the
+ // same keys, so it's ok to just copy it as is
let dest_object_version = ObjectVersion {
uuid: new_uuid,
timestamp: new_timestamp,
@@ -97,7 +179,7 @@ pub async fn handle_copy(
uuid: new_uuid,
timestamp: new_timestamp,
state: ObjectVersionState::Uploading {
- headers: new_meta.headers.clone(),
+ encryption: new_meta.encryption.clone(),
multipart: false,
},
};
@@ -164,23 +246,42 @@ pub async fn handle_copy(
}
}
- let last_modified = msec_to_rfc3339(new_timestamp);
- let result = CopyObjectResult {
- last_modified: s3_xml::Value(last_modified),
- etag: s3_xml::Value(format!("\"{}\"", etag)),
- };
- let xml = s3_xml::to_xml_with_header(&result)?;
+ Ok(res)
+}
- Ok(Response::builder()
- .header("Content-Type", "application/xml")
- .header("x-amz-version-id", hex::encode(new_uuid))
- .header(
- "x-amz-copy-source-version-id",
- hex::encode(source_version.uuid),
- )
- .body(string_body(xml))?)
+async fn handle_copy_reencrypt(
+ ctx: ReqCtx,
+ dest_key: &str,
+ dest_object_headers: ObjectVersionHeaders,
+ dest_encryption: EncryptionParams,
+ source_version: &ObjectVersion,
+ source_version_data: &ObjectVersionData,
+ source_encryption: EncryptionParams,
+) -> Result<SaveStreamResult, Error> {
+ // basically we will read the source data (decrypt if necessary)
+ // and save that in a new object (encrypt if necessary),
+ // by combining the code used in getobject and putobject
+ let source_stream = full_object_byte_stream(
+ ctx.garage.clone(),
+ source_version,
+ source_version_data,
+ source_encryption,
+ );
+
+ save_stream(
+ &ctx,
+ dest_object_headers,
+ dest_encryption,
+ source_stream.map_err(|e| Error::from(GarageError::from(e))),
+ &dest_key.to_string(),
+ None,
+ None,
+ )
+ .await
}
+// -------- UploadPartCopy ---------
+
pub async fn handle_upload_part_copy(
ctx: ReqCtx,
req: &Request<ReqBody>,
@@ -193,7 +294,7 @@ pub async fn handle_upload_part_copy(
let dest_upload_id = multipart::decode_upload_id(upload_id)?;
let dest_key = dest_key.to_string();
- let (source_object, (_, _, mut dest_mpu)) = futures::try_join!(
+ let (source_object, (_, dest_version, mut dest_mpu)) = futures::try_join!(
get_copy_source(&ctx, req),
multipart::get_upload(&ctx, &dest_key, &dest_upload_id)
)?;
@@ -206,6 +307,20 @@ pub async fn handle_upload_part_copy(
// Check precondition on source, e.g. x-amz-copy-source-if-match
copy_precondition.check(source_object_version, &source_version_meta.etag)?;
+ // Determine encryption parameters
+ let (source_encryption, _) = EncryptionParams::check_decrypt_for_copy_source(
+ &garage,
+ req.headers(),
+ &source_version_meta.encryption,
+ )?;
+ let dest_object_encryption = match dest_version.state {
+ ObjectVersionState::Uploading { encryption, .. } => encryption,
+ _ => unreachable!(),
+ };
+ let (dest_encryption, _) =
+ EncryptionParams::check_decrypt(&garage, req.headers(), &dest_object_encryption)?;
+ let same_encryption = EncryptionParams::is_same(&source_encryption, &dest_encryption);
+
// Check source range is valid
let source_range = match req.headers().get("x-amz-copy-source-range") {
Some(range) => {
@@ -227,21 +342,16 @@ pub async fn handle_upload_part_copy(
};
// Check source version is not inlined
- match source_version_data {
- ObjectVersionData::DeleteMarker => unreachable!(),
- ObjectVersionData::Inline(_meta, _bytes) => {
- // This is only for small files, we don't bother handling this.
- // (in AWS UploadPartCopy works for parts at least 5MB which
- // is never the case of an inline object)
- return Err(Error::bad_request(
- "Source object is too small (minimum part size is 5Mb)",
- ));
- }
- ObjectVersionData::FirstBlock(_meta, _first_block_hash) => (),
- };
+ if matches!(source_version_data, ObjectVersionData::Inline(_, _)) {
+ // This is only for small files, we don't bother handling this.
+ // (in AWS UploadPartCopy works for parts at least 5MB which
+ // is never the case of an inline object)
+ return Err(Error::bad_request(
+ "Source object is too small (minimum part size is 5Mb)",
+ ));
+ }
- // Fetch source versin with its block list,
- // and destination version to check part hasn't yet been uploaded
+ // Fetch source version with its block list
let source_version = garage
.version_table
.get(&source_object_version.uuid, &EmptyKey)
@@ -251,7 +361,9 @@ pub async fn handle_upload_part_copy(
// We want to reuse blocks from the source version as much as possible.
// However, we still need to get the data from these blocks
// because we need to know it to calculate the MD5sum of the part
- // which is used as its ETag.
+ // which is used as its ETag. For encrypted sources or destinations,
+ // we must always read(+decrypt) and then write(+encrypt), so we
+ // can never reuse data blocks as is.
// First, calculate what blocks we want to keep,
// and the subrange of the block to take, if the bounds of the
@@ -313,6 +425,8 @@ pub async fn handle_upload_part_copy(
},
false,
);
+ // write an empty version now to be the parent of the block_ref entries
+ garage.version_table.insert(&dest_version).await?;
// Now, actually copy the blocks
let mut md5hasher = Md5::new();
@@ -321,24 +435,44 @@ pub async fn handle_upload_part_copy(
// and extract the subrange if necessary.
// The second returned value is an Option<Hash>, that is Some
// if and only if the block returned is a block that already existed
- // in the Garage data store (thus we don't need to save it again).
+ // in the Garage data store and can be reused as-is instead of having
+ // to save it again. This excludes encrypted source blocks that we had
+ // to decrypt.
let garage2 = garage.clone();
let order_stream = OrderTag::stream();
let source_blocks = stream::iter(blocks_to_copy)
.enumerate()
- .flat_map(|(i, (block_hash, range_to_copy))| {
+ .map(|(i, (block_hash, range_to_copy))| {
let garage3 = garage2.clone();
- stream::once(async move {
- let data = garage3
- .block_manager
- .rpc_get_block(&block_hash, Some(order_stream.order(i as u64)))
+ async move {
+ let stream = source_encryption
+ .get_block(&garage3, &block_hash, Some(order_stream.order(i as u64)))
.await?;
+ let data = read_stream_to_end(stream).await?.into_bytes();
+ // For each item, we return a tuple of:
+ // 1. the full data block (decrypted)
+ // 2. an Option<Hash> that indicates the hash of the block in the block store,
+ // only if it can be re-used as-is in the copied object
match range_to_copy {
- Some(r) => Ok((data.slice(r), None)),
- None => Ok((data, Some(block_hash))),
+ Some(r) => {
+ // If we are taking a subslice of the data, we cannot reuse the block as-is
+ Ok((data.slice(r), None))
+ }
+ None if same_encryption => {
+ // If the data is unencrypted before & after, or if we are using
+ // the same encryption key, we can reuse the stored block, no need
+ // to re-send it to storage nodes.
+ Ok((data, Some(block_hash)))
+ }
+ None => {
+ // If we are decrypting / (re)encrypting with different keys,
+ // we cannot reuse the block as-is
+ Ok((data, None))
+ }
}
- })
+ }
})
+ .buffered(2)
.peekable();
// The defragmenter is a custom stream (defined below) that concatenates
@@ -346,22 +480,33 @@ pub async fn handle_upload_part_copy(
// It returns a series of (Vec<u8>, Option<Hash>).
// When it is done, it returns an empty vec.
// Same as the previous iterator, the Option is Some(_) if and only if
- // it's an existing block of the Garage data store.
+ // it's an existing block of the Garage data store that can be reused.
let mut defragmenter = Defragmenter::new(garage.config.block_size, Box::pin(source_blocks));
let mut current_offset = 0;
let mut next_block = defragmenter.next().await?;
+ // TODO this could be optimized similarly to read_and_put_blocks
+ // low priority because uploadpartcopy is rarely used
loop {
let (data, existing_block_hash) = next_block;
if data.is_empty() {
break;
}
+ let data_len = data.len() as u64;
md5hasher.update(&data[..]);
- let must_upload = existing_block_hash.is_none();
- let final_hash = existing_block_hash.unwrap_or_else(|| blake2sum(&data[..]));
+ let (final_data, must_upload, final_hash) = match existing_block_hash {
+ Some(hash) if same_encryption => (data, false, hash),
+ _ => tokio::task::spawn_blocking(move || {
+ let data_enc = dest_encryption.encrypt_block(data)?;
+ let hash = blake2sum(&data_enc);
+ Ok::<_, Error>((data_enc, true, hash))
+ })
+ .await
+ .unwrap()?,
+ };
dest_version.blocks.clear();
dest_version.blocks.put(
@@ -371,10 +516,10 @@ pub async fn handle_upload_part_copy(
},
VersionBlock {
hash: final_hash,
- size: data.len() as u64,
+ size: data_len,
},
);
- current_offset += data.len() as u64;
+ current_offset += data_len;
let block_ref = BlockRef {
block: final_hash,
@@ -382,36 +527,33 @@ pub async fn handle_upload_part_copy(
deleted: false.into(),
};
- let garage2 = garage.clone();
- let res = futures::try_join!(
+ let (_, _, _, next) = futures::try_join!(
// Thing 1: if the block is not exactly a block that existed before,
// we need to insert that data as a new block.
- async move {
+ async {
if must_upload {
- garage2
+ garage
.block_manager
- .rpc_put_block(final_hash, data, None)
+ .rpc_put_block(final_hash, final_data, dest_encryption.is_encrypted(), None)
.await
} else {
Ok(())
}
},
- async {
- // Thing 2: we need to insert the block in the version
- garage.version_table.insert(&dest_version).await?;
- // Thing 3: we need to add a block reference
- garage.block_ref_table.insert(&block_ref).await
- },
- // Thing 4: we need to prefetch the next block
+ // Thing 2: we need to insert the block in the version
+ garage.version_table.insert(&dest_version),
+ // Thing 3: we need to add a block reference
+ garage.block_ref_table.insert(&block_ref),
+ // Thing 4: we need to read the next block
defragmenter.next(),
)?;
- next_block = res.2;
+ next_block = next;
}
assert_eq!(current_offset, source_range.length);
let data_md5sum = md5hasher.finalize();
- let etag = hex::encode(data_md5sum);
+ let etag = dest_encryption.etag_from_md5(&data_md5sum);
// Put the part's ETag in the Versiontable
dest_mpu.parts.put(
@@ -431,13 +573,14 @@ pub async fn handle_upload_part_copy(
last_modified: s3_xml::Value(msec_to_rfc3339(source_object_version.timestamp)),
})?;
- Ok(Response::builder()
+ let mut resp = Response::builder()
.header("Content-Type", "application/xml")
.header(
"x-amz-copy-source-version-id",
hex::encode(source_object_version.uuid),
- )
- .body(string_body(resp_xml))?)
+ );
+ dest_encryption.add_response_headers(&mut resp);
+ Ok(resp.body(string_body(resp_xml))?)
}
async fn get_copy_source(ctx: &ReqCtx, req: &Request<ReqBody>) -> Result<Object, Error> {
diff --git a/src/api/s3/encryption.rs b/src/api/s3/encryption.rs
new file mode 100644
index 00000000..2b105e90
--- /dev/null
+++ b/src/api/s3/encryption.rs
@@ -0,0 +1,595 @@
+use std::borrow::Cow;
+use std::convert::TryInto;
+use std::pin::Pin;
+
+use aes_gcm::{
+ aead::stream::{DecryptorLE31, EncryptorLE31, StreamLE31},
+ aead::{Aead, AeadCore, KeyInit, OsRng},
+ aes::cipher::crypto_common::rand_core::RngCore,
+ aes::cipher::typenum::Unsigned,
+ Aes256Gcm, Key, Nonce,
+};
+use base64::prelude::*;
+use bytes::Bytes;
+
+use futures::stream::Stream;
+use futures::task;
+use tokio::io::BufReader;
+
+use http::header::{HeaderMap, HeaderName, HeaderValue};
+
+use garage_net::bytes_buf::BytesBuf;
+use garage_net::stream::{stream_asyncread, ByteStream};
+use garage_rpc::rpc_helper::OrderTag;
+use garage_util::data::Hash;
+use garage_util::error::Error as GarageError;
+use garage_util::migrate::Migrate;
+
+use garage_model::garage::Garage;
+use garage_model::s3::object_table::{ObjectVersionEncryption, ObjectVersionHeaders};
+
+use crate::common_error::*;
+use crate::s3::error::Error;
+
+const X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM: HeaderName =
+ HeaderName::from_static("x-amz-server-side-encryption-customer-algorithm");
+const X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY: HeaderName =
+ HeaderName::from_static("x-amz-server-side-encryption-customer-key");
+const X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5: HeaderName =
+ HeaderName::from_static("x-amz-server-side-encryption-customer-key-md5");
+
+const X_AMZ_COPY_SOURCE_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM: HeaderName =
+ HeaderName::from_static("x-amz-copy-source-server-side-encryption-customer-algorithm");
+const X_AMZ_COPY_SOURCE_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY: HeaderName =
+ HeaderName::from_static("x-amz-copy-source-server-side-encryption-customer-key");
+const X_AMZ_COPY_SOURCE_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5: HeaderName =
+ HeaderName::from_static("x-amz-copy-source-server-side-encryption-customer-key-md5");
+
+const CUSTOMER_ALGORITHM_AES256: &[u8] = b"AES256";
+
+type Md5Output = md5::digest::Output<md5::Md5Core>;
+
+type StreamNonceSize = aes_gcm::aead::stream::NonceSize<Aes256Gcm, StreamLE31<Aes256Gcm>>;
+
+// Data blocks are encrypted by smaller chunks of size 4096 bytes,
+// so that data can be streamed when reading.
+// This size has to be known and has to be constant, or data won't be
+// readable anymore. DO NOT CHANGE THIS VALUE.
+const STREAM_ENC_PLAIN_CHUNK_SIZE: usize = 0x1000; // 4096 bytes
+const STREAM_ENC_CYPER_CHUNK_SIZE: usize = STREAM_ENC_PLAIN_CHUNK_SIZE + 16;
+
+#[derive(Clone, Copy)]
+pub enum EncryptionParams {
+ Plaintext,
+ SseC {
+ client_key: Key<Aes256Gcm>,
+ client_key_md5: Md5Output,
+ compression_level: Option<i32>,
+ },
+}
+
+impl EncryptionParams {
+ pub fn is_encrypted(&self) -> bool {
+ !matches!(self, Self::Plaintext)
+ }
+
+ pub fn is_same(a: &Self, b: &Self) -> bool {
+ let relevant_info = |x: &Self| match x {
+ Self::Plaintext => None,
+ Self::SseC {
+ client_key,
+ compression_level,
+ ..
+ } => Some((*client_key, compression_level.is_some())),
+ };
+ relevant_info(a) == relevant_info(b)
+ }
+
+ pub fn new_from_headers(
+ garage: &Garage,
+ headers: &HeaderMap,
+ ) -> Result<EncryptionParams, Error> {
+ let key = parse_request_headers(
+ headers,
+ &X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM,
+ &X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY,
+ &X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5,
+ )?;
+ match key {
+ Some((client_key, client_key_md5)) => Ok(EncryptionParams::SseC {
+ client_key,
+ client_key_md5,
+ compression_level: garage.config.compression_level,
+ }),
+ None => Ok(EncryptionParams::Plaintext),
+ }
+ }
+
+ pub fn add_response_headers(&self, resp: &mut http::response::Builder) {
+ if let Self::SseC { client_key_md5, .. } = self {
+ let md5 = BASE64_STANDARD.encode(&client_key_md5);
+
+ resp.headers_mut().unwrap().insert(
+ X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM,
+ HeaderValue::from_bytes(CUSTOMER_ALGORITHM_AES256).unwrap(),
+ );
+ resp.headers_mut().unwrap().insert(
+ X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5,
+ HeaderValue::from_bytes(md5.as_bytes()).unwrap(),
+ );
+ }
+ }
+
+ pub fn check_decrypt<'a>(
+ garage: &Garage,
+ headers: &HeaderMap,
+ obj_enc: &'a ObjectVersionEncryption,
+ ) -> Result<(Self, Cow<'a, ObjectVersionHeaders>), Error> {
+ let key = parse_request_headers(
+ headers,
+ &X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM,
+ &X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY,
+ &X_AMZ_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5,
+ )?;
+ Self::check_decrypt_common(garage, key, obj_enc)
+ }
+
+ pub fn check_decrypt_for_copy_source<'a>(
+ garage: &Garage,
+ headers: &HeaderMap,
+ obj_enc: &'a ObjectVersionEncryption,
+ ) -> Result<(Self, Cow<'a, ObjectVersionHeaders>), Error> {
+ let key = parse_request_headers(
+ headers,
+ &X_AMZ_COPY_SOURCE_SERVER_SIDE_ENCRYPTION_CUSTOMER_ALGORITHM,
+ &X_AMZ_COPY_SOURCE_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY,
+ &X_AMZ_COPY_SOURCE_SERVER_SIDE_ENCRYPTION_CUSTOMER_KEY_MD5,
+ )?;
+ Self::check_decrypt_common(garage, key, obj_enc)
+ }
+
+ fn check_decrypt_common<'a>(
+ garage: &Garage,
+ key: Option<(Key<Aes256Gcm>, Md5Output)>,
+ obj_enc: &'a ObjectVersionEncryption,
+ ) -> Result<(Self, Cow<'a, ObjectVersionHeaders>), Error> {
+ match (key, &obj_enc) {
+ (
+ Some((client_key, client_key_md5)),
+ ObjectVersionEncryption::SseC {
+ headers,
+ compressed,
+ },
+ ) => {
+ let enc = Self::SseC {
+ client_key,
+ client_key_md5,
+ compression_level: if *compressed {
+ Some(garage.config.compression_level.unwrap_or(1))
+ } else {
+ None
+ },
+ };
+ let plaintext = enc.decrypt_blob(&headers)?;
+ let headers = ObjectVersionHeaders::decode(&plaintext)
+ .ok_or_internal_error("Could not decode encrypted headers")?;
+ Ok((enc, Cow::Owned(headers)))
+ }
+ (None, ObjectVersionEncryption::Plaintext { headers }) => {
+ Ok((Self::Plaintext, Cow::Borrowed(headers)))
+ }
+ (_, ObjectVersionEncryption::SseC { .. }) => {
+ Err(Error::bad_request("Object is encrypted"))
+ }
+ (Some(_), _) => {
+ // TODO: should this be an OK scenario?
+ Err(Error::bad_request("Trying to decrypt a plaintext object"))
+ }
+ }
+ }
+
+ pub fn encrypt_headers(
+ &self,
+ h: ObjectVersionHeaders,
+ ) -> Result<ObjectVersionEncryption, Error> {
+ match self {
+ Self::SseC {
+ compression_level, ..
+ } => {
+ let plaintext = h.encode().map_err(GarageError::from)?;
+ let ciphertext = self.encrypt_blob(&plaintext)?;
+ Ok(ObjectVersionEncryption::SseC {
+ headers: ciphertext.into_owned(),
+ compressed: compression_level.is_some(),
+ })
+ }
+ Self::Plaintext => Ok(ObjectVersionEncryption::Plaintext { headers: h }),
+ }
+ }
+
+ // ---- generating object Etag values ----
+ pub fn etag_from_md5(&self, md5sum: &[u8]) -> String {
+ match self {
+ Self::Plaintext => hex::encode(md5sum),
+ Self::SseC { .. } => {
+ // AWS specifies that for encrypted objects, the Etag is not
+ // the md5sum of the data, but doesn't say what it is.
+ // So we just put some random bytes.
+ let mut random = [0u8; 16];
+ OsRng.fill_bytes(&mut random);
+ hex::encode(&random)
+ }
+ }
+ }
+
+ // ---- generic function for encrypting / decrypting blobs ----
+ // Prepends a randomly-generated nonce to the encrypted value.
+ // This is used for encrypting object headers and inlined data for small objects.
+ // This does not compress anything.
+
+ pub fn encrypt_blob<'a>(&self, blob: &'a [u8]) -> Result<Cow<'a, [u8]>, Error> {
+ match self {
+ Self::SseC { client_key, .. } => {
+ let cipher = Aes256Gcm::new(&client_key);
+ let nonce = Aes256Gcm::generate_nonce(&mut OsRng);
+ let ciphertext = cipher
+ .encrypt(&nonce, blob)
+ .ok_or_internal_error("Encryption failed")?;
+ Ok(Cow::Owned([nonce.to_vec(), ciphertext].concat()))
+ }
+ Self::Plaintext => Ok(Cow::Borrowed(blob)),
+ }
+ }
+
+ pub fn decrypt_blob<'a>(&self, blob: &'a [u8]) -> Result<Cow<'a, [u8]>, Error> {
+ match self {
+ Self::SseC { client_key, .. } => {
+ let cipher = Aes256Gcm::new(&client_key);
+ let nonce_size = <Aes256Gcm as AeadCore>::NonceSize::to_usize();
+ let nonce = Nonce::from_slice(
+ blob.get(..nonce_size)
+ .ok_or_internal_error("invalid encrypted data")?,
+ );
+ let plaintext = cipher
+ .decrypt(nonce, &blob[nonce_size..])
+ .ok_or_bad_request(
+ "Invalid encryption key, could not decrypt object metadata.",
+ )?;
+ Ok(Cow::Owned(plaintext))
+ }
+ Self::Plaintext => Ok(Cow::Borrowed(blob)),
+ }
+ }
+
+ // ---- function for encrypting / decrypting byte streams ----
+
+ /// Get a data block from the storage node, and decrypt+decompress it
+ /// if necessary. If object is plaintext, just get it without any processing.
+ pub async fn get_block(
+ &self,
+ garage: &Garage,
+ hash: &Hash,
+ order: Option<OrderTag>,
+ ) -> Result<ByteStream, GarageError> {
+ let raw_block = garage
+ .block_manager
+ .rpc_get_block_streaming(hash, order)
+ .await?;
+ Ok(self.decrypt_block_stream(raw_block))
+ }
+
+ pub fn decrypt_block_stream(&self, stream: ByteStream) -> ByteStream {
+ match self {
+ Self::Plaintext => stream,
+ Self::SseC {
+ client_key,
+ compression_level,
+ ..
+ } => {
+ let plaintext = DecryptStream::new(stream, *client_key);
+ if compression_level.is_some() {
+ let reader = stream_asyncread(Box::pin(plaintext));
+ let reader = BufReader::new(reader);
+ let reader = async_compression::tokio::bufread::ZstdDecoder::new(reader);
+ Box::pin(tokio_util::io::ReaderStream::new(reader))
+ } else {
+ Box::pin(plaintext)
+ }
+ }
+ }
+ }
+
+ /// Encrypt a data block if encryption is set, for use before
+ /// putting the data blocks into storage
+ pub fn encrypt_block(&self, block: Bytes) -> Result<Bytes, Error> {
+ match self {
+ Self::Plaintext => Ok(block),
+ Self::SseC {
+ client_key,
+ compression_level,
+ ..
+ } => {
+ let block = if let Some(level) = compression_level {
+ Cow::Owned(
+ garage_block::zstd_encode(block.as_ref(), *level)
+ .ok_or_internal_error("failed to compress data block")?,
+ )
+ } else {
+ Cow::Borrowed(block.as_ref())
+ };
+
+ let mut ret = Vec::with_capacity(block.len() + 32 + block.len() / 64);
+
+ let mut nonce: Nonce<StreamNonceSize> = Default::default();
+ OsRng.fill_bytes(&mut nonce);
+ ret.extend_from_slice(nonce.as_slice());
+
+ let mut cipher = EncryptorLE31::<Aes256Gcm>::new(&client_key, &nonce);
+ let mut iter = block.chunks(STREAM_ENC_PLAIN_CHUNK_SIZE).peekable();
+
+ if iter.peek().is_none() {
+ // Empty stream: we encrypt an empty last chunk
+ let chunk_enc = cipher
+ .encrypt_last(&[][..])
+ .ok_or_internal_error("failed to encrypt chunk")?;
+ ret.extend_from_slice(&chunk_enc);
+ } else {
+ loop {
+ let chunk = iter.next().unwrap();
+ if iter.peek().is_some() {
+ let chunk_enc = cipher
+ .encrypt_next(chunk)
+ .ok_or_internal_error("failed to encrypt chunk")?;
+ assert_eq!(chunk.len(), STREAM_ENC_PLAIN_CHUNK_SIZE);
+ assert_eq!(chunk_enc.len(), STREAM_ENC_CYPER_CHUNK_SIZE);
+ ret.extend_from_slice(&chunk_enc);
+ } else {
+ // use encrypt_last for the last chunk
+ let chunk_enc = cipher
+ .encrypt_last(chunk)
+ .ok_or_internal_error("failed to encrypt chunk")?;
+ ret.extend_from_slice(&chunk_enc);
+ break;
+ }
+ }
+ }
+
+ Ok(ret.into())
+ }
+ }
+ }
+}
+
+fn parse_request_headers(
+ headers: &HeaderMap,
+ alg_header: &HeaderName,
+ key_header: &HeaderName,
+ md5_header: &HeaderName,
+) -> Result<Option<(Key<Aes256Gcm>, Md5Output)>, Error> {
+ let alg = headers.get(alg_header).map(HeaderValue::as_bytes);
+ let key = headers.get(key_header).map(HeaderValue::as_bytes);
+ let md5 = headers.get(md5_header).map(HeaderValue::as_bytes);
+
+ match alg {
+ Some(CUSTOMER_ALGORITHM_AES256) => {
+ use md5::{Digest, Md5};
+
+ let key_b64 =
+ key.ok_or_bad_request("Missing server-side-encryption-customer-key header")?;
+ let key_bytes: [u8; 32] = BASE64_STANDARD
+ .decode(&key_b64)
+ .ok_or_bad_request(
+ "Invalid server-side-encryption-customer-key header: invalid base64",
+ )?
+ .try_into()
+ .ok()
+ .ok_or_bad_request(
+ "Invalid server-side-encryption-customer-key header: invalid length",
+ )?;
+
+ let md5_b64 =
+ md5.ok_or_bad_request("Missing server-side-encryption-customer-key-md5 header")?;
+ let md5_bytes = BASE64_STANDARD.decode(&md5_b64).ok_or_bad_request(
+ "Invalid server-side-encryption-customer-key-md5 header: invalid bass64",
+ )?;
+
+ let mut hasher = Md5::new();
+ hasher.update(&key_bytes[..]);
+ let our_md5 = hasher.finalize();
+ if our_md5.as_slice() != md5_bytes.as_slice() {
+ return Err(Error::bad_request(
+ "Server-side encryption client key MD5 checksum does not match",
+ ));
+ }
+
+ Ok(Some((key_bytes.into(), our_md5)))
+ }
+ Some(alg) => Err(Error::InvalidEncryptionAlgorithm(
+ String::from_utf8_lossy(alg).into_owned(),
+ )),
+ None => {
+ if key.is_some() || md5.is_some() {
+ Err(Error::bad_request(
+ "Unexpected server-side-encryption-customer-key{,-md5} header(s)",
+ ))
+ } else {
+ Ok(None)
+ }
+ }
+ }
+}
+
+// ---- encrypt & decrypt streams ----
+
+#[pin_project::pin_project]
+struct DecryptStream {
+ #[pin]
+ stream: ByteStream,
+ done_reading: bool,
+ buf: BytesBuf,
+ key: Key<Aes256Gcm>,
+ state: DecryptStreamState,
+}
+
+enum DecryptStreamState {
+ Starting,
+ Running(DecryptorLE31<Aes256Gcm>),
+ Done,
+}
+
+impl DecryptStream {
+ fn new(stream: ByteStream, key: Key<Aes256Gcm>) -> Self {
+ Self {
+ stream,
+ done_reading: false,
+ buf: BytesBuf::new(),
+ key,
+ state: DecryptStreamState::Starting,
+ }
+ }
+}
+
+impl Stream for DecryptStream {
+ type Item = Result<Bytes, std::io::Error>;
+
+ fn poll_next(
+ self: Pin<&mut Self>,
+ cx: &mut task::Context<'_>,
+ ) -> task::Poll<Option<Self::Item>> {
+ use std::task::Poll;
+
+ let mut this = self.project();
+
+ // The first bytes of the stream should contain the starting nonce.
+ // If we don't have a Running state, it means that we haven't
+ // yet read the nonce.
+ while matches!(this.state, DecryptStreamState::Starting) {
+ let nonce_size = StreamNonceSize::to_usize();
+ if let Some(nonce) = this.buf.take_exact(nonce_size) {
+ let nonce = Nonce::from_slice(nonce.as_ref());
+ *this.state = DecryptStreamState::Running(DecryptorLE31::new(&this.key, nonce));
+ break;
+ }
+
+ match futures::ready!(this.stream.as_mut().poll_next(cx)) {
+ Some(Ok(bytes)) => {
+ this.buf.extend(bytes);
+ }
+ Some(Err(e)) => {
+ return Poll::Ready(Some(Err(e)));
+ }
+ None => {
+ return Poll::Ready(Some(Err(std::io::Error::new(
+ std::io::ErrorKind::UnexpectedEof,
+ "Decrypt: unexpected EOF, could not read nonce",
+ ))));
+ }
+ }
+ }
+
+ // Read at least one byte more than the encrypted chunk size
+ // (if possible), so that we know if we are decrypting the
+ // last chunk or not.
+ while !*this.done_reading && this.buf.len() <= STREAM_ENC_CYPER_CHUNK_SIZE {
+ match futures::ready!(this.stream.as_mut().poll_next(cx)) {
+ Some(Ok(bytes)) => {
+ this.buf.extend(bytes);
+ }
+ Some(Err(e)) => {
+ return Poll::Ready(Some(Err(e)));
+ }
+ None => {
+ *this.done_reading = true;
+ break;
+ }
+ }
+ }
+
+ if matches!(this.state, DecryptStreamState::Done) {
+ if !this.buf.is_empty() {
+ return Poll::Ready(Some(Err(std::io::Error::new(
+ std::io::ErrorKind::Other,
+ "Decrypt: unexpected bytes after last encrypted chunk",
+ ))));
+ }
+ return Poll::Ready(None);
+ }
+
+ let res = if this.buf.len() > STREAM_ENC_CYPER_CHUNK_SIZE {
+ // we have strictly more bytes than the encrypted chunk size,
+ // so we know this is not the last
+ let DecryptStreamState::Running(ref mut cipher) = this.state else {
+ unreachable!()
+ };
+ let chunk = this.buf.take_exact(STREAM_ENC_CYPER_CHUNK_SIZE).unwrap();
+ let chunk_dec = cipher.decrypt_next(chunk.as_ref());
+ if let Ok(c) = &chunk_dec {
+ assert_eq!(c.len(), STREAM_ENC_PLAIN_CHUNK_SIZE);
+ }
+ chunk_dec
+ } else {
+ // We have one encrypted chunk size or less, even though we tried
+ // to read more, so this is the last chunk. Decrypt using the
+ // appropriate decrypt_last() function that then destroys the cipher.
+ let state = std::mem::replace(this.state, DecryptStreamState::Done);
+ let DecryptStreamState::Running(cipher) = state else {
+ unreachable!()
+ };
+ let chunk = this.buf.take_all();
+ cipher.decrypt_last(chunk.as_ref())
+ };
+
+ match res {
+ Ok(bytes) if bytes.is_empty() => Poll::Ready(None),
+ Ok(bytes) => Poll::Ready(Some(Ok(bytes.into()))),
+ Err(_) => Poll::Ready(Some(Err(std::io::Error::new(
+ std::io::ErrorKind::Other,
+ "Decryption failed",
+ )))),
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ use futures::stream::StreamExt;
+ use garage_net::stream::read_stream_to_end;
+
+ fn stream() -> ByteStream {
+ Box::pin(
+ futures::stream::iter(16usize..1024)
+ .map(|i| Ok(Bytes::from(vec![(i % 256) as u8; (i * 37) % 1024]))),
+ )
+ }
+
+ async fn test_block_enc(compression_level: Option<i32>) {
+ let enc = EncryptionParams::SseC {
+ client_key: Aes256Gcm::generate_key(&mut OsRng),
+ client_key_md5: Default::default(), // not needed
+ compression_level,
+ };
+
+ let block_plain = read_stream_to_end(stream()).await.unwrap().into_bytes();
+
+ let block_enc = enc.encrypt_block(block_plain.clone()).unwrap();
+
+ let block_dec =
+ enc.decrypt_block_stream(Box::pin(futures::stream::once(async { Ok(block_enc) })));
+ let block_dec = read_stream_to_end(block_dec).await.unwrap().into_bytes();
+
+ assert_eq!(block_plain, block_dec);
+ assert!(block_dec.len() > 128000);
+ }
+
+ #[tokio::test]
+ async fn test_encrypt_block() {
+ test_block_enc(None).await
+ }
+
+ #[tokio::test]
+ async fn test_encrypt_block_compressed() {
+ test_block_enc(Some(1)).await
+ }
+}
diff --git a/src/api/s3/error.rs b/src/api/s3/error.rs
index f86c19a6..5cb5d04e 100644
--- a/src/api/s3/error.rs
+++ b/src/api/s3/error.rs
@@ -65,6 +65,10 @@ pub enum Error {
#[error(display = "Invalid HTTP range: {:?}", _0)]
InvalidRange(#[error(from)] (http_range::HttpRangeParseError, u64)),
+ /// The client sent a range header with invalid value
+ #[error(display = "Invalid encryption algorithm: {:?}, should be AES256", _0)]
+ InvalidEncryptionAlgorithm(String),
+
/// The client sent a request for an action not supported by garage
#[error(display = "Unimplemented action: {}", _0)]
NotImplemented(String),
@@ -126,6 +130,7 @@ impl Error {
Error::InvalidXml(_) => "MalformedXML",
Error::InvalidRange(_) => "InvalidRange",
Error::InvalidUtf8Str(_) | Error::InvalidUtf8String(_) => "InvalidRequest",
+ Error::InvalidEncryptionAlgorithm(_) => "InvalidEncryptionAlgorithmError",
}
}
}
@@ -143,6 +148,7 @@ impl ApiError for Error {
| Error::InvalidPart
| Error::InvalidPartOrder
| Error::EntityTooSmall
+ | Error::InvalidEncryptionAlgorithm(_)
| Error::InvalidXml(_)
| Error::InvalidUtf8Str(_)
| Error::InvalidUtf8String(_) => StatusCode::BAD_REQUEST,
diff --git a/src/api/s3/get.rs b/src/api/s3/get.rs
index ed996fb1..ec300ab7 100644
--- a/src/api/s3/get.rs
+++ b/src/api/s3/get.rs
@@ -1,10 +1,12 @@
//! Function related to GET and HEAD requests
+use std::collections::BTreeMap;
use std::convert::TryInto;
use std::sync::Arc;
use std::time::{Duration, UNIX_EPOCH};
+use bytes::Bytes;
use futures::future;
-use futures::stream::{self, StreamExt};
+use futures::stream::{self, Stream, StreamExt};
use http::header::{
ACCEPT_RANGES, CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE,
CONTENT_LENGTH, CONTENT_RANGE, CONTENT_TYPE, ETAG, EXPIRES, IF_MODIFIED_SINCE, IF_NONE_MATCH,
@@ -25,6 +27,7 @@ use garage_model::s3::version_table::*;
use crate::helpers::*;
use crate::s3::api_server::ResBody;
+use crate::s3::encryption::EncryptionParams;
use crate::s3::error::*;
const X_AMZ_MP_PARTS_COUNT: &str = "x-amz-mp-parts-count";
@@ -42,6 +45,8 @@ pub struct GetObjectOverrides {
fn object_headers(
version: &ObjectVersion,
version_meta: &ObjectVersionMeta,
+ headers: &ObjectVersionHeaders,
+ encryption: EncryptionParams,
) -> http::response::Builder {
debug!("Version meta: {:?}", version_meta);
@@ -49,7 +54,6 @@ fn object_headers(
let date_str = httpdate::fmt_http_date(date);
let mut resp = Response::builder()
- .header(CONTENT_TYPE, version_meta.headers.content_type.to_string())
.header(LAST_MODIFIED, date_str)
.header(ACCEPT_RANGES, "bytes".to_string());
@@ -57,10 +61,27 @@ fn object_headers(
resp = resp.header(ETAG, format!("\"{}\"", version_meta.etag));
}
- for (k, v) in version_meta.headers.other.iter() {
- resp = resp.header(k, v.to_string());
+ // When metadata is retrieved through the REST API, Amazon S3 combines headers that
+ // have the same name (ignoring case) into a comma-delimited list.
+ // See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingMetadata.html
+ let mut headers_by_name = BTreeMap::new();
+ for (name, value) in headers.0.iter() {
+ match headers_by_name.get_mut(name) {
+ None => {
+ headers_by_name.insert(name, vec![value.as_str()]);
+ }
+ Some(headers) => {
+ headers.push(value.as_str());
+ }
+ }
+ }
+
+ for (name, values) in headers_by_name {
+ resp = resp.header(name, values.join(","));
}
+ encryption.add_response_headers(&mut resp);
+
resp
}
@@ -175,21 +196,27 @@ pub async fn handle_head_without_ctx(
return Ok(cached);
}
+ let (encryption, headers) =
+ EncryptionParams::check_decrypt(&garage, req.headers(), &version_meta.encryption)?;
+
if let Some(pn) = part_number {
match version_data {
- ObjectVersionData::Inline(_, bytes) => {
+ ObjectVersionData::Inline(_, _) => {
if pn != 1 {
return Err(Error::InvalidPart);
}
- Ok(object_headers(object_version, version_meta)
- .header(CONTENT_LENGTH, format!("{}", bytes.len()))
- .header(
- CONTENT_RANGE,
- format!("bytes 0-{}/{}", bytes.len() - 1, bytes.len()),
- )
- .header(X_AMZ_MP_PARTS_COUNT, "1")
- .status(StatusCode::PARTIAL_CONTENT)
- .body(empty_body())?)
+ let bytes_len = version_meta.size;
+ Ok(
+ object_headers(object_version, version_meta, &headers, encryption)
+ .header(CONTENT_LENGTH, format!("{}", bytes_len))
+ .header(
+ CONTENT_RANGE,
+ format!("bytes 0-{}/{}", bytes_len - 1, bytes_len),
+ )
+ .header(X_AMZ_MP_PARTS_COUNT, "1")
+ .status(StatusCode::PARTIAL_CONTENT)
+ .body(empty_body())?,
+ )
}
ObjectVersionData::FirstBlock(_, _) => {
let version = garage
@@ -201,28 +228,32 @@ pub async fn handle_head_without_ctx(
let (part_offset, part_end) =
calculate_part_bounds(&version, pn).ok_or(Error::InvalidPart)?;
- Ok(object_headers(object_version, version_meta)
- .header(CONTENT_LENGTH, format!("{}", part_end - part_offset))
- .header(
- CONTENT_RANGE,
- format!(
- "bytes {}-{}/{}",
- part_offset,
- part_end - 1,
- version_meta.size
- ),
- )
- .header(X_AMZ_MP_PARTS_COUNT, format!("{}", version.n_parts()?))
- .status(StatusCode::PARTIAL_CONTENT)
- .body(empty_body())?)
+ Ok(
+ object_headers(object_version, version_meta, &headers, encryption)
+ .header(CONTENT_LENGTH, format!("{}", part_end - part_offset))
+ .header(
+ CONTENT_RANGE,
+ format!(
+ "bytes {}-{}/{}",
+ part_offset,
+ part_end - 1,
+ version_meta.size
+ ),
+ )
+ .header(X_AMZ_MP_PARTS_COUNT, format!("{}", version.n_parts()?))
+ .status(StatusCode::PARTIAL_CONTENT)
+ .body(empty_body())?,
+ )
}
_ => unreachable!(),
}
} else {
- Ok(object_headers(object_version, version_meta)
- .header(CONTENT_LENGTH, format!("{}", version_meta.size))
- .status(StatusCode::OK)
- .body(empty_body())?)
+ Ok(
+ object_headers(object_version, version_meta, &headers, encryption)
+ .header(CONTENT_LENGTH, format!("{}", version_meta.size))
+ .status(StatusCode::OK)
+ .body(empty_body())?,
+ )
}
}
@@ -273,23 +304,41 @@ pub async fn handle_get_without_ctx(
return Ok(cached);
}
+ let (enc, headers) =
+ EncryptionParams::check_decrypt(&garage, req.headers(), &last_v_meta.encryption)?;
+
match (part_number, parse_range_header(req, last_v_meta.size)?) {
(Some(_), Some(_)) => Err(Error::bad_request(
"Cannot specify both partNumber and Range header",
)),
- (Some(pn), None) => handle_get_part(garage, last_v, last_v_data, last_v_meta, pn).await,
+ (Some(pn), None) => {
+ handle_get_part(garage, last_v, last_v_data, last_v_meta, enc, &headers, pn).await
+ }
(None, Some(range)) => {
handle_get_range(
garage,
last_v,
last_v_data,
last_v_meta,
+ enc,
+ &headers,
range.start,
range.start + range.length,
)
.await
}
- (None, None) => handle_get_full(garage, last_v, last_v_data, last_v_meta, overrides).await,
+ (None, None) => {
+ handle_get_full(
+ garage,
+ last_v,
+ last_v_data,
+ last_v_meta,
+ enc,
+ &headers,
+ overrides,
+ )
+ .await
+ }
}
}
@@ -298,17 +347,36 @@ async fn handle_get_full(
version: &ObjectVersion,
version_data: &ObjectVersionData,
version_meta: &ObjectVersionMeta,
+ encryption: EncryptionParams,
+ headers: &ObjectVersionHeaders,
overrides: GetObjectOverrides,
) -> Result<Response<ResBody>, Error> {
- let mut resp_builder = object_headers(version, version_meta)
+ let mut resp_builder = object_headers(version, version_meta, &headers, encryption)
.header(CONTENT_LENGTH, format!("{}", version_meta.size))
.status(StatusCode::OK);
getobject_override_headers(overrides, &mut resp_builder)?;
+ let stream = full_object_byte_stream(garage, version, version_data, encryption);
+
+ Ok(resp_builder.body(response_body_from_stream(stream))?)
+}
+
+pub fn full_object_byte_stream(
+ garage: Arc<Garage>,
+ version: &ObjectVersion,
+ version_data: &ObjectVersionData,
+ encryption: EncryptionParams,
+) -> ByteStream {
match &version_data {
ObjectVersionData::DeleteMarker => unreachable!(),
ObjectVersionData::Inline(_, bytes) => {
- Ok(resp_builder.body(bytes_body(bytes.to_vec().into()))?)
+ let bytes = bytes.to_vec();
+ Box::pin(futures::stream::once(async move {
+ encryption
+ .decrypt_blob(&bytes)
+ .map(|x| Bytes::from(x.to_vec()))
+ .map_err(std_error_from_read_error)
+ }))
}
ObjectVersionData::FirstBlock(_, first_block_hash) => {
let (tx, rx) = mpsc::channel::<ByteStream>(2);
@@ -324,19 +392,18 @@ async fn handle_get_full(
garage2.version_table.get(&version_uuid, &EmptyKey).await
});
- let stream_block_0 = garage
- .block_manager
- .rpc_get_block_streaming(&first_block_hash, Some(order_stream.order(0)))
+ let stream_block_0 = encryption
+ .get_block(&garage, &first_block_hash, Some(order_stream.order(0)))
.await?;
+
tx.send(stream_block_0)
.await
.ok_or_message("channel closed")?;
let version = version_fut.await.unwrap()?.ok_or(Error::NoSuchKey)?;
for (i, (_, vb)) in version.blocks.items().iter().enumerate().skip(1) {
- let stream_block_i = garage
- .block_manager
- .rpc_get_block_streaming(&vb.hash, Some(order_stream.order(i as u64)))
+ let stream_block_i = encryption
+ .get_block(&garage, &vb.hash, Some(order_stream.order(i as u64)))
.await?;
tx.send(stream_block_i)
.await
@@ -354,8 +421,7 @@ async fn handle_get_full(
}
});
- let body = response_body_from_block_stream(rx);
- Ok(resp_builder.body(body)?)
+ Box::pin(tokio_stream::wrappers::ReceiverStream::new(rx).flatten())
}
}
}
@@ -365,13 +431,15 @@ async fn handle_get_range(
version: &ObjectVersion,
version_data: &ObjectVersionData,
version_meta: &ObjectVersionMeta,
+ encryption: EncryptionParams,
+ headers: &ObjectVersionHeaders,
begin: u64,
end: u64,
) -> Result<Response<ResBody>, Error> {
// Here we do not use getobject_override_headers because we don't
// want to add any overridden headers (those should not be added
// when returning PARTIAL_CONTENT)
- let resp_builder = object_headers(version, version_meta)
+ let resp_builder = object_headers(version, version_meta, headers, encryption)
.header(CONTENT_LENGTH, format!("{}", end - begin))
.header(
CONTENT_RANGE,
@@ -382,6 +450,7 @@ async fn handle_get_range(
match &version_data {
ObjectVersionData::DeleteMarker => unreachable!(),
ObjectVersionData::Inline(_meta, bytes) => {
+ let bytes = encryption.decrypt_blob(&bytes)?;
if end as usize <= bytes.len() {
let body = bytes_body(bytes[begin as usize..end as usize].to_vec().into());
Ok(resp_builder.body(body)?)
@@ -398,7 +467,8 @@ async fn handle_get_range(
.await?
.ok_or(Error::NoSuchKey)?;
- let body = body_from_blocks_range(garage, version.blocks.items(), begin, end);
+ let body =
+ body_from_blocks_range(garage, encryption, version.blocks.items(), begin, end);
Ok(resp_builder.body(body)?)
}
}
@@ -409,17 +479,21 @@ async fn handle_get_part(
object_version: &ObjectVersion,
version_data: &ObjectVersionData,
version_meta: &ObjectVersionMeta,
+ encryption: EncryptionParams,
+ headers: &ObjectVersionHeaders,
part_number: u64,
) -> Result<Response<ResBody>, Error> {
// Same as for get_range, no getobject_override_headers
- let resp_builder =
- object_headers(object_version, version_meta).status(StatusCode::PARTIAL_CONTENT);
+ let resp_builder = object_headers(object_version, version_meta, headers, encryption)
+ .status(StatusCode::PARTIAL_CONTENT);
match version_data {
ObjectVersionData::Inline(_, bytes) => {
if part_number != 1 {
return Err(Error::InvalidPart);
}
+ let bytes = encryption.decrypt_blob(&bytes)?;
+ assert_eq!(bytes.len() as u64, version_meta.size);
Ok(resp_builder
.header(CONTENT_LENGTH, format!("{}", bytes.len()))
.header(
@@ -427,7 +501,7 @@ async fn handle_get_part(
format!("bytes {}-{}/{}", 0, bytes.len() - 1, bytes.len()),
)
.header(X_AMZ_MP_PARTS_COUNT, "1")
- .body(bytes_body(bytes.to_vec().into()))?)
+ .body(bytes_body(bytes.into_owned().into()))?)
}
ObjectVersionData::FirstBlock(_, _) => {
let version = garage
@@ -439,7 +513,8 @@ async fn handle_get_part(
let (begin, end) =
calculate_part_bounds(&version, part_number).ok_or(Error::InvalidPart)?;
- let body = body_from_blocks_range(garage, version.blocks.items(), begin, end);
+ let body =
+ body_from_blocks_range(garage, encryption, version.blocks.items(), begin, end);
Ok(resp_builder
.header(CONTENT_LENGTH, format!("{}", end - begin))
@@ -494,6 +569,7 @@ fn calculate_part_bounds(v: &Version, part_number: u64) -> Option<(u64, u64)> {
fn body_from_blocks_range(
garage: Arc<Garage>,
+ encryption: EncryptionParams,
all_blocks: &[(VersionBlockKey, VersionBlock)],
begin: u64,
end: u64,
@@ -523,12 +599,11 @@ fn body_from_blocks_range(
tokio::spawn(async move {
match async {
- let garage = garage.clone();
for (i, (block, block_offset)) in blocks.iter().enumerate() {
- let block_stream = garage
- .block_manager
- .rpc_get_block_streaming(&block.hash, Some(order_stream.order(i as u64)))
- .await?
+ let block_stream = encryption
+ .get_block(&garage, &block.hash, Some(order_stream.order(i as u64)))
+ .await?;
+ let block_stream = block_stream
.scan(*block_offset, move |chunk_offset, chunk| {
let r = match chunk {
Ok(chunk_bytes) => {
@@ -588,19 +663,30 @@ fn body_from_blocks_range(
}
fn response_body_from_block_stream(rx: mpsc::Receiver<ByteStream>) -> ResBody {
- let body_stream = tokio_stream::wrappers::ReceiverStream::new(rx)
- .flatten()
- .map(|x| {
- x.map(hyper::body::Frame::data)
- .map_err(|e| Error::from(garage_util::error::Error::from(e)))
- });
+ let body_stream = tokio_stream::wrappers::ReceiverStream::new(rx).flatten();
+ response_body_from_stream(body_stream)
+}
+
+fn response_body_from_stream<S>(stream: S) -> ResBody
+where
+ S: Stream<Item = Result<Bytes, std::io::Error>> + Send + Sync + 'static,
+{
+ let body_stream = stream.map(|x| {
+ x.map(hyper::body::Frame::data)
+ .map_err(|e| Error::from(garage_util::error::Error::from(e)))
+ });
ResBody::new(http_body_util::StreamBody::new(body_stream))
}
fn error_stream_item<E: std::fmt::Display>(e: E) -> ByteStream {
- let err = std::io::Error::new(
+ Box::pin(stream::once(future::ready(Err(std_error_from_read_error(
+ e,
+ )))))
+}
+
+fn std_error_from_read_error<E: std::fmt::Display>(e: E) -> std::io::Error {
+ std::io::Error::new(
std::io::ErrorKind::Other,
- format!("Error while getting object data: {}", e),
- );
- Box::pin(stream::once(future::ready(Err(err))))
+ format!("Error while reading object data: {}", e),
+ )
}
diff --git a/src/api/s3/list.rs b/src/api/s3/list.rs
index 302c03f4..1678f1fa 100644
--- a/src/api/s3/list.rs
+++ b/src/api/s3/list.rs
@@ -944,9 +944,8 @@ mod tests {
timestamp: TS,
state: ObjectVersionState::Uploading {
multipart: true,
- headers: ObjectVersionHeaders {
- content_type: "text/plain".to_string(),
- other: BTreeMap::<String, String>::new(),
+ encryption: ObjectVersionEncryption::Plaintext {
+ headers: ObjectVersionHeaders(vec![]),
},
},
}
diff --git a/src/api/s3/mod.rs b/src/api/s3/mod.rs
index cbdb94ab..1eb95d40 100644
--- a/src/api/s3/mod.rs
+++ b/src/api/s3/mod.rs
@@ -13,5 +13,6 @@ mod post_object;
mod put;
mod website;
+mod encryption;
mod router;
pub mod xml;
diff --git a/src/api/s3/multipart.rs b/src/api/s3/multipart.rs
index 1d5aeb26..fcc5769f 100644
--- a/src/api/s3/multipart.rs
+++ b/src/api/s3/multipart.rs
@@ -16,6 +16,7 @@ use garage_model::s3::version_table::*;
use crate::helpers::*;
use crate::s3::api_server::{ReqBody, ResBody};
+use crate::s3::encryption::EncryptionParams;
use crate::s3::error::*;
use crate::s3::put::*;
use crate::s3::xml as s3_xml;
@@ -41,13 +42,17 @@ pub async fn handle_create_multipart_upload(
let headers = get_headers(req.headers())?;
+ // Determine whether object should be encrypted, and if so the key
+ let encryption = EncryptionParams::new_from_headers(&garage, req.headers())?;
+ let object_encryption = encryption.encrypt_headers(headers)?;
+
// Create object in object table
let object_version = ObjectVersion {
uuid: upload_id,
timestamp,
state: ObjectVersionState::Uploading {
multipart: true,
- headers,
+ encryption: object_encryption,
},
};
let object = Object::new(*bucket_id, key.to_string(), vec![object_version]);
@@ -68,7 +73,9 @@ pub async fn handle_create_multipart_upload(
};
let xml = s3_xml::to_xml_with_header(&result)?;
- Ok(Response::new(string_body(xml)))
+ let mut resp = Response::builder();
+ encryption.add_response_headers(&mut resp);
+ Ok(resp.body(string_body(xml))?)
}
pub async fn handle_put_part(
@@ -91,12 +98,21 @@ pub async fn handle_put_part(
// Read first chuck, and at the same time try to get object to see if it exists
let key = key.to_string();
- let stream = body_stream(req.into_body());
+ let (req_head, req_body) = req.into_parts();
+ let stream = body_stream(req_body);
let mut chunker = StreamChunker::new(stream, garage.config.block_size);
- let ((_, _, mut mpu), first_block) =
+ let ((_, object_version, mut mpu), first_block) =
futures::try_join!(get_upload(&ctx, &key, &upload_id), chunker.next(),)?;
+ // Check encryption params
+ let object_encryption = match object_version.state {
+ ObjectVersionState::Uploading { encryption, .. } => encryption,
+ _ => unreachable!(),
+ };
+ let (encryption, _) =
+ EncryptionParams::check_decrypt(&garage, &req_head.headers, &object_encryption)?;
+
// Check object is valid and part can be accepted
let first_block = first_block.ok_or_bad_request("Empty body")?;
@@ -136,24 +152,32 @@ pub async fn handle_put_part(
garage.version_table.insert(&version).await?;
// Copy data to version
- let (total_size, data_md5sum, data_sha256sum, _) =
- read_and_put_blocks(&ctx, &version, part_number, first_block, &mut chunker).await?;
+ let (total_size, data_md5sum, data_sha256sum, _) = read_and_put_blocks(
+ &ctx,
+ &version,
+ encryption,
+ part_number,
+ first_block,
+ &mut chunker,
+ )
+ .await?;
// Verify that checksums map
ensure_checksum_matches(
- data_md5sum.as_slice(),
+ &data_md5sum,
data_sha256sum,
content_md5.as_deref(),
content_sha256,
)?;
// Store part etag in version
- let data_md5sum_hex = hex::encode(data_md5sum);
+ let etag = encryption.etag_from_md5(&data_md5sum);
+
mpu.parts.put(
mpu_part_key,
MpuPart {
version: version_uuid,
- etag: Some(data_md5sum_hex.clone()),
+ etag: Some(etag.clone()),
size: Some(total_size),
},
);
@@ -163,11 +187,9 @@ pub async fn handle_put_part(
// We won't have to clean up on drop.
interrupted_cleanup.cancel();
- let response = Response::builder()
- .header("ETag", format!("\"{}\"", data_md5sum_hex))
- .body(empty_body())
- .unwrap();
- Ok(response)
+ let mut resp = Response::builder().header("ETag", format!("\"{}\"", etag));
+ encryption.add_response_headers(&mut resp);
+ Ok(resp.body(empty_body())?)
}
struct InterruptedCleanup(Option<InterruptedCleanupInner>);
@@ -241,8 +263,8 @@ pub async fn handle_complete_multipart_upload(
return Err(Error::bad_request("No data was uploaded"));
}
- let headers = match object_version.state {
- ObjectVersionState::Uploading { headers, .. } => headers,
+ let object_encryption = match object_version.state {
+ ObjectVersionState::Uploading { encryption, .. } => encryption,
_ => unreachable!(),
};
@@ -344,7 +366,7 @@ pub async fn handle_complete_multipart_upload(
// Write final object version
object_version.state = ObjectVersionState::Complete(ObjectVersionData::FirstBlock(
ObjectVersionMeta {
- headers,
+ encryption: object_encryption,
size: total_size,
etag: etag.clone(),
},
diff --git a/src/api/s3/post_object.rs b/src/api/s3/post_object.rs
index 66f8174c..7c4219a7 100644
--- a/src/api/s3/post_object.rs
+++ b/src/api/s3/post_object.rs
@@ -18,6 +18,7 @@ use garage_model::garage::Garage;
use crate::helpers::*;
use crate::s3::api_server::ResBody;
use crate::s3::cors::*;
+use crate::s3::encryption::EncryptionParams;
use crate::s3::error::*;
use crate::s3::put::{get_headers, save_stream};
use crate::s3::xml as s3_xml;
@@ -48,13 +49,17 @@ pub async fn handle_post_object(
let mut multipart = Multipart::with_constraints(stream, boundary, constraints);
let mut params = HeaderMap::new();
- let field = loop {
+ let file_field = loop {
let field = if let Some(field) = multipart.next_field().await? {
field
} else {
return Err(Error::bad_request("Request did not contain a file"));
};
- let name: HeaderName = if let Some(Ok(name)) = field.name().map(TryInto::try_into) {
+ let name: HeaderName = if let Some(Ok(name)) = field
+ .name()
+ .map(str::to_ascii_lowercase)
+ .map(TryInto::try_into)
+ {
name
} else {
continue;
@@ -93,10 +98,14 @@ pub async fn handle_post_object(
.ok_or_bad_request("No policy was provided")?
.to_str()?;
let authorization = Authorization::parse_form(&params)?;
+ let content_md5 = params
+ .get("content-md5")
+ .map(HeaderValue::to_str)
+ .transpose()?;
let key = if key.contains("${filename}") {
// if no filename is provided, don't replace. This matches the behavior of AWS.
- if let Some(filename) = field.file_name() {
+ if let Some(filename) = file_field.file_name() {
key.replace("${filename}", filename)
} else {
key.to_owned()
@@ -143,9 +152,8 @@ pub async fn handle_post_object(
let mut conditions = decoded_policy.into_conditions()?;
for (param_key, value) in params.iter() {
- let mut param_key = param_key.to_string();
- param_key.make_ascii_lowercase();
- match param_key.as_str() {
+ let param_key = param_key.as_str();
+ match param_key {
"policy" | "x-amz-signature" => (), // this is always accepted, as it's required to validate other fields
"content-type" => {
let conds = conditions.params.remove("content-type").ok_or_else(|| {
@@ -190,7 +198,7 @@ pub async fn handle_post_object(
// how aws seems to behave.
continue;
}
- let conds = conditions.params.remove(&param_key).ok_or_else(|| {
+ let conds = conditions.params.remove(param_key).ok_or_else(|| {
Error::bad_request(format!("Key '{}' is not allowed in policy", param_key))
})?;
for cond in conds {
@@ -218,8 +226,9 @@ pub async fn handle_post_object(
let headers = get_headers(&params)?;
- let stream = field.map(|r| r.map_err(Into::into));
+ let encryption = EncryptionParams::new_from_headers(&garage, &params)?;
+ let stream = file_field.map(|r| r.map_err(Into::into));
let ctx = ReqCtx {
garage,
bucket_id,
@@ -228,17 +237,18 @@ pub async fn handle_post_object(
api_key,
};
- let (_, md5) = save_stream(
+ let res = save_stream(
&ctx,
headers,
+ encryption,
StreamLimiter::new(stream, conditions.content_length),
&key,
- None,
+ content_md5.map(str::to_string),
None,
)
.await?;
- let etag = format!("\"{}\"", md5);
+ let etag = format!("\"{}\"", res.etag);
let mut resp = if let Some(mut target) = params
.get("success_action_redirect")
@@ -252,11 +262,12 @@ pub async fn handle_post_object(
.append_pair("key", &key)
.append_pair("etag", &etag);
let target = target.to_string();
- Response::builder()
+ let mut resp = Response::builder()
.status(StatusCode::SEE_OTHER)
.header(header::LOCATION, target.clone())
- .header(header::ETAG, etag)
- .body(string_body(target))?
+ .header(header::ETAG, etag);
+ encryption.add_response_headers(&mut resp);
+ resp.body(string_body(target))?
} else {
let path = head
.uri
@@ -283,9 +294,10 @@ pub async fn handle_post_object(
.get("success_action_status")
.and_then(|h| h.to_str().ok())
.unwrap_or("204");
- let builder = Response::builder()
+ let mut builder = Response::builder()
.header(header::LOCATION, location.clone())
.header(header::ETAG, etag.clone());
+ encryption.add_response_headers(&mut builder);
match action {
"200" => builder.status(StatusCode::OK).body(empty_body())?,
"201" => {
diff --git a/src/api/s3/put.rs b/src/api/s3/put.rs
index 685cca80..941e4122 100644
--- a/src/api/s3/put.rs
+++ b/src/api/s3/put.rs
@@ -1,4 +1,4 @@
-use std::collections::{BTreeMap, HashMap};
+use std::collections::HashMap;
use std::sync::Arc;
use base64::prelude::*;
@@ -36,10 +36,18 @@ use garage_model::s3::version_table::*;
use crate::helpers::*;
use crate::s3::api_server::{ReqBody, ResBody};
+use crate::s3::encryption::EncryptionParams;
use crate::s3::error::*;
const PUT_BLOCKS_MAX_PARALLEL: usize = 3;
+pub struct SaveStreamResult {
+ pub version_uuid: Uuid,
+ pub version_timestamp: u64,
+ /// Etag WITHOUT THE QUOTES (just the hex value)
+ pub etag: String,
+}
+
pub async fn handle_put(
ctx: ReqCtx,
req: Request<ReqBody>,
@@ -50,6 +58,9 @@ pub async fn handle_put(
let headers = get_headers(req.headers())?;
debug!("Object headers: {:?}", headers);
+ // Determine whether object should be encrypted, and if so the key
+ let encryption = EncryptionParams::new_from_headers(&ctx.garage, req.headers())?;
+
let content_md5 = match req.headers().get("content-md5") {
Some(x) => Some(x.to_str()?.to_string()),
None => None,
@@ -57,19 +68,33 @@ pub async fn handle_put(
let stream = body_stream(req.into_body());
- save_stream(&ctx, headers, stream, key, content_md5, content_sha256)
- .await
- .map(|(uuid, md5)| put_response(uuid, md5))
+ let res = save_stream(
+ &ctx,
+ headers,
+ encryption,
+ stream,
+ key,
+ content_md5,
+ content_sha256,
+ )
+ .await?;
+
+ let mut resp = Response::builder()
+ .header("x-amz-version-id", hex::encode(res.version_uuid))
+ .header("ETag", format!("\"{}\"", res.etag));
+ encryption.add_response_headers(&mut resp);
+ Ok(resp.body(empty_body())?)
}
pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
ctx: &ReqCtx,
headers: ObjectVersionHeaders,
+ encryption: EncryptionParams,
body: S,
key: &String,
content_md5: Option<String>,
content_sha256: Option<FixedBytes32>,
-) -> Result<(Uuid, String), Error> {
+) -> Result<SaveStreamResult, Error> {
let ReqCtx {
garage, bucket_id, ..
} = ctx;
@@ -82,6 +107,8 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
let first_block = first_block_opt.unwrap_or_default();
+ let object_encryption = encryption.encrypt_headers(headers)?;
+
// Generate identity of new version
let version_uuid = gen_uuid();
let version_timestamp = next_timestamp(existing_object.as_ref());
@@ -92,37 +119,43 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
let mut md5sum = Md5::new();
md5sum.update(&first_block[..]);
let data_md5sum = md5sum.finalize();
- let data_md5sum_hex = hex::encode(data_md5sum);
let data_sha256sum = sha256sum(&first_block[..]);
- let size = first_block.len() as u64;
ensure_checksum_matches(
- data_md5sum.as_slice(),
+ &data_md5sum,
data_sha256sum,
content_md5.as_deref(),
content_sha256,
)?;
+ let size = first_block.len() as u64;
check_quotas(ctx, size, existing_object.as_ref()).await?;
+ let etag = encryption.etag_from_md5(&data_md5sum);
+ let inline_data = encryption.encrypt_blob(&first_block)?.to_vec();
+
let object_version = ObjectVersion {
uuid: version_uuid,
timestamp: version_timestamp,
state: ObjectVersionState::Complete(ObjectVersionData::Inline(
ObjectVersionMeta {
- headers,
+ encryption: object_encryption,
size,
- etag: data_md5sum_hex.clone(),
+ etag: etag.clone(),
},
- first_block.to_vec(),
+ inline_data,
)),
};
let object = Object::new(*bucket_id, key.into(), vec![object_version]);
garage.object_table.insert(&object).await?;
- return Ok((version_uuid, data_md5sum_hex));
+ return Ok(SaveStreamResult {
+ version_uuid,
+ version_timestamp,
+ etag,
+ });
}
// The following consists in many steps that can each fail.
@@ -142,7 +175,7 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
uuid: version_uuid,
timestamp: version_timestamp,
state: ObjectVersionState::Uploading {
- headers: headers.clone(),
+ encryption: object_encryption.clone(),
multipart: false,
},
};
@@ -165,10 +198,10 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
// Transfer data and verify checksum
let (total_size, data_md5sum, data_sha256sum, first_block_hash) =
- read_and_put_blocks(ctx, &version, 1, first_block, &mut chunker).await?;
+ read_and_put_blocks(ctx, &version, encryption, 1, first_block, &mut chunker).await?;
ensure_checksum_matches(
- data_md5sum.as_slice(),
+ &data_md5sum,
data_sha256sum,
content_md5.as_deref(),
content_sha256,
@@ -177,12 +210,13 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
check_quotas(ctx, total_size, existing_object.as_ref()).await?;
// Save final object state, marked as Complete
- let md5sum_hex = hex::encode(data_md5sum);
+ let etag = encryption.etag_from_md5(&data_md5sum);
+
object_version.state = ObjectVersionState::Complete(ObjectVersionData::FirstBlock(
ObjectVersionMeta {
- headers,
+ encryption: object_encryption,
size: total_size,
- etag: md5sum_hex.clone(),
+ etag: etag.clone(),
},
first_block_hash,
));
@@ -193,7 +227,11 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
// We won't have to clean up on drop.
interrupted_cleanup.cancel();
- Ok((version_uuid, md5sum_hex))
+ Ok(SaveStreamResult {
+ version_uuid,
+ version_timestamp,
+ etag,
+ })
}
/// Validate MD5 sum against content-md5 header
@@ -248,7 +286,7 @@ pub(crate) async fn check_quotas(
.await?;
let counters = counters
- .map(|x| x.filtered_values(&garage.system.ring.borrow()))
+ .map(|x| x.filtered_values(&garage.system.cluster_layout()))
.unwrap_or_default();
let (prev_cnt_obj, prev_cnt_size) = match prev_object {
@@ -290,6 +328,7 @@ pub(crate) async fn check_quotas(
pub(crate) async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
ctx: &ReqCtx,
version: &Version,
+ encryption: EncryptionParams,
part_number: u64,
first_block: Bytes,
chunker: &mut StreamChunker<S>,
@@ -349,12 +388,31 @@ pub(crate) async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> +
))
};
- let (block_tx3, mut block_rx3) = mpsc::channel::<Result<(Bytes, Hash), Error>>(1);
- let hash_blocks = async {
+ let (block_tx3, mut block_rx3) = mpsc::channel::<Result<(Bytes, u64, Hash), Error>>(1);
+ let encrypt_hash_blocks = async {
let mut first_block_hash = None;
while let Some(next) = block_rx2.recv().await {
match next {
Ok(block) => {
+ let unencrypted_len = block.len() as u64;
+ let block = if encryption.is_encrypted() {
+ let res =
+ tokio::task::spawn_blocking(move || encryption.encrypt_block(block))
+ .with_context(Context::current_with_span(
+ tracer.start("Encrypt block"),
+ ))
+ .await
+ .unwrap();
+ match res {
+ Ok(b) => b,
+ Err(e) => {
+ block_tx3.send(Err(e)).await?;
+ break;
+ }
+ }
+ } else {
+ block
+ };
let hash = async_blake2sum(block.clone())
.with_context(Context::current_with_span(
tracer.start("Hash block (blake2)"),
@@ -363,7 +421,7 @@ pub(crate) async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> +
if first_block_hash.is_none() {
first_block_hash = Some(hash);
}
- block_tx3.send(Ok((block, hash))).await?;
+ block_tx3.send(Ok((block, unencrypted_len, hash))).await?;
}
Err(e) => {
block_tx3.send(Err(e)).await?;
@@ -398,7 +456,7 @@ pub(crate) async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> +
block_rx3.recv().await
}
};
- let (block, hash) = tokio::select! {
+ let (block, unencrypted_len, hash) = tokio::select! {
result = write_futs_next => {
result?;
continue;
@@ -410,17 +468,18 @@ pub(crate) async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> +
};
// For next block to be written: count its size and spawn future to write it
- let offset = written_bytes;
- written_bytes += block.len() as u64;
write_futs.push_back(put_block_and_meta(
ctx,
version,
part_number,
- offset,
+ written_bytes,
hash,
block,
+ unencrypted_len,
+ encryption.is_encrypted(),
order_stream.order(written_bytes),
));
+ written_bytes += unencrypted_len;
}
while let Some(res) = write_futs.next().await {
res?;
@@ -429,7 +488,7 @@ pub(crate) async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> +
};
let (_, stream_hash_result, block_hash_result, final_result) =
- futures::join!(read_blocks, hash_stream, hash_blocks, put_blocks);
+ futures::join!(read_blocks, hash_stream, encrypt_hash_blocks, put_blocks);
let total_size = final_result?;
// unwrap here is ok, because if hasher failed, it is because something failed
@@ -449,6 +508,8 @@ async fn put_block_and_meta(
offset: u64,
hash: Hash,
block: Bytes,
+ size: u64,
+ is_encrypted: bool,
order_tag: OrderTag,
) -> Result<(), GarageError> {
let ReqCtx { garage, .. } = ctx;
@@ -459,10 +520,7 @@ async fn put_block_and_meta(
part_number,
offset,
},
- VersionBlock {
- hash,
- size: block.len() as u64,
- },
+ VersionBlock { hash, size },
);
let block_ref = BlockRef {
@@ -474,7 +532,7 @@ async fn put_block_and_meta(
futures::try_join!(
garage
.block_manager
- .rpc_put_block(hash, block, Some(order_tag)),
+ .rpc_put_block(hash, block, is_encrypted, Some(order_tag)),
garage.version_table.insert(&version),
garage.block_ref_table.insert(&block_ref),
)?;
@@ -517,14 +575,6 @@ impl<S: Stream<Item = Result<Bytes, Error>> + Unpin> StreamChunker<S> {
}
}
-pub fn put_response(version_uuid: Uuid, md5sum_hex: String) -> Response<ResBody> {
- Response::builder()
- .header("x-amz-version-id", hex::encode(version_uuid))
- .header("ETag", format!("\"{}\"", md5sum_hex))
- .body(empty_body())
- .unwrap()
-}
-
struct InterruptedCleanup(Option<InterruptedCleanupInner>);
struct InterruptedCleanupInner {
garage: Arc<Garage>,
@@ -559,57 +609,35 @@ impl Drop for InterruptedCleanup {
// ============ helpers ============
-pub(crate) fn get_mime_type(headers: &HeaderMap<HeaderValue>) -> Result<String, Error> {
- Ok(headers
- .get(hyper::header::CONTENT_TYPE)
- .map(|x| x.to_str())
- .unwrap_or(Ok("blob"))?
- .to_string())
-}
-
pub(crate) fn get_headers(headers: &HeaderMap<HeaderValue>) -> Result<ObjectVersionHeaders, Error> {
- let content_type = get_mime_type(headers)?;
- let mut other = BTreeMap::new();
+ let mut ret = Vec::new();
// Preserve standard headers
let standard_header = vec![
+ hyper::header::CONTENT_TYPE,
hyper::header::CACHE_CONTROL,
hyper::header::CONTENT_DISPOSITION,
hyper::header::CONTENT_ENCODING,
hyper::header::CONTENT_LANGUAGE,
hyper::header::EXPIRES,
];
- for h in standard_header.iter() {
- if let Some(v) = headers.get(h) {
- match v.to_str() {
- Ok(v_str) => {
- other.insert(h.to_string(), v_str.to_string());
- }
- Err(e) => {
- warn!("Discarding header {}, error in .to_str(): {}", h, e);
- }
- }
+ for name in standard_header.iter() {
+ if let Some(value) = headers.get(name) {
+ ret.push((name.to_string(), value.to_str()?.to_string()));
}
}
// Preserve x-amz-meta- headers
- for (k, v) in headers.iter() {
- if k.as_str().starts_with("x-amz-meta-") {
- match std::str::from_utf8(v.as_bytes()) {
- Ok(v_str) => {
- other.insert(k.to_string(), v_str.to_string());
- }
- Err(e) => {
- warn!("Discarding header {}, error in .to_str(): {}", k, e);
- }
- }
+ for (name, value) in headers.iter() {
+ if name.as_str().starts_with("x-amz-meta-") {
+ ret.push((
+ name.to_string(),
+ std::str::from_utf8(value.as_bytes())?.to_string(),
+ ));
}
}
- Ok(ObjectVersionHeaders {
- content_type,
- other,
- })
+ Ok(ObjectVersionHeaders(ret))
}
pub(crate) fn next_timestamp(existing_object: Option<&Object>) -> u64 {