diff options
Diffstat (limited to 'src/api/s3')
-rw-r--r-- | src/api/s3/api_server.rs | 33 | ||||
-rw-r--r-- | src/api/s3/copy.rs | 121 | ||||
-rw-r--r-- | src/api/s3/cors.rs | 22 | ||||
-rw-r--r-- | src/api/s3/delete.rs | 42 | ||||
-rw-r--r-- | src/api/s3/get.rs | 6 | ||||
-rw-r--r-- | src/api/s3/lifecycle.rs | 401 | ||||
-rw-r--r-- | src/api/s3/list.rs | 414 | ||||
-rw-r--r-- | src/api/s3/mod.rs | 2 | ||||
-rw-r--r-- | src/api/s3/multipart.rs | 468 | ||||
-rw-r--r-- | src/api/s3/put.rs | 474 | ||||
-rw-r--r-- | src/api/s3/website.rs | 22 |
11 files changed, 1252 insertions, 753 deletions
diff --git a/src/api/s3/api_server.rs b/src/api/s3/api_server.rs index 27837297..887839dd 100644 --- a/src/api/s3/api_server.rs +++ b/src/api/s3/api_server.rs @@ -1,4 +1,3 @@ -use std::net::SocketAddr; use std::sync::Arc; use async_trait::async_trait; @@ -10,6 +9,7 @@ use hyper::{Body, Request, Response}; use opentelemetry::{trace::SpanRef, KeyValue}; use garage_util::error::Error as GarageError; +use garage_util::socket_address::UnixOrTCPSocketAddress; use garage_model::garage::Garage; use garage_model::key_table::Key; @@ -26,7 +26,9 @@ use crate::s3::copy::*; use crate::s3::cors::*; use crate::s3::delete::*; use crate::s3::get::*; +use crate::s3::lifecycle::*; use crate::s3::list::*; +use crate::s3::multipart::*; use crate::s3::post_object::handle_post_object; use crate::s3::put::*; use crate::s3::router::Endpoint; @@ -44,12 +46,12 @@ pub(crate) struct S3ApiEndpoint { impl S3ApiServer { pub async fn run( garage: Arc<Garage>, - addr: SocketAddr, + addr: UnixOrTCPSocketAddress, s3_region: String, shutdown_signal: impl Future<Output = ()>, ) -> Result<(), GarageError> { ApiServer::new(s3_region, S3ApiServer { garage }) - .run_server(addr, shutdown_signal) + .run_server(addr, None, shutdown_signal) .await } @@ -256,7 +258,7 @@ impl ApiHandler for S3ApiServer { bucket_name, bucket_id, delimiter: delimiter.map(|d| d.to_string()), - page_size: max_keys.map(|p| p.clamp(1, 1000)).unwrap_or(1000), + page_size: max_keys.unwrap_or(1000).clamp(1, 1000), prefix: prefix.unwrap_or_default(), urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false), }, @@ -286,7 +288,7 @@ impl ApiHandler for S3ApiServer { bucket_name, bucket_id, delimiter: delimiter.map(|d| d.to_string()), - page_size: max_keys.map(|p| p.clamp(1, 1000)).unwrap_or(1000), + page_size: max_keys.unwrap_or(1000).clamp(1, 1000), urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false), prefix: prefix.unwrap_or_default(), }, @@ -319,7 +321,7 @@ impl ApiHandler for S3ApiServer { bucket_name, bucket_id, delimiter: delimiter.map(|d| d.to_string()), - page_size: max_uploads.map(|p| p.clamp(1, 1000)).unwrap_or(1000), + page_size: max_uploads.unwrap_or(1000).clamp(1, 1000), prefix: prefix.unwrap_or_default(), urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false), }, @@ -342,8 +344,8 @@ impl ApiHandler for S3ApiServer { bucket_id, key, upload_id, - part_number_marker: part_number_marker.map(|p| p.clamp(1, 10000)), - max_parts: max_parts.map(|p| p.clamp(1, 1000)).unwrap_or(1000), + part_number_marker: part_number_marker.map(|p| p.min(10000)), + max_parts: max_parts.unwrap_or(1000).clamp(1, 1000), }, ) .await @@ -353,14 +355,21 @@ impl ApiHandler for S3ApiServer { } Endpoint::GetBucketWebsite {} => handle_get_website(&bucket).await, Endpoint::PutBucketWebsite {} => { - handle_put_website(garage, bucket_id, req, content_sha256).await + handle_put_website(garage, bucket.clone(), req, content_sha256).await } - Endpoint::DeleteBucketWebsite {} => handle_delete_website(garage, bucket_id).await, + Endpoint::DeleteBucketWebsite {} => handle_delete_website(garage, bucket.clone()).await, Endpoint::GetBucketCors {} => handle_get_cors(&bucket).await, Endpoint::PutBucketCors {} => { - handle_put_cors(garage, bucket_id, req, content_sha256).await + handle_put_cors(garage, bucket.clone(), req, content_sha256).await + } + Endpoint::DeleteBucketCors {} => handle_delete_cors(garage, bucket.clone()).await, + Endpoint::GetBucketLifecycleConfiguration {} => handle_get_lifecycle(&bucket).await, + Endpoint::PutBucketLifecycleConfiguration {} => { + handle_put_lifecycle(garage, bucket.clone(), req, content_sha256).await + } + Endpoint::DeleteBucketLifecycle {} => { + handle_delete_lifecycle(garage, bucket.clone()).await } - Endpoint::DeleteBucketCors {} => handle_delete_cors(garage, bucket_id).await, endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())), }; diff --git a/src/api/s3/copy.rs b/src/api/s3/copy.rs index 7eb6459d..68b4f0c9 100644 --- a/src/api/s3/copy.rs +++ b/src/api/s3/copy.rs @@ -2,7 +2,7 @@ use std::pin::Pin; use std::sync::Arc; use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use futures::{stream, stream::Stream, StreamExt, TryFutureExt}; +use futures::{stream, stream::Stream, StreamExt}; use md5::{Digest as Md5Digest, Md5}; use bytes::Bytes; @@ -18,12 +18,14 @@ use garage_util::time::*; use garage_model::garage::Garage; use garage_model::key_table::Key; use garage_model::s3::block_ref_table::*; +use garage_model::s3::mpu_table::*; use garage_model::s3::object_table::*; use garage_model::s3::version_table::*; use crate::helpers::parse_bucket_key; use crate::s3::error::*; -use crate::s3::put::{decode_upload_id, get_headers}; +use crate::s3::multipart; +use crate::s3::put::get_headers; use crate::s3::xml::{self as s3_xml, xmlns_tag}; pub async fn handle_copy( @@ -92,7 +94,10 @@ pub async fn handle_copy( let tmp_dest_object_version = ObjectVersion { uuid: new_uuid, timestamp: new_timestamp, - state: ObjectVersionState::Uploading(new_meta.headers.clone()), + state: ObjectVersionState::Uploading { + headers: new_meta.headers.clone(), + multipart: false, + }, }; let tmp_dest_object = Object::new( dest_bucket_id, @@ -105,8 +110,14 @@ pub async fn handle_copy( // this means that the BlockRef entries linked to this version cannot be // marked as deleted (they are marked as deleted only if the Version // doesn't exist or is marked as deleted). - let mut dest_version = - Version::new(new_uuid, dest_bucket_id, dest_key.to_string(), false); + let mut dest_version = Version::new( + new_uuid, + VersionBacklink::Object { + bucket_id: dest_bucket_id, + key: dest_key.to_string(), + }, + false, + ); garage.version_table.insert(&dest_version).await?; // Fill in block list for version and insert block refs @@ -179,17 +190,13 @@ pub async fn handle_upload_part_copy( ) -> Result<Response<Body>, Error> { let copy_precondition = CopyPreconditionHeaders::parse(req)?; - let dest_version_uuid = decode_upload_id(upload_id)?; + let dest_upload_id = multipart::decode_upload_id(upload_id)?; let dest_key = dest_key.to_string(); - let (source_object, dest_object) = futures::try_join!( + let (source_object, (_, _, mut dest_mpu)) = futures::try_join!( get_copy_source(&garage, api_key, req), - garage - .object_table - .get(&dest_bucket_id, &dest_key) - .map_err(Error::from), + multipart::get_upload(&garage, &dest_bucket_id, &dest_key, &dest_upload_id) )?; - let dest_object = dest_object.ok_or(Error::NoSuchKey)?; let (source_object_version, source_version_data, source_version_meta) = extract_source_info(&source_object)?; @@ -217,15 +224,6 @@ pub async fn handle_upload_part_copy( }, }; - // Check destination version is indeed in uploading state - if !dest_object - .versions() - .iter() - .any(|v| v.uuid == dest_version_uuid && v.is_uploading()) - { - return Err(Error::NoSuchUpload); - } - // Check source version is not inlined match source_version_data { ObjectVersionData::DeleteMarker => unreachable!(), @@ -242,23 +240,11 @@ pub async fn handle_upload_part_copy( // Fetch source versin with its block list, // and destination version to check part hasn't yet been uploaded - let (source_version, dest_version) = futures::try_join!( - garage - .version_table - .get(&source_object_version.uuid, &EmptyKey), - garage.version_table.get(&dest_version_uuid, &EmptyKey), - )?; - let source_version = source_version.ok_or(Error::NoSuchKey)?; - - // Check this part number hasn't yet been uploaded - if let Some(dv) = dest_version { - if dv.has_part_number(part_number) { - return Err(Error::bad_request(format!( - "Part number {} has already been uploaded", - part_number - ))); - } - } + let source_version = garage + .version_table + .get(&source_object_version.uuid, &EmptyKey) + .await? + .ok_or(Error::NoSuchKey)?; // We want to reuse blocks from the source version as much as possible. // However, we still need to get the data from these blocks @@ -299,6 +285,33 @@ pub async fn handle_upload_part_copy( current_offset = block_end; } + // Calculate the identity of destination part: timestamp, version id + let dest_version_id = gen_uuid(); + let dest_mpu_part_key = MpuPartKey { + part_number, + timestamp: dest_mpu.next_timestamp(part_number), + }; + + // Create the uploaded part + dest_mpu.parts.clear(); + dest_mpu.parts.put( + dest_mpu_part_key, + MpuPart { + version: dest_version_id, + etag: None, + size: None, + }, + ); + garage.mpu_table.insert(&dest_mpu).await?; + + let mut dest_version = Version::new( + dest_version_id, + VersionBacklink::MultipartUpload { + upload_id: dest_upload_id, + }, + false, + ); + // Now, actually copy the blocks let mut md5hasher = Md5::new(); @@ -348,8 +361,8 @@ pub async fn handle_upload_part_copy( let must_upload = existing_block_hash.is_none(); let final_hash = existing_block_hash.unwrap_or_else(|| blake2sum(&data[..])); - let mut version = Version::new(dest_version_uuid, dest_bucket_id, dest_key.clone(), false); - version.blocks.put( + dest_version.blocks.clear(); + dest_version.blocks.put( VersionBlockKey { part_number, offset: current_offset, @@ -363,7 +376,7 @@ pub async fn handle_upload_part_copy( let block_ref = BlockRef { block: final_hash, - version: dest_version_uuid, + version: dest_version_id, deleted: false.into(), }; @@ -378,23 +391,33 @@ pub async fn handle_upload_part_copy( Ok(()) } }, - // Thing 2: we need to insert the block in the version - garage.version_table.insert(&version), - // Thing 3: we need to add a block reference - garage.block_ref_table.insert(&block_ref), + async { + // Thing 2: we need to insert the block in the version + garage.version_table.insert(&dest_version).await?; + // Thing 3: we need to add a block reference + garage.block_ref_table.insert(&block_ref).await + }, // Thing 4: we need to prefetch the next block defragmenter.next(), )?; - next_block = res.3; + next_block = res.2; } + assert_eq!(current_offset, source_range.length); + let data_md5sum = md5hasher.finalize(); let etag = hex::encode(data_md5sum); // Put the part's ETag in the Versiontable - let mut version = Version::new(dest_version_uuid, dest_bucket_id, dest_key.clone(), false); - version.parts_etags.put(part_number, etag.clone()); - garage.version_table.insert(&version).await?; + dest_mpu.parts.put( + dest_mpu_part_key, + MpuPart { + version: dest_version_id, + etag: Some(etag.clone()), + size: Some(current_offset), + }, + ); + garage.mpu_table.insert(&dest_mpu).await?; // LGTM let resp_xml = s3_xml::to_xml_with_header(&CopyPartResult { diff --git a/src/api/s3/cors.rs b/src/api/s3/cors.rs index c7273464..49097ad1 100644 --- a/src/api/s3/cors.rs +++ b/src/api/s3/cors.rs @@ -44,14 +44,11 @@ pub async fn handle_get_cors(bucket: &Bucket) -> Result<Response<Body>, Error> { pub async fn handle_delete_cors( garage: Arc<Garage>, - bucket_id: Uuid, + mut bucket: Bucket, ) -> Result<Response<Body>, Error> { - let mut bucket = garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - - let param = bucket.params_mut().unwrap(); + let param = bucket + .params_mut() + .ok_or_internal_error("Bucket should not be deleted at this point")?; param.cors_config.update(None); garage.bucket_table.insert(&bucket).await?; @@ -63,7 +60,7 @@ pub async fn handle_delete_cors( pub async fn handle_put_cors( garage: Arc<Garage>, - bucket_id: Uuid, + mut bucket: Bucket, req: Request<Body>, content_sha256: Option<Hash>, ) -> Result<Response<Body>, Error> { @@ -73,12 +70,9 @@ pub async fn handle_put_cors( verify_signed_content(content_sha256, &body[..])?; } - let mut bucket = garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - - let param = bucket.params_mut().unwrap(); + let param = bucket + .params_mut() + .ok_or_internal_error("Bucket should not be deleted at this point")?; let conf: CorsConfiguration = from_reader(&body as &[u8])?; conf.validate()?; diff --git a/src/api/s3/delete.rs b/src/api/s3/delete.rs index b337155f..1c491eac 100644 --- a/src/api/s3/delete.rs +++ b/src/api/s3/delete.rs @@ -3,12 +3,12 @@ use std::sync::Arc; use hyper::{Body, Request, Response, StatusCode}; use garage_util::data::*; -use garage_util::time::*; use garage_model::garage::Garage; use garage_model::s3::object_table::*; use crate::s3::error::*; +use crate::s3::put::next_timestamp; use crate::s3::xml as s3_xml; use crate::signature::verify_signed_content; @@ -23,40 +23,36 @@ async fn handle_delete_internal( .await? .ok_or(Error::NoSuchKey)?; // No need to delete - let interesting_versions = object.versions().iter().filter(|v| { - !matches!( - v.state, - ObjectVersionState::Aborted - | ObjectVersionState::Complete(ObjectVersionData::DeleteMarker) - ) - }); - - let mut version_to_delete = None; - let mut timestamp = now_msec(); - for v in interesting_versions { - if v.timestamp + 1 > timestamp || version_to_delete.is_none() { - version_to_delete = Some(v.uuid); + let del_timestamp = next_timestamp(Some(&object)); + let del_uuid = gen_uuid(); + + let deleted_version = object + .versions() + .iter() + .rev() + .find(|v| !matches!(&v.state, ObjectVersionState::Aborted)) + .or_else(|| object.versions().iter().rev().next()); + let deleted_version = match deleted_version { + Some(dv) => dv.uuid, + None => { + warn!("Object has no versions: {:?}", object); + Uuid::from([0u8; 32]) } - timestamp = std::cmp::max(timestamp, v.timestamp + 1); - } - - let deleted_version = version_to_delete.ok_or(Error::NoSuchKey)?; - - let version_uuid = gen_uuid(); + }; let object = Object::new( bucket_id, key.into(), vec![ObjectVersion { - uuid: version_uuid, - timestamp, + uuid: del_uuid, + timestamp: del_timestamp, state: ObjectVersionState::Complete(ObjectVersionData::DeleteMarker), }], ); garage.object_table.insert(&object).await?; - Ok((deleted_version, version_uuid)) + Ok((deleted_version, del_uuid)) } pub async fn handle_delete( diff --git a/src/api/s3/get.rs b/src/api/s3/get.rs index cde7b461..5e682726 100644 --- a/src/api/s3/get.rs +++ b/src/api/s3/get.rs @@ -149,7 +149,6 @@ pub async fn handle_head( let (part_offset, part_end) = calculate_part_bounds(&version, pn).ok_or(Error::InvalidPart)?; - let n_parts = version.parts_etags.items().len(); Ok(object_headers(object_version, version_meta) .header(CONTENT_LENGTH, format!("{}", part_end - part_offset)) @@ -162,7 +161,7 @@ pub async fn handle_head( version_meta.size ), ) - .header(X_AMZ_MP_PARTS_COUNT, format!("{}", n_parts)) + .header(X_AMZ_MP_PARTS_COUNT, format!("{}", version.n_parts()?)) .status(StatusCode::PARTIAL_CONTENT) .body(Body::empty())?) } @@ -376,7 +375,6 @@ async fn handle_get_part( let (begin, end) = calculate_part_bounds(&version, part_number).ok_or(Error::InvalidPart)?; - let n_parts = version.parts_etags.items().len(); let body = body_from_blocks_range(garage, version.blocks.items(), begin, end); @@ -386,7 +384,7 @@ async fn handle_get_part( CONTENT_RANGE, format!("bytes {}-{}/{}", begin, end - 1, version_meta.size), ) - .header(X_AMZ_MP_PARTS_COUNT, format!("{}", n_parts)) + .header(X_AMZ_MP_PARTS_COUNT, format!("{}", version.n_parts()?)) .body(body)?) } _ => unreachable!(), diff --git a/src/api/s3/lifecycle.rs b/src/api/s3/lifecycle.rs new file mode 100644 index 00000000..1e7d6755 --- /dev/null +++ b/src/api/s3/lifecycle.rs @@ -0,0 +1,401 @@ +use quick_xml::de::from_reader; +use std::sync::Arc; + +use hyper::{Body, Request, Response, StatusCode}; + +use serde::{Deserialize, Serialize}; + +use crate::s3::error::*; +use crate::s3::xml::{to_xml_with_header, xmlns_tag, IntValue, Value}; +use crate::signature::verify_signed_content; + +use garage_model::bucket_table::{ + parse_lifecycle_date, Bucket, LifecycleExpiration as GarageLifecycleExpiration, + LifecycleFilter as GarageLifecycleFilter, LifecycleRule as GarageLifecycleRule, +}; +use garage_model::garage::Garage; +use garage_util::data::*; + +pub async fn handle_get_lifecycle(bucket: &Bucket) -> Result<Response<Body>, Error> { + let param = bucket + .params() + .ok_or_internal_error("Bucket should not be deleted at this point")?; + + if let Some(lifecycle) = param.lifecycle_config.get() { + let wc = LifecycleConfiguration::from_garage_lifecycle_config(lifecycle); + let xml = to_xml_with_header(&wc)?; + Ok(Response::builder() + .status(StatusCode::OK) + .header(http::header::CONTENT_TYPE, "application/xml") + .body(Body::from(xml))?) + } else { + Ok(Response::builder() + .status(StatusCode::NO_CONTENT) + .body(Body::empty())?) + } +} + +pub async fn handle_delete_lifecycle( + garage: Arc<Garage>, + mut bucket: Bucket, +) -> Result<Response<Body>, Error> { + let param = bucket + .params_mut() + .ok_or_internal_error("Bucket should not be deleted at this point")?; + + param.lifecycle_config.update(None); + garage.bucket_table.insert(&bucket).await?; + + Ok(Response::builder() + .status(StatusCode::NO_CONTENT) + .body(Body::empty())?) +} + +pub async fn handle_put_lifecycle( + garage: Arc<Garage>, + mut bucket: Bucket, + req: Request<Body>, + content_sha256: Option<Hash>, +) -> Result<Response<Body>, Error> { + let body = hyper::body::to_bytes(req.into_body()).await?; + + if let Some(content_sha256) = content_sha256 { + verify_signed_content(content_sha256, &body[..])?; + } + + let param = bucket + .params_mut() + .ok_or_internal_error("Bucket should not be deleted at this point")?; + + let conf: LifecycleConfiguration = from_reader(&body as &[u8])?; + let config = conf + .validate_into_garage_lifecycle_config() + .ok_or_bad_request("Invalid lifecycle configuration")?; + + param.lifecycle_config.update(Some(config)); + garage.bucket_table.insert(&bucket).await?; + + Ok(Response::builder() + .status(StatusCode::OK) + .body(Body::empty())?) +} + +// ---- SERIALIZATION AND DESERIALIZATION TO/FROM S3 XML ---- + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] +pub struct LifecycleConfiguration { + #[serde(serialize_with = "xmlns_tag", skip_deserializing)] + pub xmlns: (), + #[serde(rename = "Rule")] + pub lifecycle_rules: Vec<LifecycleRule>, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] +pub struct LifecycleRule { + #[serde(rename = "ID")] + pub id: Option<Value>, + #[serde(rename = "Status")] + pub status: Value, + #[serde(rename = "Filter", default)] + pub filter: Option<Filter>, + #[serde(rename = "Expiration", default)] + pub expiration: Option<Expiration>, + #[serde(rename = "AbortIncompleteMultipartUpload", default)] + pub abort_incomplete_mpu: Option<AbortIncompleteMpu>, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Default)] +pub struct Filter { + #[serde(rename = "And")] + pub and: Option<Box<Filter>>, + #[serde(rename = "Prefix")] + pub prefix: Option<Value>, + #[serde(rename = "ObjectSizeGreaterThan")] + pub size_gt: Option<IntValue>, + #[serde(rename = "ObjectSizeLessThan")] + pub size_lt: Option<IntValue>, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] +pub struct Expiration { + #[serde(rename = "Days")] + pub days: Option<IntValue>, + #[serde(rename = "Date")] + pub at_date: Option<Value>, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] +pub struct AbortIncompleteMpu { + #[serde(rename = "DaysAfterInitiation")] + pub days: IntValue, +} + +impl LifecycleConfiguration { + pub fn validate_into_garage_lifecycle_config( + self, + ) -> Result<Vec<GarageLifecycleRule>, &'static str> { + let mut ret = vec![]; + for rule in self.lifecycle_rules { + ret.push(rule.validate_into_garage_lifecycle_rule()?); + } + Ok(ret) + } + + pub fn from_garage_lifecycle_config(config: &[GarageLifecycleRule]) -> Self { + Self { + xmlns: (), + lifecycle_rules: config + .iter() + .map(LifecycleRule::from_garage_lifecycle_rule) + .collect(), + } + } +} + +impl LifecycleRule { + pub fn validate_into_garage_lifecycle_rule(self) -> Result<GarageLifecycleRule, &'static str> { + let enabled = match self.status.0.as_str() { + "Enabled" => true, + "Disabled" => false, + _ => return Err("invalid value for <Status>"), + }; + + let filter = self + .filter + .map(Filter::validate_into_garage_lifecycle_filter) + .transpose()? + .unwrap_or_default(); + + let abort_incomplete_mpu_days = self.abort_incomplete_mpu.map(|x| x.days.0 as usize); + + let expiration = self + .expiration + .map(Expiration::validate_into_garage_lifecycle_expiration) + .transpose()?; + + Ok(GarageLifecycleRule { + id: self.id.map(|x| x.0), + enabled, + filter, + abort_incomplete_mpu_days, + expiration, + }) + } + + pub fn from_garage_lifecycle_rule(rule: &GarageLifecycleRule) -> Self { + Self { + id: rule.id.as_deref().map(Value::from), + status: if rule.enabled { + Value::from("Enabled") + } else { + Value::from("Disabled") + }, + filter: Filter::from_garage_lifecycle_filter(&rule.filter), + abort_incomplete_mpu: rule + .abort_incomplete_mpu_days + .map(|days| AbortIncompleteMpu { + days: IntValue(days as i64), + }), + expiration: rule + .expiration + .as_ref() + .map(Expiration::from_garage_lifecycle_expiration), + } + } +} + +impl Filter { + pub fn count(&self) -> i32 { + fn count<T>(x: &Option<T>) -> i32 { + x.as_ref().map(|_| 1).unwrap_or(0) + } + count(&self.prefix) + count(&self.size_gt) + count(&self.size_lt) + } + + pub fn validate_into_garage_lifecycle_filter( + self, + ) -> Result<GarageLifecycleFilter, &'static str> { + if self.count() > 0 && self.and.is_some() { + Err("Filter tag cannot contain both <And> and another condition") + } else if let Some(and) = self.and { + if and.and.is_some() { + return Err("Nested <And> tags"); + } + Ok(and.internal_into_garage_lifecycle_filter()) + } else if self.count() > 1 { + Err("Multiple Filter conditions must be wrapped in an <And> tag") + } else { + Ok(self.internal_into_garage_lifecycle_filter()) + } + } + + fn internal_into_garage_lifecycle_filter(self) -> GarageLifecycleFilter { + GarageLifecycleFilter { + prefix: self.prefix.map(|x| x.0), + size_gt: self.size_gt.map(|x| x.0 as u64), + size_lt: self.size_lt.map(|x| x.0 as u64), + } + } + + pub fn from_garage_lifecycle_filter(rule: &GarageLifecycleFilter) -> Option<Self> { + let filter = Filter { + and: None, + prefix: rule.prefix.as_deref().map(Value::from), + size_gt: rule.size_gt.map(|x| IntValue(x as i64)), + size_lt: rule.size_lt.map(|x| IntValue(x as i64)), + }; + match filter.count() { + 0 => None, + 1 => Some(filter), + _ => Some(Filter { + and: Some(Box::new(filter)), + ..Default::default() + }), + } + } +} + +impl Expiration { + pub fn validate_into_garage_lifecycle_expiration( + self, + ) -> Result<GarageLifecycleExpiration, &'static str> { + match (self.days, self.at_date) { + (Some(_), Some(_)) => Err("cannot have both <Days> and <Date> in <Expiration>"), + (None, None) => Err("<Expiration> must contain either <Days> or <Date>"), + (Some(days), None) => Ok(GarageLifecycleExpiration::AfterDays(days.0 as usize)), + (None, Some(date)) => { + parse_lifecycle_date(&date.0)?; + Ok(GarageLifecycleExpiration::AtDate(date.0)) + } + } + } + + pub fn from_garage_lifecycle_expiration(exp: &GarageLifecycleExpiration) -> Self { + match exp { + GarageLifecycleExpiration::AfterDays(days) => Expiration { + days: Some(IntValue(*days as i64)), + at_date: None, + }, + GarageLifecycleExpiration::AtDate(date) => Expiration { + days: None, + at_date: Some(Value(date.to_string())), + }, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use quick_xml::de::from_str; + + #[test] + fn test_deserialize_lifecycle_config() -> Result<(), Error> { + let message = r#"<?xml version="1.0" encoding="UTF-8"?> +<LifecycleConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/"> + <Rule> + <ID>id1</ID> + <Status>Enabled</Status> + <Filter> + <Prefix>documents/</Prefix> + </Filter> + <AbortIncompleteMultipartUpload> + <DaysAfterInitiation>7</DaysAfterInitiation> + </AbortIncompleteMultipartUpload> + </Rule> + <Rule> + <ID>id2</ID> + <Status>Enabled</Status> + <Filter> + <And> + <Prefix>logs/</Prefix> + <ObjectSizeGreaterThan>1000000</ObjectSizeGreaterThan> + </And> + </Filter> + <Expiration> + <Days>365</Days> + </Expiration> + </Rule> +</LifecycleConfiguration>"#; + let conf: LifecycleConfiguration = from_str(message).unwrap(); + let ref_value = LifecycleConfiguration { + xmlns: (), + lifecycle_rules: vec![ + LifecycleRule { + id: Some("id1".into()), + status: "Enabled".into(), + filter: Some(Filter { + prefix: Some("documents/".into()), + ..Default::default() + }), + expiration: None, + abort_incomplete_mpu: Some(AbortIncompleteMpu { days: IntValue(7) }), + }, + LifecycleRule { + id: Some("id2".into()), + status: "Enabled".into(), + filter: Some(Filter { + and: Some(Box::new(Filter { + prefix: Some("logs/".into()), + size_gt: Some(IntValue(1000000)), + ..Default::default() + })), + ..Default::default() + }), + expiration: Some(Expiration { + days: Some(IntValue(365)), + at_date: None, + }), + abort_incomplete_mpu: None, + }, + ], + }; + assert_eq! { + ref_value, + conf + }; + + let message2 = to_xml_with_header(&ref_value)?; + + let cleanup = |c: &str| c.replace(char::is_whitespace, ""); + assert_eq!(cleanup(message), cleanup(&message2)); + + // Check validation + let validated = ref_value + .validate_into_garage_lifecycle_config() + .ok_or_bad_request("invalid xml config")?; + + let ref_config = vec![ + GarageLifecycleRule { + id: Some("id1".into()), + enabled: true, + filter: GarageLifecycleFilter { + prefix: Some("documents/".into()), + ..Default::default() + }, + expiration: None, + abort_incomplete_mpu_days: Some(7), + }, + GarageLifecycleRule { + id: Some("id2".into()), + enabled: true, + filter: GarageLifecycleFilter { + prefix: Some("logs/".into()), + size_gt: Some(1000000), + ..Default::default() + }, + expiration: Some(GarageLifecycleExpiration::AfterDays(365)), + abort_incomplete_mpu_days: None, + }, + ]; + assert_eq!(validated, ref_config); + + let message3 = to_xml_with_header(&LifecycleConfiguration::from_garage_lifecycle_config( + &validated, + ))?; + assert_eq!(cleanup(message), cleanup(&message3)); + + Ok(()) + } +} diff --git a/src/api/s3/list.rs b/src/api/s3/list.rs index 5cb0d65a..1b9e8cd5 100644 --- a/src/api/s3/list.rs +++ b/src/api/s3/list.rs @@ -1,4 +1,3 @@ -use std::cmp::Ordering; use std::collections::{BTreeMap, BTreeSet}; use std::iter::{Iterator, Peekable}; use std::sync::Arc; @@ -11,15 +10,15 @@ use garage_util::error::Error as GarageError; use garage_util::time::*; use garage_model::garage::Garage; +use garage_model::s3::mpu_table::*; use garage_model::s3::object_table::*; -use garage_model::s3::version_table::Version; -use garage_table::{EmptyKey, EnumerationOrder}; +use garage_table::EnumerationOrder; use crate::encoding::*; use crate::helpers::key_after_prefix; use crate::s3::error::*; -use crate::s3::put as s3_put; +use crate::s3::multipart as s3_multipart; use crate::s3::xml as s3_xml; const DUMMY_NAME: &str = "Dummy Key"; @@ -176,7 +175,9 @@ pub async fn handle_list_multipart_upload( t.get_range( &bucket, key, - Some(ObjectFilter::IsUploading), + Some(ObjectFilter::IsUploading { + check_multipart: Some(true), + }), count, EnumerationOrder::Forward, ) @@ -272,24 +273,26 @@ pub async fn handle_list_parts( ) -> Result<Response<Body>, Error> { debug!("ListParts {:?}", query); - let upload_id = s3_put::decode_upload_id(&query.upload_id)?; + let upload_id = s3_multipart::decode_upload_id(&query.upload_id)?; - let (object, version) = futures::try_join!( - garage.object_table.get(&query.bucket_id, &query.key), - garage.version_table.get(&upload_id, &EmptyKey), - )?; + let (_, _, mpu) = + s3_multipart::get_upload(&garage, &query.bucket_id, &query.key, &upload_id).await?; - let (info, next) = fetch_part_info(query, object, version, upload_id)?; + let (info, next) = fetch_part_info(query, &mpu)?; let result = s3_xml::ListPartsResult { xmlns: (), + + // Query parameters bucket: s3_xml::Value(query.bucket_name.to_string()), key: s3_xml::Value(query.key.to_string()), upload_id: s3_xml::Value(query.upload_id.to_string()), part_number_marker: query.part_number_marker.map(|e| s3_xml::IntValue(e as i64)), - next_part_number_marker: next.map(|e| s3_xml::IntValue(e as i64)), max_parts: s3_xml::IntValue(query.max_parts as i64), - is_truncated: s3_xml::Value(next.map(|_| "true").unwrap_or("false").to_string()), + + // Result values + next_part_number_marker: next.map(|e| s3_xml::IntValue(e as i64)), + is_truncated: s3_xml::Value(format!("{}", next.is_some())), parts: info .iter() .map(|part| s3_xml::PartItem { @@ -299,6 +302,8 @@ pub async fn handle_list_parts( size: s3_xml::IntValue(part.size as i64), }) .collect(), + + // Dummy result values (unsupported features) initiator: s3_xml::Initiator { display_name: s3_xml::Value(DUMMY_NAME.to_string()), id: s3_xml::Value(DUMMY_KEY.to_string()), @@ -335,8 +340,8 @@ struct UploadInfo { } #[derive(Debug, PartialEq)] -struct PartInfo { - etag: String, +struct PartInfo<'a> { + etag: &'a str, timestamp: u64, part_number: u64, size: u64, @@ -421,8 +426,10 @@ where // Drop the first key if needed // Only AfterKey requires it according to the S3 spec and our implem. match (&cursor, iter.peek()) { - (RangeBegin::AfterKey { key }, Some(object)) if &object.key == key => iter.next(), - (_, _) => None, + (RangeBegin::AfterKey { key }, Some(object)) if &object.key == key => { + iter.next(); + } + _ => (), }; while let Some(object) = iter.peek() { @@ -431,16 +438,22 @@ where return Ok(None); } - cursor = match acc.extract(query, &cursor, &mut iter) { - ExtractionResult::Extracted { key } => RangeBegin::AfterKey { key }, + match acc.extract(query, &cursor, &mut iter) { + ExtractionResult::Extracted { key } => { + cursor = RangeBegin::AfterKey { key }; + } ExtractionResult::SkipTo { key, fallback_key } => { - RangeBegin::IncludingKey { key, fallback_key } + cursor = RangeBegin::IncludingKey { key, fallback_key }; } ExtractionResult::FilledAtUpload { key, upload } => { - return Ok(Some(RangeBegin::AfterUpload { key, upload })) + return Ok(Some(RangeBegin::AfterUpload { key, upload })); + } + ExtractionResult::Filled => { + return Ok(Some(cursor)); + } + ExtractionResult::NoMore => { + return Ok(None); } - ExtractionResult::Filled => return Ok(Some(cursor)), - ExtractionResult::NoMore => return Ok(None), }; } @@ -456,107 +469,51 @@ where } } -fn fetch_part_info( +fn fetch_part_info<'a>( query: &ListPartsQuery, - object: Option<Object>, - version: Option<Version>, - upload_id: Uuid, -) -> Result<(Vec<PartInfo>, Option<u64>), Error> { - // Check results - let object = object.ok_or(Error::NoSuchKey)?; - - let obj_version = object - .versions() - .iter() - .find(|v| v.uuid == upload_id && v.is_uploading()) - .ok_or(Error::NoSuchUpload)?; - - let version = version.ok_or(Error::NoSuchKey)?; - - // Cut the beginning of our 2 vectors if required - let (etags, blocks) = match &query.part_number_marker { - Some(marker) => { - let next = marker + 1; - - let part_idx = into_ok_or_err( - version - .parts_etags - .items() - .binary_search_by(|(part_num, _)| part_num.cmp(&next)), - ); - let parts = &version.parts_etags.items()[part_idx..]; - - let block_idx = into_ok_or_err( - version - .blocks - .items() - .binary_search_by(|(vkey, _)| vkey.part_number.cmp(&next)), - ); - let blocks = &version.blocks.items()[block_idx..]; - - (parts, blocks) - } - None => (version.parts_etags.items(), version.blocks.items()), - }; - - // Use the block vector to compute a (part_number, size) vector - let mut size = Vec::<(u64, u64)>::new(); - blocks.iter().for_each(|(key, val)| { - let mut new_size = val.size; - match size.pop() { - Some((part_number, size)) if part_number == key.part_number => new_size += size, - Some(v) => size.push(v), - None => (), - } - size.push((key.part_number, new_size)) - }); - - // Merge the etag vector and size vector to build a PartInfo vector - let max_parts = query.max_parts as usize; - let (mut etag_iter, mut size_iter) = (etags.iter().peekable(), size.iter().peekable()); - - let mut info = Vec::<PartInfo>::with_capacity(max_parts); - - while info.len() < max_parts { - match (etag_iter.peek(), size_iter.peek()) { - (Some((ep, etag)), Some((sp, size))) => match ep.cmp(sp) { - Ordering::Less => { - debug!("ETag information ignored due to missing corresponding block information. Query: {:?}", query); - etag_iter.next(); - } - Ordering::Equal => { - info.push(PartInfo { - etag: etag.to_string(), - timestamp: obj_version.timestamp, - part_number: *ep, - size: *size, - }); - etag_iter.next(); - size_iter.next(); + mpu: &'a MultipartUpload, +) -> Result<(Vec<PartInfo<'a>>, Option<u64>), Error> { + assert!((1..=1000).contains(&query.max_parts)); // see s3/api_server.rs + + // Parse multipart upload part list, removing parts not yet finished + // and failed part uploads that were overwritten + let mut parts: Vec<PartInfo<'a>> = Vec::with_capacity(mpu.parts.items().len()); + for (pk, p) in mpu.parts.items().iter() { + if let (Some(etag), Some(size)) = (&p.etag, p.size) { + let part_info = PartInfo { + part_number: pk.part_number, + timestamp: pk.timestamp, + etag, + size, + }; + match parts.last_mut() { + Some(lastpart) if lastpart.part_number == pk.part_number => { + *lastpart = part_info; } - Ordering::Greater => { - debug!("Block information ignored due to missing corresponding ETag information. Query: {:?}", query); - size_iter.next(); + _ => { + parts.push(part_info); } - }, - (None, None) => return Ok((info, None)), - _ => { - debug!( - "Additional block or ETag information ignored. Query: {:?}", - query - ); - return Ok((info, None)); } } } - match info.last() { - Some(part_info) => { - let pagination = Some(part_info.part_number); - Ok((info, pagination)) - } - None => Ok((info, None)), + // Cut the beginning if we have a marker + if let Some(marker) = &query.part_number_marker { + let next = marker + 1; + let part_idx = parts + .binary_search_by(|part| part.part_number.cmp(&next)) + .unwrap_or_else(|x| x); + parts = parts.split_off(part_idx); + } + + // Cut the end if we have too many parts + if parts.len() > query.max_parts as usize { + parts.truncate(query.max_parts as usize); + let pagination = Some(parts.last().unwrap().part_number); + return Ok((parts, pagination)); } + + Ok((parts, None)) } /* @@ -570,8 +527,8 @@ fn fetch_part_info( /// This key can be the prefix in the base case, or intermediate /// points in the dataset if we are continuing a previous listing. impl ListObjectsQuery { - fn build_accumulator(&self) -> Accumulator<String, ObjectInfo> { - Accumulator::<String, ObjectInfo>::new(self.common.page_size) + fn build_accumulator(&self) -> ObjectAccumulator { + ObjectAccumulator::new(self.common.page_size) } fn begin(&self) -> Result<RangeBegin, Error> { @@ -580,9 +537,10 @@ impl ListObjectsQuery { // In V2 mode, the continuation token is defined as an opaque // string in the spec, so we can do whatever we want with it. // In our case, it is defined as either [ or ] (for include + // or exclude), followed by a base64-encoded string // representing the key to start with. - (Some(token), _) => match &token[..1] { - "[" => Ok(RangeBegin::IncludingKey { + (Some(token), _) => match &token.get(..1) { + Some("[") => Ok(RangeBegin::IncludingKey { key: String::from_utf8( BASE64_STANDARD .decode(token[1..].as_bytes()) @@ -590,7 +548,7 @@ impl ListObjectsQuery { )?, fallback_key: None, }), - "]" => Ok(RangeBegin::AfterKey { + Some("]") => Ok(RangeBegin::AfterKey { key: String::from_utf8( BASE64_STANDARD .decode(token[1..].as_bytes()) @@ -631,8 +589,8 @@ impl ListObjectsQuery { } impl ListMultipartUploadsQuery { - fn build_accumulator(&self) -> Accumulator<Uuid, UploadInfo> { - Accumulator::<Uuid, UploadInfo>::new(self.common.page_size) + fn build_accumulator(&self) -> UploadAccumulator { + UploadAccumulator::new(self.common.page_size) } fn begin(&self) -> Result<RangeBegin, Error> { @@ -651,7 +609,7 @@ impl ListMultipartUploadsQuery { }), uuid => Ok(RangeBegin::AfterUpload { key: key_marker.to_string(), - upload: s3_put::decode_upload_id(uuid)?, + upload: s3_multipart::decode_upload_id(uuid)?, }), }, @@ -716,6 +674,7 @@ impl<K: std::cmp::Ord, V> Accumulator<K, V> { Some(p) => p, None => return None, }; + assert!(pfx.starts_with(&query.prefix)); // Try to register this prefix // If not possible, we can return early @@ -726,8 +685,11 @@ impl<K: std::cmp::Ord, V> Accumulator<K, V> { // We consume the whole common prefix from the iterator let mut last_pfx_key = &object.key; loop { - last_pfx_key = match objects.peek() { - Some(o) if o.key.starts_with(pfx) => &o.key, + match objects.peek() { + Some(o) if o.key.starts_with(pfx) => { + last_pfx_key = &o.key; + objects.next(); + } Some(_) => { return Some(ExtractionResult::Extracted { key: last_pfx_key.to_owned(), @@ -743,8 +705,6 @@ impl<K: std::cmp::Ord, V> Accumulator<K, V> { } } }; - - objects.next(); } } @@ -759,12 +719,11 @@ impl<K: std::cmp::Ord, V> Accumulator<K, V> { } // Otherwise, we need to check if we can add it - match self.is_full() { - true => false, - false => { - self.common_prefixes.insert(key); - true - } + if self.is_full() { + false + } else { + self.common_prefixes.insert(key); + true } } @@ -772,12 +731,11 @@ impl<K: std::cmp::Ord, V> Accumulator<K, V> { // It is impossible to add twice a key, this is an error assert!(!self.keys.contains_key(&key)); - match self.is_full() { - true => false, - false => { - self.keys.insert(key, value); - true - } + if self.is_full() { + false + } else { + self.keys.insert(key, value); + true } } } @@ -794,6 +752,7 @@ impl ExtractAccumulator for ObjectAccumulator { } let object = objects.next().expect("This iterator can not be empty as it is checked earlier in the code. This is a logic bug, please report it."); + assert!(object.key.starts_with(&query.prefix)); let version = match object.versions().iter().find(|x| x.is_data()) { Some(v) => v, @@ -843,7 +802,7 @@ impl ExtractAccumulator for UploadAccumulator { let mut uploads_for_key = object .versions() .iter() - .filter(|x| x.is_uploading()) + .filter(|x| x.is_uploading(Some(true))) .collect::<Vec<&ObjectVersion>>(); // S3 logic requires lexicographically sorted upload ids. @@ -918,14 +877,6 @@ impl ExtractAccumulator for UploadAccumulator { * Utility functions */ -/// This is a stub for Result::into_ok_or_err that is not yet in Rust stable -fn into_ok_or_err<T>(r: Result<T, T>) -> T { - match r { - Ok(r) => r, - Err(r) => r, - } -} - /// Returns the common prefix of the object given the query prefix and delimiter fn common_prefix<'a>(object: &'a Object, query: &ListQueryCommon) -> Option<&'a str> { match &query.delimiter { @@ -951,7 +902,6 @@ fn uriencode_maybe(s: &str, yes: bool) -> s3_xml::Value { #[cfg(test)] mod tests { use super::*; - use garage_model::s3::version_table::*; use garage_util::*; use std::iter::FromIterator; @@ -991,10 +941,13 @@ mod tests { ObjectVersion { uuid: Uuid::from(uuid), timestamp: TS, - state: ObjectVersionState::Uploading(ObjectVersionHeaders { - content_type: "text/plain".to_string(), - other: BTreeMap::<String, String>::new(), - }), + state: ObjectVersionState::Uploading { + multipart: true, + headers: ObjectVersionHeaders { + content_type: "text/plain".to_string(), + other: BTreeMap::<String, String>::new(), + }, + }, } } @@ -1169,83 +1122,77 @@ mod tests { Ok(()) } - fn version() -> Version { + fn mpu() -> MultipartUpload { let uuid = Uuid::from([0x08; 32]); - let blocks = vec![ + let parts = vec![ ( - VersionBlockKey { + MpuPartKey { part_number: 1, - offset: 1, + timestamp: TS, }, - VersionBlock { - hash: uuid, - size: 3, + MpuPart { + version: uuid, + size: Some(3), + etag: Some("etag1".into()), }, ), ( - VersionBlockKey { - part_number: 1, - offset: 2, + MpuPartKey { + part_number: 2, + timestamp: TS, }, - VersionBlock { - hash: uuid, - size: 2, + MpuPart { + version: uuid, + size: None, + etag: None, }, ), ( - VersionBlockKey { - part_number: 2, - offset: 1, + MpuPartKey { + part_number: 3, + timestamp: TS, }, - VersionBlock { - hash: uuid, - size: 8, + MpuPart { + version: uuid, + size: Some(10), + etag: Some("etag2".into()), }, ), ( - VersionBlockKey { + MpuPartKey { part_number: 5, - offset: 1, + timestamp: TS, }, - VersionBlock { - hash: uuid, - size: 7, + MpuPart { + version: uuid, + size: Some(7), + etag: Some("etag3".into()), }, ), ( - VersionBlockKey { + MpuPartKey { part_number: 8, - offset: 1, + timestamp: TS, }, - VersionBlock { - hash: uuid, - size: 5, + MpuPart { + version: uuid, + size: Some(5), + etag: Some("etag4".into()), }, ), ]; - let etags = vec![ - (1, "etag1".to_string()), - (3, "etag2".to_string()), - (5, "etag3".to_string()), - (8, "etag4".to_string()), - (9, "etag5".to_string()), - ]; - Version { - bucket_id: uuid, - key: "a".to_string(), - uuid, + MultipartUpload { + upload_id: uuid, + timestamp: TS, deleted: false.into(), - blocks: crdt::Map::<VersionBlockKey, VersionBlock>::from_iter(blocks), - parts_etags: crdt::Map::<u64, String>::from_iter(etags), + parts: crdt::Map::<MpuPartKey, MpuPart>::from_iter(parts), + bucket_id: uuid, + key: "a".into(), } } - fn obj() -> Object { - Object::new(bucket(), "d".to_string(), vec![objup_version([0x08; 32])]) - } - #[test] fn test_fetch_part_info() -> Result<(), Error> { let uuid = Uuid::from([0x08; 32]); @@ -1258,82 +1205,85 @@ mod tests { max_parts: 2, }; - assert!( - fetch_part_info(&query, None, None, uuid).is_err(), - "No object and version should fail" - ); - assert!( - fetch_part_info(&query, Some(obj()), None, uuid).is_err(), - "No version should faild" - ); - assert!( - fetch_part_info(&query, None, Some(version()), uuid).is_err(), - "No object should fail" - ); + let mpu = mpu(); // Start from the beginning but with limited size to trigger pagination - let (info, pagination) = fetch_part_info(&query, Some(obj()), Some(version()), uuid)?; - assert_eq!(pagination.unwrap(), 5); + let (info, pagination) = fetch_part_info(&query, &mpu)?; + assert_eq!(pagination.unwrap(), 3); assert_eq!( info, vec![ PartInfo { - etag: "etag1".to_string(), + etag: "etag1", timestamp: TS, part_number: 1, - size: 5 + size: 3 }, PartInfo { - etag: "etag3".to_string(), + etag: "etag2", timestamp: TS, - part_number: 5, - size: 7 + part_number: 3, + size: 10 }, ] ); // Use previous pagination to make a new request query.part_number_marker = Some(pagination.unwrap()); - let (info, pagination) = fetch_part_info(&query, Some(obj()), Some(version()), uuid)?; + let (info, pagination) = fetch_part_info(&query, &mpu)?; assert!(pagination.is_none()); assert_eq!( info, - vec![PartInfo { - etag: "etag4".to_string(), - timestamp: TS, - part_number: 8, - size: 5 - },] + vec![ + PartInfo { + etag: "etag3", + timestamp: TS, + part_number: 5, + size: 7 + }, + PartInfo { + etag: "etag4", + timestamp: TS, + part_number: 8, + size: 5 + }, + ] ); // Trying to access a part that is way larger than registered ones query.part_number_marker = Some(9999); - let (info, pagination) = fetch_part_info(&query, Some(obj()), Some(version()), uuid)?; + let (info, pagination) = fetch_part_info(&query, &mpu)?; assert!(pagination.is_none()); assert_eq!(info, vec![]); // Try without any limitation query.max_parts = 1000; query.part_number_marker = None; - let (info, pagination) = fetch_part_info(&query, Some(obj()), Some(version()), uuid)?; + let (info, pagination) = fetch_part_info(&query, &mpu)?; assert!(pagination.is_none()); assert_eq!( info, vec![ PartInfo { - etag: "etag1".to_string(), + etag: "etag1", timestamp: TS, part_number: 1, - size: 5 + size: 3 + }, + PartInfo { + etag: "etag2", + timestamp: TS, + part_number: 3, + size: 10 }, PartInfo { - etag: "etag3".to_string(), + etag: "etag3", timestamp: TS, part_number: 5, size: 7 }, PartInfo { - etag: "etag4".to_string(), + etag: "etag4", timestamp: TS, part_number: 8, size: 5 diff --git a/src/api/s3/mod.rs b/src/api/s3/mod.rs index 7b56d4d8..cbdb94ab 100644 --- a/src/api/s3/mod.rs +++ b/src/api/s3/mod.rs @@ -6,7 +6,9 @@ mod copy; pub mod cors; mod delete; pub mod get; +mod lifecycle; mod list; +mod multipart; mod post_object; mod put; mod website; diff --git a/src/api/s3/multipart.rs b/src/api/s3/multipart.rs new file mode 100644 index 00000000..6b786318 --- /dev/null +++ b/src/api/s3/multipart.rs @@ -0,0 +1,468 @@ +use std::collections::HashMap; +use std::sync::Arc; + +use futures::prelude::*; +use hyper::body::Body; +use hyper::{Request, Response}; +use md5::{Digest as Md5Digest, Md5}; + +use garage_table::*; +use garage_util::async_hash::*; +use garage_util::data::*; + +use garage_model::bucket_table::Bucket; +use garage_model::garage::Garage; +use garage_model::s3::block_ref_table::*; +use garage_model::s3::mpu_table::*; +use garage_model::s3::object_table::*; +use garage_model::s3::version_table::*; + +use crate::s3::error::*; +use crate::s3::put::*; +use crate::s3::xml as s3_xml; +use crate::signature::verify_signed_content; + +// ---- + +pub async fn handle_create_multipart_upload( + garage: Arc<Garage>, + req: &Request<Body>, + bucket_name: &str, + bucket_id: Uuid, + key: &String, +) -> Result<Response<Body>, Error> { + let existing_object = garage.object_table.get(&bucket_id, &key).await?; + + let upload_id = gen_uuid(); + let timestamp = next_timestamp(existing_object.as_ref()); + + let headers = get_headers(req.headers())?; + + // Create object in object table + let object_version = ObjectVersion { + uuid: upload_id, + timestamp, + state: ObjectVersionState::Uploading { + multipart: true, + headers, + }, + }; + let object = Object::new(bucket_id, key.to_string(), vec![object_version]); + garage.object_table.insert(&object).await?; + + // Create multipart upload in mpu table + // This multipart upload will hold references to uploaded parts + // (which are entries in the Version table) + let mpu = MultipartUpload::new(upload_id, timestamp, bucket_id, key.into(), false); + garage.mpu_table.insert(&mpu).await?; + + // Send success response + let result = s3_xml::InitiateMultipartUploadResult { + xmlns: (), + bucket: s3_xml::Value(bucket_name.to_string()), + key: s3_xml::Value(key.to_string()), + upload_id: s3_xml::Value(hex::encode(upload_id)), + }; + let xml = s3_xml::to_xml_with_header(&result)?; + + Ok(Response::new(Body::from(xml.into_bytes()))) +} + +pub async fn handle_put_part( + garage: Arc<Garage>, + req: Request<Body>, + bucket_id: Uuid, + key: &str, + part_number: u64, + upload_id: &str, + content_sha256: Option<Hash>, +) -> Result<Response<Body>, Error> { + let upload_id = decode_upload_id(upload_id)?; + + let content_md5 = match req.headers().get("content-md5") { + Some(x) => Some(x.to_str()?.to_string()), + None => None, + }; + + // Read first chuck, and at the same time try to get object to see if it exists + let key = key.to_string(); + + let body = req.into_body().map_err(Error::from); + let mut chunker = StreamChunker::new(body, garage.config.block_size); + + let ((_, _, mut mpu), first_block) = futures::try_join!( + get_upload(&garage, &bucket_id, &key, &upload_id), + chunker.next(), + )?; + + // Check object is valid and part can be accepted + let first_block = first_block.ok_or_bad_request("Empty body")?; + + // Calculate part identity: timestamp, version id + let version_uuid = gen_uuid(); + let mpu_part_key = MpuPartKey { + part_number, + timestamp: mpu.next_timestamp(part_number), + }; + + // The following consists in many steps that can each fail. + // Keep track that some cleanup will be needed if things fail + // before everything is finished (cleanup is done using the Drop trait). + let mut interrupted_cleanup = InterruptedCleanup(Some(InterruptedCleanupInner { + garage: garage.clone(), + upload_id, + version_uuid, + })); + + // Create version and link version from MPU + mpu.parts.clear(); + mpu.parts.put( + mpu_part_key, + MpuPart { + version: version_uuid, + etag: None, + size: None, + }, + ); + garage.mpu_table.insert(&mpu).await?; + + let version = Version::new( + version_uuid, + VersionBacklink::MultipartUpload { upload_id }, + false, + ); + garage.version_table.insert(&version).await?; + + // Copy data to version + let first_block_hash = async_blake2sum(first_block.clone()).await; + + let (total_size, data_md5sum, data_sha256sum) = read_and_put_blocks( + &garage, + &version, + part_number, + first_block, + first_block_hash, + &mut chunker, + ) + .await?; + + // Verify that checksums map + ensure_checksum_matches( + data_md5sum.as_slice(), + data_sha256sum, + content_md5.as_deref(), + content_sha256, + )?; + + // Store part etag in version + let data_md5sum_hex = hex::encode(data_md5sum); + mpu.parts.put( + mpu_part_key, + MpuPart { + version: version_uuid, + etag: Some(data_md5sum_hex.clone()), + size: Some(total_size), + }, + ); + garage.mpu_table.insert(&mpu).await?; + + // We were not interrupted, everything went fine. + // We won't have to clean up on drop. + interrupted_cleanup.cancel(); + + let response = Response::builder() + .header("ETag", format!("\"{}\"", data_md5sum_hex)) + .body(Body::empty()) + .unwrap(); + Ok(response) +} + +struct InterruptedCleanup(Option<InterruptedCleanupInner>); +struct InterruptedCleanupInner { + garage: Arc<Garage>, + upload_id: Uuid, + version_uuid: Uuid, +} + +impl InterruptedCleanup { + fn cancel(&mut self) { + drop(self.0.take()); + } +} +impl Drop for InterruptedCleanup { + fn drop(&mut self) { + if let Some(info) = self.0.take() { + tokio::spawn(async move { + let version = Version::new( + info.version_uuid, + VersionBacklink::MultipartUpload { + upload_id: info.upload_id, + }, + true, + ); + if let Err(e) = info.garage.version_table.insert(&version).await { + warn!("Cannot cleanup after aborted UploadPart: {}", e); + } + }); + } + } +} + +pub async fn handle_complete_multipart_upload( + garage: Arc<Garage>, + req: Request<Body>, + bucket_name: &str, + bucket: &Bucket, + key: &str, + upload_id: &str, + content_sha256: Option<Hash>, +) -> Result<Response<Body>, Error> { + let body = hyper::body::to_bytes(req.into_body()).await?; + + if let Some(content_sha256) = content_sha256 { + verify_signed_content(content_sha256, &body[..])?; + } + + let body_xml = roxmltree::Document::parse(std::str::from_utf8(&body)?)?; + let body_list_of_parts = parse_complete_multipart_upload_body(&body_xml) + .ok_or_bad_request("Invalid CompleteMultipartUpload XML")?; + debug!( + "CompleteMultipartUpload list of parts: {:?}", + body_list_of_parts + ); + + let upload_id = decode_upload_id(upload_id)?; + + // Get object and multipart upload + let key = key.to_string(); + let (object, mut object_version, mpu) = + get_upload(&garage, &bucket.id, &key, &upload_id).await?; + + if mpu.parts.is_empty() { + return Err(Error::bad_request("No data was uploaded")); + } + + let headers = match object_version.state { + ObjectVersionState::Uploading { headers, .. } => headers, + _ => unreachable!(), + }; + + // Check that part numbers are an increasing sequence. + // (it doesn't need to start at 1 nor to be a continuous sequence, + // see discussion in #192) + if body_list_of_parts.is_empty() { + return Err(Error::EntityTooSmall); + } + if !body_list_of_parts + .iter() + .zip(body_list_of_parts.iter().skip(1)) + .all(|(p1, p2)| p1.part_number < p2.part_number) + { + return Err(Error::InvalidPartOrder); + } + + // Check that the list of parts they gave us corresponds to parts we have here + debug!("Parts stored in multipart upload: {:?}", mpu.parts.items()); + let mut have_parts = HashMap::new(); + for (pk, pv) in mpu.parts.items().iter() { + have_parts.insert(pk.part_number, pv); + } + let mut parts = vec![]; + for req_part in body_list_of_parts.iter() { + match have_parts.get(&req_part.part_number) { + Some(part) if part.etag.as_ref() == Some(&req_part.etag) && part.size.is_some() => { + parts.push(*part) + } + _ => return Err(Error::InvalidPart), + } + } + + let grg = &garage; + let parts_versions = futures::future::try_join_all(parts.iter().map(|p| async move { + grg.version_table + .get(&p.version, &EmptyKey) + .await? + .ok_or_internal_error("Part version missing from version table") + })) + .await?; + + // Create final version and block refs + let mut final_version = Version::new( + upload_id, + VersionBacklink::Object { + bucket_id: bucket.id, + key: key.to_string(), + }, + false, + ); + for (part_number, part_version) in parts_versions.iter().enumerate() { + if part_version.deleted.get() { + return Err(Error::InvalidPart); + } + for (vbk, vb) in part_version.blocks.items().iter() { + final_version.blocks.put( + VersionBlockKey { + part_number: (part_number + 1) as u64, + offset: vbk.offset, + }, + *vb, + ); + } + } + garage.version_table.insert(&final_version).await?; + + let block_refs = final_version.blocks.items().iter().map(|(_, b)| BlockRef { + block: b.hash, + version: upload_id, + deleted: false.into(), + }); + garage.block_ref_table.insert_many(block_refs).await?; + + // Calculate etag of final object + // To understand how etags are calculated, read more here: + // https://teppen.io/2018/06/23/aws_s3_etags/ + let mut etag_md5_hasher = Md5::new(); + for part in parts.iter() { + etag_md5_hasher.update(part.etag.as_ref().unwrap().as_bytes()); + } + let etag = format!( + "{}-{}", + hex::encode(etag_md5_hasher.finalize()), + parts.len() + ); + + // Calculate total size of final object + let total_size = parts.iter().map(|x| x.size.unwrap()).sum(); + + if let Err(e) = check_quotas(&garage, bucket, total_size, Some(&object)).await { + object_version.state = ObjectVersionState::Aborted; + let final_object = Object::new(bucket.id, key.clone(), vec![object_version]); + garage.object_table.insert(&final_object).await?; + + return Err(e); + } + + // Write final object version + object_version.state = ObjectVersionState::Complete(ObjectVersionData::FirstBlock( + ObjectVersionMeta { + headers, + size: total_size, + etag: etag.clone(), + }, + final_version.blocks.items()[0].1.hash, + )); + + let final_object = Object::new(bucket.id, key.clone(), vec![object_version]); + garage.object_table.insert(&final_object).await?; + + // Send response saying ok we're done + let result = s3_xml::CompleteMultipartUploadResult { + xmlns: (), + location: None, + bucket: s3_xml::Value(bucket_name.to_string()), + key: s3_xml::Value(key), + etag: s3_xml::Value(format!("\"{}\"", etag)), + }; + let xml = s3_xml::to_xml_with_header(&result)?; + + Ok(Response::new(Body::from(xml.into_bytes()))) +} + +pub async fn handle_abort_multipart_upload( + garage: Arc<Garage>, + bucket_id: Uuid, + key: &str, + upload_id: &str, +) -> Result<Response<Body>, Error> { + let upload_id = decode_upload_id(upload_id)?; + + let (_, mut object_version, _) = + get_upload(&garage, &bucket_id, &key.to_string(), &upload_id).await?; + + object_version.state = ObjectVersionState::Aborted; + let final_object = Object::new(bucket_id, key.to_string(), vec![object_version]); + garage.object_table.insert(&final_object).await?; + + Ok(Response::new(Body::from(vec![]))) +} + +// ======== helpers ============ + +#[allow(clippy::ptr_arg)] +pub(crate) async fn get_upload( + garage: &Garage, + bucket_id: &Uuid, + key: &String, + upload_id: &Uuid, +) -> Result<(Object, ObjectVersion, MultipartUpload), Error> { + let (object, mpu) = futures::try_join!( + garage.object_table.get(bucket_id, key).map_err(Error::from), + garage + .mpu_table + .get(upload_id, &EmptyKey) + .map_err(Error::from), + )?; + + let object = object.ok_or(Error::NoSuchUpload)?; + let mpu = mpu.ok_or(Error::NoSuchUpload)?; + + let object_version = object + .versions() + .iter() + .find(|v| v.uuid == *upload_id && v.is_uploading(Some(true))) + .ok_or(Error::NoSuchUpload)? + .clone(); + + Ok((object, object_version, mpu)) +} + +pub fn decode_upload_id(id: &str) -> Result<Uuid, Error> { + let id_bin = hex::decode(id).map_err(|_| Error::NoSuchUpload)?; + if id_bin.len() != 32 { + return Err(Error::NoSuchUpload); + } + let mut uuid = [0u8; 32]; + uuid.copy_from_slice(&id_bin[..]); + Ok(Uuid::from(uuid)) +} + +#[derive(Debug)] +struct CompleteMultipartUploadPart { + etag: String, + part_number: u64, +} + +fn parse_complete_multipart_upload_body( + xml: &roxmltree::Document, +) -> Option<Vec<CompleteMultipartUploadPart>> { + let mut parts = vec![]; + + let root = xml.root(); + let cmu = root.first_child()?; + if !cmu.has_tag_name("CompleteMultipartUpload") { + return None; + } + + for item in cmu.children() { + // Only parse <Part> nodes + if !item.is_element() { + continue; + } + + if item.has_tag_name("Part") { + let etag = item.children().find(|e| e.has_tag_name("ETag"))?.text()?; + let part_number = item + .children() + .find(|e| e.has_tag_name("PartNumber"))? + .text()?; + parts.push(CompleteMultipartUploadPart { + etag: etag.trim_matches('"').to_string(), + part_number: part_number.parse().ok()?, + }); + } else { + return None; + } + } + + Some(parts) +} diff --git a/src/api/s3/put.rs b/src/api/s3/put.rs index 350ab884..606facc4 100644 --- a/src/api/s3/put.rs +++ b/src/api/s3/put.rs @@ -1,8 +1,9 @@ -use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::collections::{BTreeMap, HashMap}; use std::sync::Arc; use base64::prelude::*; use futures::prelude::*; +use futures::try_join; use hyper::body::{Body, Bytes}; use hyper::header::{HeaderMap, HeaderValue}; use hyper::{Request, Response}; @@ -30,14 +31,12 @@ use garage_model::s3::object_table::*; use garage_model::s3::version_table::*; use crate::s3::error::*; -use crate::s3::xml as s3_xml; -use crate::signature::verify_signed_content; pub async fn handle_put( garage: Arc<Garage>, req: Request<Body>, bucket: &Bucket, - key: &str, + key: &String, content_sha256: Option<Hash>, ) -> Result<Response<Body>, Error> { // Retrieve interesting headers from request @@ -70,16 +69,24 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>( headers: ObjectVersionHeaders, body: S, bucket: &Bucket, - key: &str, + key: &String, content_md5: Option<String>, content_sha256: Option<FixedBytes32>, ) -> Result<(Uuid, String), Error> { + let mut chunker = StreamChunker::new(body, garage.config.block_size); + let (first_block_opt, existing_object) = try_join!( + chunker.next(), + garage + .object_table + .get(&bucket.id, key) + .map_err(Error::from), + )?; + + let first_block = first_block_opt.unwrap_or_default(); + // Generate identity of new version let version_uuid = gen_uuid(); - let version_timestamp = now_msec(); - - let mut chunker = StreamChunker::new(body, garage.config.block_size); - let first_block = chunker.next().await?.unwrap_or_default(); + let version_timestamp = next_timestamp(existing_object.as_ref()); // If body is small enough, store it directly in the object table // as "inline data". We can then return immediately. @@ -99,7 +106,7 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>( content_sha256, )?; - check_quotas(&garage, bucket, key, size).await?; + check_quotas(&garage, bucket, size, existing_object.as_ref()).await?; let object_version = ObjectVersion { uuid: version_uuid, @@ -123,20 +130,23 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>( // The following consists in many steps that can each fail. // Keep track that some cleanup will be needed if things fail // before everything is finished (cleanup is done using the Drop trait). - let mut interrupted_cleanup = InterruptedCleanup(Some(( - garage.clone(), - bucket.id, - key.into(), + let mut interrupted_cleanup = InterruptedCleanup(Some(InterruptedCleanupInner { + garage: garage.clone(), + bucket_id: bucket.id, + key: key.into(), version_uuid, version_timestamp, - ))); + })); // Write version identifier in object table so that we have a trace // that we are uploading something let mut object_version = ObjectVersion { uuid: version_uuid, timestamp: version_timestamp, - state: ObjectVersionState::Uploading(headers.clone()), + state: ObjectVersionState::Uploading { + headers: headers.clone(), + multipart: false, + }, }; let object = Object::new(bucket.id, key.into(), vec![object_version.clone()]); garage.object_table.insert(&object).await?; @@ -145,7 +155,14 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>( // Write this entry now, even with empty block list, // to prevent block_ref entries from being deleted (they can be deleted // if the reference a version that isn't found in the version table) - let version = Version::new(version_uuid, bucket.id, key.into(), false); + let version = Version::new( + version_uuid, + VersionBacklink::Object { + bucket_id: bucket.id, + key: key.into(), + }, + false, + ); garage.version_table.insert(&version).await?; // Transfer data and verify checksum @@ -168,7 +185,7 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>( content_sha256, )?; - check_quotas(&garage, bucket, key, total_size).await?; + check_quotas(&garage, bucket, total_size, existing_object.as_ref()).await?; // Save final object state, marked as Complete let md5sum_hex = hex::encode(data_md5sum); @@ -192,7 +209,7 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>( /// Validate MD5 sum against content-md5 header /// and sha256sum against signed content-sha256 -fn ensure_checksum_matches( +pub(crate) fn ensure_checksum_matches( data_md5sum: &[u8], data_sha256sum: garage_util::data::FixedBytes32, content_md5: Option<&str>, @@ -218,22 +235,22 @@ fn ensure_checksum_matches( } /// Check that inserting this object with this size doesn't exceed bucket quotas -async fn check_quotas( +pub(crate) async fn check_quotas( garage: &Arc<Garage>, bucket: &Bucket, - key: &str, size: u64, + prev_object: Option<&Object>, ) -> Result<(), Error> { let quotas = bucket.state.as_option().unwrap().quotas.get(); if quotas.max_objects.is_none() && quotas.max_size.is_none() { return Ok(()); }; - let key = key.to_string(); - let (prev_object, counters) = futures::try_join!( - garage.object_table.get(&bucket.id, &key), - garage.object_counter_table.table.get(&bucket.id, &EmptyKey), - )?; + let counters = garage + .object_counter_table + .table + .get(&bucket.id, &EmptyKey) + .await?; let counters = counters .map(|x| x.filtered_values(&garage.system.ring.borrow())) @@ -267,7 +284,7 @@ async fn check_quotas( if cnt_size_diff > 0 && current_size + cnt_size_diff > ms as i64 { return Err(Error::forbidden(format!( "Bucket size quota is reached, maximum total size of objects for this bucket: {}. The bucket is already {} bytes, and this object would add {} bytes.", - ms, current_size, size + ms, current_size, cnt_size_diff ))); } } @@ -275,7 +292,7 @@ async fn check_quotas( Ok(()) } -async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> + Unpin>( +pub(crate) async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> + Unpin>( garage: &Garage, version: &Version, part_number: u64, @@ -381,7 +398,7 @@ async fn put_block_meta( Ok(()) } -struct StreamChunker<S: Stream<Item = Result<Bytes, Error>>> { +pub(crate) struct StreamChunker<S: Stream<Item = Result<Bytes, Error>>> { stream: S, read_all: bool, block_size: usize, @@ -389,7 +406,7 @@ struct StreamChunker<S: Stream<Item = Result<Bytes, Error>>> { } impl<S: Stream<Item = Result<Bytes, Error>> + Unpin> StreamChunker<S> { - fn new(stream: S, block_size: usize) -> Self { + pub(crate) fn new(stream: S, block_size: usize) -> Self { Self { stream, read_all: false, @@ -398,7 +415,7 @@ impl<S: Stream<Item = Result<Bytes, Error>> + Unpin> StreamChunker<S> { } } - async fn next(&mut self) -> Result<Option<Bytes>, Error> { + pub(crate) async fn next(&mut self) -> Result<Option<Bytes>, Error> { while !self.read_all && self.buf.len() < self.block_size { if let Some(block) = self.stream.next().await { let bytes = block?; @@ -425,7 +442,14 @@ pub fn put_response(version_uuid: Uuid, md5sum_hex: String) -> Response<Body> { .unwrap() } -struct InterruptedCleanup(Option<(Arc<Garage>, Uuid, String, Uuid, u64)>); +struct InterruptedCleanup(Option<InterruptedCleanupInner>); +struct InterruptedCleanupInner { + garage: Arc<Garage>, + bucket_id: Uuid, + key: String, + version_uuid: Uuid, + version_timestamp: u64, +} impl InterruptedCleanup { fn cancel(&mut self) { @@ -434,15 +458,15 @@ impl InterruptedCleanup { } impl Drop for InterruptedCleanup { fn drop(&mut self) { - if let Some((garage, bucket_id, key, version_uuid, version_ts)) = self.0.take() { + if let Some(info) = self.0.take() { tokio::spawn(async move { let object_version = ObjectVersion { - uuid: version_uuid, - timestamp: version_ts, + uuid: info.version_uuid, + timestamp: info.version_timestamp, state: ObjectVersionState::Aborted, }; - let object = Object::new(bucket_id, key, vec![object_version]); - if let Err(e) = garage.object_table.insert(&object).await { + let object = Object::new(info.bucket_id, info.key, vec![object_version]); + if let Err(e) = info.garage.object_table.insert(&object).await { warn!("Cannot cleanup after aborted PutObject: {}", e); } }); @@ -450,326 +474,9 @@ impl Drop for InterruptedCleanup { } } -// ---- - -pub async fn handle_create_multipart_upload( - garage: Arc<Garage>, - req: &Request<Body>, - bucket_name: &str, - bucket_id: Uuid, - key: &str, -) -> Result<Response<Body>, Error> { - let version_uuid = gen_uuid(); - let headers = get_headers(req.headers())?; - - // Create object in object table - let object_version = ObjectVersion { - uuid: version_uuid, - timestamp: now_msec(), - state: ObjectVersionState::Uploading(headers), - }; - let object = Object::new(bucket_id, key.to_string(), vec![object_version]); - garage.object_table.insert(&object).await?; - - // Insert empty version so that block_ref entries refer to something - // (they are inserted concurrently with blocks in the version table, so - // there is the possibility that they are inserted before the version table - // is created, in which case it is allowed to delete them, e.g. in repair_*) - let version = Version::new(version_uuid, bucket_id, key.into(), false); - garage.version_table.insert(&version).await?; - - // Send success response - let result = s3_xml::InitiateMultipartUploadResult { - xmlns: (), - bucket: s3_xml::Value(bucket_name.to_string()), - key: s3_xml::Value(key.to_string()), - upload_id: s3_xml::Value(hex::encode(version_uuid)), - }; - let xml = s3_xml::to_xml_with_header(&result)?; - - Ok(Response::new(Body::from(xml.into_bytes()))) -} - -pub async fn handle_put_part( - garage: Arc<Garage>, - req: Request<Body>, - bucket_id: Uuid, - key: &str, - part_number: u64, - upload_id: &str, - content_sha256: Option<Hash>, -) -> Result<Response<Body>, Error> { - let version_uuid = decode_upload_id(upload_id)?; - - let content_md5 = match req.headers().get("content-md5") { - Some(x) => Some(x.to_str()?.to_string()), - None => None, - }; - - // Read first chuck, and at the same time try to get object to see if it exists - let key = key.to_string(); - - let body = req.into_body().map_err(Error::from); - let mut chunker = StreamChunker::new(body, garage.config.block_size); - - let (object, version, first_block) = futures::try_join!( - garage - .object_table - .get(&bucket_id, &key) - .map_err(Error::from), - garage - .version_table - .get(&version_uuid, &EmptyKey) - .map_err(Error::from), - chunker.next(), - )?; - - // Check object is valid and multipart block can be accepted - let first_block = first_block.ok_or_bad_request("Empty body")?; - let object = object.ok_or_bad_request("Object not found")?; - - if !object - .versions() - .iter() - .any(|v| v.uuid == version_uuid && v.is_uploading()) - { - return Err(Error::NoSuchUpload); - } - - // Check part hasn't already been uploaded - if let Some(v) = version { - if v.has_part_number(part_number) { - return Err(Error::bad_request(format!( - "Part number {} has already been uploaded", - part_number - ))); - } - } - - // Copy block to store - let version = Version::new(version_uuid, bucket_id, key, false); - - let first_block_hash = async_blake2sum(first_block.clone()).await; - - let (_, data_md5sum, data_sha256sum) = read_and_put_blocks( - &garage, - &version, - part_number, - first_block, - first_block_hash, - &mut chunker, - ) - .await?; - - // Verify that checksums map - ensure_checksum_matches( - data_md5sum.as_slice(), - data_sha256sum, - content_md5.as_deref(), - content_sha256, - )?; - - // Store part etag in version - let data_md5sum_hex = hex::encode(data_md5sum); - let mut version = version; - version - .parts_etags - .put(part_number, data_md5sum_hex.clone()); - garage.version_table.insert(&version).await?; - - let response = Response::builder() - .header("ETag", format!("\"{}\"", data_md5sum_hex)) - .body(Body::empty()) - .unwrap(); - Ok(response) -} - -pub async fn handle_complete_multipart_upload( - garage: Arc<Garage>, - req: Request<Body>, - bucket_name: &str, - bucket: &Bucket, - key: &str, - upload_id: &str, - content_sha256: Option<Hash>, -) -> Result<Response<Body>, Error> { - let body = hyper::body::to_bytes(req.into_body()).await?; - - if let Some(content_sha256) = content_sha256 { - verify_signed_content(content_sha256, &body[..])?; - } - - let body_xml = roxmltree::Document::parse(std::str::from_utf8(&body)?)?; - let body_list_of_parts = parse_complete_multipart_upload_body(&body_xml) - .ok_or_bad_request("Invalid CompleteMultipartUpload XML")?; - debug!( - "CompleteMultipartUpload list of parts: {:?}", - body_list_of_parts - ); - - let version_uuid = decode_upload_id(upload_id)?; - - // Get object and version - let key = key.to_string(); - let (object, version) = futures::try_join!( - garage.object_table.get(&bucket.id, &key), - garage.version_table.get(&version_uuid, &EmptyKey), - )?; - - let object = object.ok_or(Error::NoSuchKey)?; - let mut object_version = object - .versions() - .iter() - .find(|v| v.uuid == version_uuid && v.is_uploading()) - .cloned() - .ok_or(Error::NoSuchUpload)?; - - let version = version.ok_or(Error::NoSuchKey)?; - if version.blocks.is_empty() { - return Err(Error::bad_request("No data was uploaded")); - } - - let headers = match object_version.state { - ObjectVersionState::Uploading(headers) => headers, - _ => unreachable!(), - }; - - // Check that part numbers are an increasing sequence. - // (it doesn't need to start at 1 nor to be a continuous sequence, - // see discussion in #192) - if body_list_of_parts.is_empty() { - return Err(Error::EntityTooSmall); - } - if !body_list_of_parts - .iter() - .zip(body_list_of_parts.iter().skip(1)) - .all(|(p1, p2)| p1.part_number < p2.part_number) - { - return Err(Error::InvalidPartOrder); - } - - // Garage-specific restriction, see #204: part numbers must be - // consecutive starting at 1 - if body_list_of_parts[0].part_number != 1 - || !body_list_of_parts - .iter() - .zip(body_list_of_parts.iter().skip(1)) - .all(|(p1, p2)| p1.part_number + 1 == p2.part_number) - { - return Err(Error::NotImplemented("Garage does not support completing a Multipart upload with non-consecutive part numbers. This is a restriction of Garage's data model, which might be fixed in a future release. See issue #204 for more information on this topic.".into())); - } - - // Check that the list of parts they gave us corresponds to the parts we have here - debug!("Expected parts from request: {:?}", body_list_of_parts); - debug!("Parts stored in version: {:?}", version.parts_etags.items()); - let parts = version - .parts_etags - .items() - .iter() - .map(|pair| (&pair.0, &pair.1)); - let same_parts = body_list_of_parts - .iter() - .map(|x| (&x.part_number, &x.etag)) - .eq(parts); - if !same_parts { - return Err(Error::InvalidPart); - } - - // Check that all blocks belong to one of the parts - let block_parts = version - .blocks - .items() - .iter() - .map(|(bk, _)| bk.part_number) - .collect::<BTreeSet<_>>(); - let same_parts = body_list_of_parts - .iter() - .map(|x| x.part_number) - .eq(block_parts.into_iter()); - if !same_parts { - return Err(Error::bad_request( - "Part numbers in block list and part list do not match. This can happen if a part was partially uploaded. Please abort the multipart upload and try again." - )); - } - - // Calculate etag of final object - // To understand how etags are calculated, read more here: - // https://teppen.io/2018/06/23/aws_s3_etags/ - let num_parts = body_list_of_parts.len(); - let mut etag_md5_hasher = Md5::new(); - for (_, etag) in version.parts_etags.items().iter() { - etag_md5_hasher.update(etag.as_bytes()); - } - let etag = format!("{}-{}", hex::encode(etag_md5_hasher.finalize()), num_parts); - - // Calculate total size of final object - let total_size = version.blocks.items().iter().map(|x| x.1.size).sum(); - - if let Err(e) = check_quotas(&garage, bucket, &key, total_size).await { - object_version.state = ObjectVersionState::Aborted; - let final_object = Object::new(bucket.id, key.clone(), vec![object_version]); - garage.object_table.insert(&final_object).await?; - - return Err(e); - } - - // Write final object version - object_version.state = ObjectVersionState::Complete(ObjectVersionData::FirstBlock( - ObjectVersionMeta { - headers, - size: total_size, - etag: etag.clone(), - }, - version.blocks.items()[0].1.hash, - )); - - let final_object = Object::new(bucket.id, key.clone(), vec![object_version]); - garage.object_table.insert(&final_object).await?; - - // Send response saying ok we're done - let result = s3_xml::CompleteMultipartUploadResult { - xmlns: (), - location: None, - bucket: s3_xml::Value(bucket_name.to_string()), - key: s3_xml::Value(key), - etag: s3_xml::Value(format!("\"{}\"", etag)), - }; - let xml = s3_xml::to_xml_with_header(&result)?; - - Ok(Response::new(Body::from(xml.into_bytes()))) -} - -pub async fn handle_abort_multipart_upload( - garage: Arc<Garage>, - bucket_id: Uuid, - key: &str, - upload_id: &str, -) -> Result<Response<Body>, Error> { - let version_uuid = decode_upload_id(upload_id)?; - - let object = garage - .object_table - .get(&bucket_id, &key.to_string()) - .await?; - let object = object.ok_or(Error::NoSuchKey)?; - - let object_version = object - .versions() - .iter() - .find(|v| v.uuid == version_uuid && v.is_uploading()); - let mut object_version = match object_version { - None => return Err(Error::NoSuchUpload), - Some(x) => x.clone(), - }; +// ============ helpers ============ - object_version.state = ObjectVersionState::Aborted; - let final_object = Object::new(bucket_id, key.to_string(), vec![object_version]); - garage.object_table.insert(&final_object).await?; - - Ok(Response::new(Body::from(vec![]))) -} - -fn get_mime_type(headers: &HeaderMap<HeaderValue>) -> Result<String, Error> { +pub(crate) fn get_mime_type(headers: &HeaderMap<HeaderValue>) -> Result<String, Error> { Ok(headers .get(hyper::header::CONTENT_TYPE) .map(|x| x.to_str()) @@ -822,53 +529,10 @@ pub(crate) fn get_headers(headers: &HeaderMap<HeaderValue>) -> Result<ObjectVers }) } -pub fn decode_upload_id(id: &str) -> Result<Uuid, Error> { - let id_bin = hex::decode(id).map_err(|_| Error::NoSuchUpload)?; - if id_bin.len() != 32 { - return Err(Error::NoSuchUpload); - } - let mut uuid = [0u8; 32]; - uuid.copy_from_slice(&id_bin[..]); - Ok(Uuid::from(uuid)) -} - -#[derive(Debug)] -struct CompleteMultipartUploadPart { - etag: String, - part_number: u64, -} - -fn parse_complete_multipart_upload_body( - xml: &roxmltree::Document, -) -> Option<Vec<CompleteMultipartUploadPart>> { - let mut parts = vec![]; - - let root = xml.root(); - let cmu = root.first_child()?; - if !cmu.has_tag_name("CompleteMultipartUpload") { - return None; - } - - for item in cmu.children() { - // Only parse <Part> nodes - if !item.is_element() { - continue; - } - - if item.has_tag_name("Part") { - let etag = item.children().find(|e| e.has_tag_name("ETag"))?.text()?; - let part_number = item - .children() - .find(|e| e.has_tag_name("PartNumber"))? - .text()?; - parts.push(CompleteMultipartUploadPart { - etag: etag.trim_matches('"').to_string(), - part_number: part_number.parse().ok()?, - }); - } else { - return None; - } - } - - Some(parts) +pub(crate) fn next_timestamp(existing_object: Option<&Object>) -> u64 { + existing_object + .as_ref() + .and_then(|obj| obj.versions().iter().map(|v| v.timestamp).max()) + .map(|t| std::cmp::max(t + 1, now_msec())) + .unwrap_or_else(now_msec) } diff --git a/src/api/s3/website.rs b/src/api/s3/website.rs index 77738971..7f2ab925 100644 --- a/src/api/s3/website.rs +++ b/src/api/s3/website.rs @@ -43,14 +43,11 @@ pub async fn handle_get_website(bucket: &Bucket) -> Result<Response<Body>, Error pub async fn handle_delete_website( garage: Arc<Garage>, - bucket_id: Uuid, + mut bucket: Bucket, ) -> Result<Response<Body>, Error> { - let mut bucket = garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - - let param = bucket.params_mut().unwrap(); + let param = bucket + .params_mut() + .ok_or_internal_error("Bucket should not be deleted at this point")?; param.website_config.update(None); garage.bucket_table.insert(&bucket).await?; @@ -62,7 +59,7 @@ pub async fn handle_delete_website( pub async fn handle_put_website( garage: Arc<Garage>, - bucket_id: Uuid, + mut bucket: Bucket, req: Request<Body>, content_sha256: Option<Hash>, ) -> Result<Response<Body>, Error> { @@ -72,12 +69,9 @@ pub async fn handle_put_website( verify_signed_content(content_sha256, &body[..])?; } - let mut bucket = garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - - let param = bucket.params_mut().unwrap(); + let param = bucket + .params_mut() + .ok_or_internal_error("Bucket should not be deleted at this point")?; let conf: WebsiteConfiguration = from_reader(&body as &[u8])?; conf.validate()?; |