-rw-r--r--  default.nix                                        |   20
-rw-r--r--  doc/book/src/reference_manual/s3_compatibility.md  |    2
-rwxr-xr-x  script/dev-bucket.sh                               |    2
-rwxr-xr-x  script/dev-clean.sh                                |    2
-rwxr-xr-x  script/dev-cluster.sh                              |    2
-rwxr-xr-x  script/dev-configure.sh                            |    2
-rw-r--r--  script/dev-env-aws.sh                              |    2
-rwxr-xr-x  script/test-smoke.sh                               |  140
-rw-r--r--  shell.nix                                          |    1
-rw-r--r--  src/api/api_server.rs                              |   55
-rw-r--r--  src/api/s3_bucket.rs                               |    3
-rw-r--r--  src/api/s3_list.rs                                 | 1172
-rw-r--r--  src/api/s3_put.rs                                  |    2
-rw-r--r--  src/api/s3_router.rs                               |    2
-rw-r--r--  src/api/s3_xml.rs                                  |  106
-rw-r--r--  src/garage/admin.rs                                |    3
-rw-r--r--  src/model/object_table.rs                          |   14
17 files changed, 1266 insertions, 264 deletions
diff --git a/default.nix b/default.nix
index e0336174..faf6f522 100644
--- a/default.nix
+++ b/default.nix
@@ -24,6 +24,11 @@ in let
rustChannel = pkgs.rustPlatform.rust;
overrides = pkgs.buildPackages.rustBuilder.overrides.all ++ [
+ /*
+ We want to inject the git version while keeping the build deterministic.
+ As we do not want to consider the .git folder as part of the input source,
+  we ask the user (often the CI) to pass the value to Nix.
+ */
(pkgs.rustBuilder.rustLib.makeOverride {
name = "garage";
overrideAttrs = drv: if git_version != null then {
@@ -33,6 +38,21 @@ in let
'';
} else {};
})
+
+ /*
+  In a sandboxed, pure NixOS environment, /usr/bin/file is not available.
+  This is a known problem: https://github.com/NixOS/nixpkgs/issues/98440
+  We simply patch the configure script as suggested.
+ */
+ /*(pkgs.rustBuilder.rustLib.makeOverride {
+ name = "libsodium-sys";
+ overrideAttrs = drv: {
+ preConfigure = ''
+ ${drv.preConfigure or ""}
+ sed -i 's,/usr/bin/file,${file}/bin/file,g' ./configure
+ '';
+ }
+ })*/
];
packageFun = import ./Cargo.nix;
diff --git a/doc/book/src/reference_manual/s3_compatibility.md b/doc/book/src/reference_manual/s3_compatibility.md
index 5f44de78..7d517ac4 100644
--- a/doc/book/src/reference_manual/s3_compatibility.md
+++ b/doc/book/src/reference_manual/s3_compatibility.md
@@ -43,6 +43,8 @@ All APIs that are not mentionned are not implemented and will return a 400 bad r
| ListBuckets | Implemented |
| ListObjects | Implemented, bugs? (see below) |
| ListObjectsV2 | Implemented |
+| ListMultipartUploads | Implemented |
+| ListParts | Missing |
| PutObject | Implemented |
| PutBucketWebsite | Partially implemented (see below)|
| UploadPart | Implemented |
diff --git a/script/dev-bucket.sh b/script/dev-bucket.sh
index 438d934d..0ce998a1 100755
--- a/script/dev-bucket.sh
+++ b/script/dev-bucket.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -ex
diff --git a/script/dev-clean.sh b/script/dev-clean.sh
index 60065a3d..eea57864 100755
--- a/script/dev-clean.sh
+++ b/script/dev-clean.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -ex
diff --git a/script/dev-cluster.sh b/script/dev-cluster.sh
index d76c98a4..c631d6c0 100755
--- a/script/dev-cluster.sh
+++ b/script/dev-cluster.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e
diff --git a/script/dev-configure.sh b/script/dev-configure.sh
index f4bb338f..f0a7843d 100755
--- a/script/dev-configure.sh
+++ b/script/dev-configure.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -ex
diff --git a/script/dev-env-aws.sh b/script/dev-env-aws.sh
index 864fd4c2..9436c2c7 100644
--- a/script/dev-env-aws.sh
+++ b/script/dev-env-aws.sh
@@ -1,5 +1,3 @@
-#!/bin/bash
-
export AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1`
export AWS_SECRET_ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2`
export AWS_DEFAULT_REGION='garage'
diff --git a/script/test-smoke.sh b/script/test-smoke.sh
index 1f900ece..2505ae38 100755
--- a/script/test-smoke.sh
+++ b/script/test-smoke.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -ex
@@ -10,6 +10,7 @@ GARAGE_DEBUG="${REPO_FOLDER}/target/debug/"
GARAGE_RELEASE="${REPO_FOLDER}/target/release/"
NIX_RELEASE="${REPO_FOLDER}/result/bin/"
PATH="${GARAGE_DEBUG}:${GARAGE_RELEASE}:${NIX_RELEASE}:$PATH"
+CMDOUT=/tmp/garage.cmd.tmp
# @FIXME Duck is not ready for testing, we have a bug
SKIP_DUCK=1
@@ -131,6 +132,142 @@ if [ -z "$SKIP_AWS" ]; then
fi
if [ -z "$SKIP_AWS" ]; then
+ echo "🔌 Test S3API"
+
+ echo "Test Objects"
+ aws s3api put-object --bucket eprouvette --key a
+ aws s3api put-object --bucket eprouvette --key a/a
+ aws s3api put-object --bucket eprouvette --key a/b
+ aws s3api put-object --bucket eprouvette --key a/c
+ aws s3api put-object --bucket eprouvette --key a/d/a
+ aws s3api put-object --bucket eprouvette --key a/é
+ aws s3api put-object --bucket eprouvette --key b
+ aws s3api put-object --bucket eprouvette --key c
+
+
+ aws s3api list-objects-v2 --bucket eprouvette >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 8 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-objects-v2 --bucket eprouvette --page-size 0 >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 8 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-objects-v2 --bucket eprouvette --page-size 999999999999999 >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 8 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-objects-v2 --bucket eprouvette --page-size 1 >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 8 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-objects-v2 --bucket eprouvette --delimiter '/' >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 3 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
+ aws s3api list-objects-v2 --bucket eprouvette --delimiter '/' --page-size 1 >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 3 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
+ aws s3api list-objects-v2 --bucket eprouvette --prefix 'a/' >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 5 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-objects-v2 --bucket eprouvette --prefix 'a/' --delimiter '/' >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 4 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
+ aws s3api list-objects-v2 --bucket eprouvette --prefix 'a/' --page-size 1 >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 5 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-objects-v2 --bucket eprouvette --prefix 'a/' --delimiter '/' --page-size 1 >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 4 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
+ aws s3api list-objects-v2 --bucket eprouvette --start-after 'Z' >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 8 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-objects-v2 --bucket eprouvette --start-after 'c' >$CMDOUT
+ ! [ -s $CMDOUT ]
+
+
+ aws s3api list-objects --bucket eprouvette >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 8 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-objects --bucket eprouvette --page-size 1 >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 8 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-objects --bucket eprouvette --delimiter '/' >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 3 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
+    # @FIXME It does not work as expected, but this might be a limitation of the aws s3api CLI.
+    # The problem is the combination of delimiter + pagination + ListObjects v1.
+ #aws s3api list-objects --bucket eprouvette --delimiter '/' --page-size 1 >$CMDOUT
+ #[ $(jq '.Contents | length' $CMDOUT) == 3 ]
+ #[ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
+ aws s3api list-objects --bucket eprouvette --prefix 'a/' >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 5 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-objects --bucket eprouvette --prefix 'a/' --delimiter '/' >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 4 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
+ aws s3api list-objects --bucket eprouvette --prefix 'a/' --page-size 1 >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 5 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ # @FIXME idem
+ #aws s3api list-objects --bucket eprouvette --prefix 'a/' --delimiter '/' --page-size 1 >$CMDOUT
+ #[ $(jq '.Contents | length' $CMDOUT) == 4 ]
+ #[ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
+ aws s3api list-objects --bucket eprouvette --starting-token 'Z' >$CMDOUT
+ [ $(jq '.Contents | length' $CMDOUT) == 8 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-objects --bucket eprouvette --starting-token 'c' >$CMDOUT
+ ! [ -s $CMDOUT ]
+
+ aws s3api list-objects-v2 --bucket eprouvette | \
+ jq -c '. | {Objects: [.Contents[] | {Key: .Key}], Quiet: true}' | \
+ aws s3api delete-objects --bucket eprouvette --delete file:///dev/stdin
+
+
+ echo "Test Multipart Upload"
+ aws s3api create-multipart-upload --bucket eprouvette --key a
+ aws s3api create-multipart-upload --bucket eprouvette --key a
+ aws s3api create-multipart-upload --bucket eprouvette --key c
+ aws s3api create-multipart-upload --bucket eprouvette --key c/a
+ aws s3api create-multipart-upload --bucket eprouvette --key c/b
+
+ aws s3api list-multipart-uploads --bucket eprouvette >$CMDOUT
+ [ $(jq '.Uploads | length' $CMDOUT) == 5 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-multipart-uploads --bucket eprouvette --page-size 1 >$CMDOUT
+ [ $(jq '.Uploads | length' $CMDOUT) == 5 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-multipart-uploads --bucket eprouvette --delimiter '/' >$CMDOUT
+ [ $(jq '.Uploads | length' $CMDOUT) == 3 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
+ aws s3api list-multipart-uploads --bucket eprouvette --delimiter '/' --page-size 1 >$CMDOUT
+ [ $(jq '.Uploads | length' $CMDOUT) == 3 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
+ aws s3api list-multipart-uploads --bucket eprouvette --prefix 'c' >$CMDOUT
+ [ $(jq '.Uploads | length' $CMDOUT) == 3 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-multipart-uploads --bucket eprouvette --prefix 'c' --page-size 1 >$CMDOUT
+ [ $(jq '.Uploads | length' $CMDOUT) == 3 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-multipart-uploads --bucket eprouvette --prefix 'c' --delimiter '/' >$CMDOUT
+ [ $(jq '.Uploads | length' $CMDOUT) == 1 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
+ aws s3api list-multipart-uploads --bucket eprouvette --prefix 'c' --delimiter '/' --page-size 1 >$CMDOUT
+ [ $(jq '.Uploads | length' $CMDOUT) == 1 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
+ aws s3api list-multipart-uploads --bucket eprouvette --starting-token 'ZZZZZ' >$CMDOUT
+ [ $(jq '.Uploads | length' $CMDOUT) == 5 ]
+ [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
+ aws s3api list-multipart-uploads --bucket eprouvette --starting-token 'd' >$CMDOUT
+ ! [ -s $CMDOUT ]
+
+ aws s3api list-multipart-uploads --bucket eprouvette | \
+ jq -r '.Uploads[] | "\(.Key) \(.UploadId)"' | \
+ while read r; do
+ key=$(echo $r|cut -d' ' -f 1);
+ uid=$(echo $r|cut -d' ' -f 2);
+ aws s3api abort-multipart-upload --bucket eprouvette --key $key --upload-id $uid;
+ echo "Deleted ${key}:${uid}"
+ done
+fi
+
+if [ -z "$SKIP_AWS" ]; then
echo "🪣 Test bucket logic "
AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1`
[ $(aws s3 ls | wc -l) == 1 ]
@@ -151,5 +288,6 @@ AWS_SECRET_ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2`
garage -c /tmp/config.1.toml bucket deny --read --write eprouvette --key $AWS_ACCESS_KEY_ID
garage -c /tmp/config.1.toml bucket delete --yes eprouvette
garage -c /tmp/config.1.toml key delete --yes $AWS_ACCESS_KEY_ID
+exec 3>&-
echo "✅ Success"
diff --git a/shell.nix b/shell.nix
index a4062f79..1d70730e 100644
--- a/shell.nix
+++ b/shell.nix
@@ -83,6 +83,7 @@ function refresh_toolchain {
pkgs.which
pkgs.openssl
pkgs.curl
+ pkgs.jq
] else [])
++
(if release then [
diff --git a/src/api/api_server.rs b/src/api/api_server.rs
index 41aa0046..16156e74 100644
--- a/src/api/api_server.rs
+++ b/src/api/api_server.rs
@@ -1,3 +1,4 @@
+use std::cmp::{max, min};
use std::net::SocketAddr;
use std::sync::Arc;
@@ -217,16 +218,18 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon
handle_list(
garage,
&ListObjectsQuery {
+ common: ListQueryCommon {
+ bucket_name: bucket,
+ bucket_id,
+ delimiter: delimiter.map(|d| d.to_string()),
+ page_size: max_keys.map(|p| min(1000, max(1, p))).unwrap_or(1000),
+ prefix: prefix.unwrap_or_default(),
+ urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false),
+ },
is_v2: false,
- bucket_name: bucket,
- bucket_id,
- delimiter: delimiter.map(|d| d.to_string()),
- max_keys: max_keys.unwrap_or(1000),
- prefix: prefix.unwrap_or_default(),
marker,
continuation_token: None,
start_after: None,
- urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false),
},
)
.await
@@ -246,16 +249,18 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon
handle_list(
garage,
&ListObjectsQuery {
+ common: ListQueryCommon {
+ bucket_name: bucket,
+ bucket_id,
+ delimiter: delimiter.map(|d| d.to_string()),
+ page_size: max_keys.map(|p| min(1000, max(1, p))).unwrap_or(1000),
+ urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false),
+ prefix: prefix.unwrap_or_default(),
+ },
is_v2: true,
- bucket_name: bucket,
- bucket_id,
- delimiter: delimiter.map(|d| d.to_string()),
- max_keys: max_keys.unwrap_or(1000),
- prefix: prefix.unwrap_or_default(),
marker: None,
continuation_token,
start_after,
- urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false),
},
)
.await
@@ -266,6 +271,32 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon
)))
}
}
+ Endpoint::ListMultipartUploads {
+ bucket,
+ delimiter,
+ encoding_type,
+ key_marker,
+ max_uploads,
+ prefix,
+ upload_id_marker,
+ } => {
+ handle_list_multipart_upload(
+ garage,
+ &ListMultipartUploadsQuery {
+ common: ListQueryCommon {
+ bucket_name: bucket,
+ bucket_id,
+ delimiter: delimiter.map(|d| d.to_string()),
+ page_size: max_uploads.map(|p| min(1000, max(1, p))).unwrap_or(1000),
+ prefix: prefix.unwrap_or_default(),
+ urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false),
+ },
+ key_marker,
+ upload_id_marker,
+ },
+ )
+ .await
+ }
Endpoint::DeleteObjects { .. } => {
handle_delete_objects(garage, bucket_id, req, content_sha256).await
}
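
Note: the three list endpoints above (ListObjects, ListObjectsV2, ListMultipartUploads) all clamp the client-supplied page size into the 1..=1000 range before building ListQueryCommon. A minimal standalone sketch of that clamping; the helper name is illustrative and not part of this patch:

use std::cmp::{max, min};

// Mirrors the `max_keys.map(|p| min(1000, max(1, p))).unwrap_or(1000)` expression above:
// absent parameter => 1000, zero => 1, anything above 1000 => 1000.
fn clamp_page_size(requested: Option<usize>) -> usize {
    requested.map(|p| min(1000, max(1, p))).unwrap_or(1000)
}

fn main() {
    assert_eq!(clamp_page_size(None), 1000);
    assert_eq!(clamp_page_size(Some(0)), 1);
    assert_eq!(clamp_page_size(Some(999_999_999_999_999)), 1000); // cf. the huge --page-size in the smoke test
}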
diff --git a/src/api/s3_bucket.rs b/src/api/s3_bucket.rs
index 425d2998..494224c8 100644
--- a/src/api/s3_bucket.rs
+++ b/src/api/s3_bucket.rs
@@ -7,6 +7,7 @@ use garage_model::bucket_alias_table::*;
use garage_model::bucket_table::Bucket;
use garage_model::garage::Garage;
use garage_model::key_table::Key;
+use garage_model::object_table::ObjectFilter;
use garage_model::permission::BucketKeyPerm;
use garage_table::util::*;
use garage_util::crdt::*;
@@ -226,7 +227,7 @@ pub async fn handle_delete_bucket(
// Check bucket is empty
let objects = garage
.object_table
- .get_range(&bucket_id, None, Some(DeletedFilter::NotDeleted), 10)
+ .get_range(&bucket_id, None, Some(ObjectFilter::IsData), 10)
.await?;
if !objects.is_empty() {
return Err(Error::BucketNotEmpty);
diff --git a/src/api/s3_list.rs b/src/api/s3_list.rs
index 07efb02d..ddc03375 100644
--- a/src/api/s3_list.rs
+++ b/src/api/s3_list.rs
@@ -1,4 +1,5 @@
use std::collections::{BTreeMap, BTreeSet};
+use std::iter::{Iterator, Peekable};
use std::sync::Arc;
use hyper::{Body, Response};
@@ -10,308 +11,721 @@ use garage_util::time::*;
use garage_model::garage::Garage;
use garage_model::object_table::*;
-use garage_table::DeletedFilter;
-
use crate::encoding::*;
use crate::error::*;
+use crate::s3_put;
use crate::s3_xml;
#[derive(Debug)]
-pub struct ListObjectsQuery {
- pub is_v2: bool,
+pub struct ListQueryCommon {
pub bucket_name: String,
pub bucket_id: Uuid,
pub delimiter: Option<String>,
- pub max_keys: usize,
+ pub page_size: usize,
pub prefix: String,
+ pub urlencode_resp: bool,
+}
+
+#[derive(Debug)]
+pub struct ListObjectsQuery {
+ pub is_v2: bool,
pub marker: Option<String>,
pub continuation_token: Option<String>,
pub start_after: Option<String>,
- pub urlencode_resp: bool,
+ pub common: ListQueryCommon,
}
#[derive(Debug)]
-struct ListResultInfo {
- last_modified: u64,
- size: u64,
- etag: String,
+pub struct ListMultipartUploadsQuery {
+ pub key_marker: Option<String>,
+ pub upload_id_marker: Option<String>,
+ pub common: ListQueryCommon,
}
pub async fn handle_list(
garage: Arc<Garage>,
query: &ListObjectsQuery,
) -> Result<Response<Body>, Error> {
- let mut result_keys = BTreeMap::<String, ListResultInfo>::new();
- let mut result_common_prefixes = BTreeSet::<String>::new();
-
- // Determine the key from where we want to start fetch objects
- // from the database, and whether the object at this key must
- // be included or excluded from the response.
- // This key can be the prefix in the base case, or intermediate
- // points in the dataset if we are continuing a previous listing.
- #[allow(clippy::collapsible_else_if)]
- let (mut next_chunk_start, mut next_chunk_exclude_start) = if query.is_v2 {
- if let Some(ct) = &query.continuation_token {
- // In V2 mode, the continuation token is defined as an opaque
- // string in the spec, so we can do whatever we want with it.
- // In our case, it is defined as either [ or ] (for include
- // and exclude, respectively), followed by a base64 string
- // representing the key to start with.
- let exclude = match &ct[..1] {
- "[" => false,
- "]" => true,
- _ => return Err(Error::BadRequest("Invalid continuation token".to_string())),
- };
- (
- String::from_utf8(base64::decode(ct[1..].as_bytes())?)?,
- exclude,
- )
- } else if let Some(sa) = &query.start_after {
- // StartAfter has defined semantics in the spec:
- // start listing at the first key immediately after.
- (sa.clone(), true)
- } else {
- // In the case where neither is specified, we start
- // listing at the specified prefix. If an object has this
- // exact same key, we include it. (TODO is this correct?)
- (query.prefix.clone(), false)
+ let io = |bucket, key, count| {
+ let t = &garage.object_table;
+ async move {
+ t.get_range(&bucket, key, Some(ObjectFilter::IsData), count)
+ .await
}
- } else {
- if let Some(mk) = &query.marker {
- // In V1 mode, the spec defines the Marker value to mean
- // the same thing as the StartAfter value in V2 mode.
- (mk.clone(), true)
- } else {
- // Base case, same as in V2 mode
- (query.prefix.clone(), false)
+ };
+
+ let mut acc = query.build_accumulator();
+ let pagination = fetch_list_entries(&query.common, query.begin()?, &mut acc, &io).await?;
+
+ let result = s3_xml::ListBucketResult {
+ xmlns: (),
+ // Sending back request information
+ name: s3_xml::Value(query.common.bucket_name.to_string()),
+ prefix: uriencode_maybe(&query.common.prefix, query.common.urlencode_resp),
+ max_keys: s3_xml::IntValue(query.common.page_size as i64),
+ delimiter: query
+ .common
+ .delimiter
+ .as_ref()
+ .map(|x| uriencode_maybe(x, query.common.urlencode_resp)),
+ encoding_type: match query.common.urlencode_resp {
+ true => Some(s3_xml::Value("url".to_string())),
+ false => None,
+ },
+ marker: match (!query.is_v2, &query.marker) {
+ (true, Some(k)) => Some(uriencode_maybe(k, query.common.urlencode_resp)),
+ _ => None,
+ },
+ start_after: match (query.is_v2, &query.start_after) {
+ (true, Some(sa)) => Some(uriencode_maybe(sa, query.common.urlencode_resp)),
+ _ => None,
+ },
+ continuation_token: match (query.is_v2, &query.continuation_token) {
+ (true, Some(ct)) => Some(s3_xml::Value(ct.to_string())),
+ _ => None,
+ },
+
+ // Pagination
+ is_truncated: s3_xml::Value(format!("{}", pagination.is_some())),
+ key_count: Some(s3_xml::IntValue(
+ acc.keys.len() as i64 + acc.common_prefixes.len() as i64,
+ )),
+ next_marker: match (!query.is_v2, &pagination) {
+ (true, Some(RangeBegin::AfterKey { key: k }))
+ | (
+ true,
+ Some(RangeBegin::IncludingKey {
+ fallback_key: Some(k),
+ ..
+ }),
+ ) => Some(uriencode_maybe(k, query.common.urlencode_resp)),
+ _ => None,
+ },
+ next_continuation_token: match (query.is_v2, &pagination) {
+ (true, Some(RangeBegin::AfterKey { key })) => Some(s3_xml::Value(format!(
+ "]{}",
+ base64::encode(key.as_bytes())
+ ))),
+ (true, Some(RangeBegin::IncludingKey { key, .. })) => Some(s3_xml::Value(format!(
+ "[{}",
+ base64::encode(key.as_bytes())
+ ))),
+ _ => None,
+ },
+
+ // Body
+ contents: acc
+ .keys
+ .iter()
+ .map(|(key, info)| s3_xml::ListBucketItem {
+ key: uriencode_maybe(key, query.common.urlencode_resp),
+ last_modified: s3_xml::Value(msec_to_rfc3339(info.last_modified)),
+ size: s3_xml::IntValue(info.size as i64),
+ etag: s3_xml::Value(info.etag.to_string()),
+ storage_class: s3_xml::Value("STANDARD".to_string()),
+ })
+ .collect(),
+ common_prefixes: acc
+ .common_prefixes
+ .iter()
+ .map(|pfx| s3_xml::CommonPrefix {
+ prefix: uriencode_maybe(pfx, query.common.urlencode_resp),
+ })
+ .collect(),
+ };
+
+ let xml = s3_xml::to_xml_with_header(&result)?;
+ Ok(Response::builder()
+ .header("Content-Type", "application/xml")
+ .body(Body::from(xml.into_bytes()))?)
+}
+
+pub async fn handle_list_multipart_upload(
+ garage: Arc<Garage>,
+ query: &ListMultipartUploadsQuery,
+) -> Result<Response<Body>, Error> {
+ let io = |bucket, key, count| {
+ let t = &garage.object_table;
+ async move {
+ t.get_range(&bucket, key, Some(ObjectFilter::IsUploading), count)
+ .await
}
};
- debug!(
- "List request: `{:?}` {} `{}`, start from {}, exclude first {}",
- query.delimiter, query.max_keys, query.prefix, next_chunk_start, next_chunk_exclude_start
- );
+ let mut acc = query.build_accumulator();
+ let pagination = fetch_list_entries(&query.common, query.begin()?, &mut acc, &io).await?;
+
+ let result = s3_xml::ListMultipartUploadsResult {
+ xmlns: (),
+
+ // Sending back some information about the request
+ bucket: s3_xml::Value(query.common.bucket_name.to_string()),
+ prefix: uriencode_maybe(&query.common.prefix, query.common.urlencode_resp),
+ delimiter: query
+ .common
+ .delimiter
+ .as_ref()
+ .map(|d| uriencode_maybe(d, query.common.urlencode_resp)),
+ max_uploads: s3_xml::IntValue(query.common.page_size as i64),
+ key_marker: query
+ .key_marker
+ .as_ref()
+ .map(|m| uriencode_maybe(m, query.common.urlencode_resp)),
+ upload_id_marker: query
+ .upload_id_marker
+ .as_ref()
+ .map(|m| s3_xml::Value(m.to_string())),
+ encoding_type: match query.common.urlencode_resp {
+ true => Some(s3_xml::Value("url".to_string())),
+ false => None,
+ },
+
+ // Handling pagination
+ is_truncated: s3_xml::Value(format!("{}", pagination.is_some())),
+ next_key_marker: match &pagination {
+ None => None,
+ Some(RangeBegin::AfterKey { key })
+ | Some(RangeBegin::AfterUpload { key, .. })
+ | Some(RangeBegin::IncludingKey { key, .. }) => {
+ Some(uriencode_maybe(key, query.common.urlencode_resp))
+ }
+ },
+ next_upload_id_marker: match pagination {
+ Some(RangeBegin::AfterUpload { upload, .. }) => {
+ Some(s3_xml::Value(hex::encode(upload)))
+ }
+ Some(RangeBegin::IncludingKey { .. }) => Some(s3_xml::Value("include".to_string())),
+ _ => None,
+ },
+
+ // Result body
+ upload: acc
+ .keys
+ .iter()
+ .map(|(uuid, info)| s3_xml::ListMultipartItem {
+ initiated: s3_xml::Value(msec_to_rfc3339(info.timestamp)),
+ key: uriencode_maybe(&info.key, query.common.urlencode_resp),
+ upload_id: s3_xml::Value(hex::encode(uuid)),
+ storage_class: s3_xml::Value("STANDARD".to_string()),
+ initiator: s3_xml::Initiator {
+ display_name: s3_xml::Value("Dummy Key".to_string()),
+ id: s3_xml::Value("GKDummyKey".to_string()),
+ },
+ owner: s3_xml::Owner {
+ display_name: s3_xml::Value("Dummy Key".to_string()),
+ id: s3_xml::Value("GKDummyKey".to_string()),
+ },
+ })
+ .collect(),
+ common_prefixes: acc
+ .common_prefixes
+ .iter()
+ .map(|c| s3_xml::CommonPrefix {
+ prefix: s3_xml::Value(c.to_string()),
+ })
+ .collect(),
+ };
+
+ let xml = s3_xml::to_xml_with_header(&result)?;
- // `truncated` is a boolean that determines whether there are
- // more items to be added.
- let truncated;
- // `last_processed_item` is the key of the last item
- // that was included in the listing before truncating.
- let mut last_processed_item = None;
+ Ok(Response::builder()
+ .header("Content-Type", "application/xml")
+ .body(Body::from(xml.into_bytes()))?)
+}
+
+/*
+ * Private enums and structs
+ */
+
+#[derive(Debug)]
+struct ObjectInfo {
+ last_modified: u64,
+ size: u64,
+ etag: String,
+}
+
+#[derive(Debug, PartialEq)]
+struct UploadInfo {
+ key: String,
+ timestamp: u64,
+}
+
+enum ExtractionResult {
+ NoMore,
+ Filled,
+ FilledAtUpload {
+ key: String,
+ upload: Uuid,
+ },
+ Extracted {
+ key: String,
+ },
+ // Fallback key is used for legacy APIs that only support
+	// exclusive pagination (not an inclusive one).
+ SkipTo {
+ key: String,
+ fallback_key: Option<String>,
+ },
+}
+
+#[derive(PartialEq, Clone, Debug)]
+enum RangeBegin {
+ // Fallback key is used for legacy APIs that only support
+	// exclusive pagination (not an inclusive one).
+ IncludingKey {
+ key: String,
+ fallback_key: Option<String>,
+ },
+ AfterKey {
+ key: String,
+ },
+ AfterUpload {
+ key: String,
+ upload: Uuid,
+ },
+}
+type Pagination = Option<RangeBegin>;
+
+/*
+ * Fetch list entries
+ */
+
+async fn fetch_list_entries<R, F>(
+ query: &ListQueryCommon,
+ begin: RangeBegin,
+ acc: &mut impl ExtractAccumulator,
+ mut io: F,
+) -> Result<Pagination, Error>
+where
+ R: futures::Future<Output = Result<Vec<Object>, GarageError>>,
+ F: FnMut(Uuid, Option<String>, usize) -> R,
+{
+ let mut cursor = begin;
+ // +1 is needed as we may need to skip the 1st key
+ // (range is inclusive while most S3 requests are exclusive)
+ let count = query.page_size + 1;
+
+ loop {
+ let start_key = match cursor {
+ RangeBegin::AfterKey { ref key }
+ | RangeBegin::AfterUpload { ref key, .. }
+ | RangeBegin::IncludingKey { ref key, .. } => Some(key.clone()),
+ };
- 'query_loop: loop {
// Fetch objects
- let objects = garage
- .object_table
- .get_range(
- &query.bucket_id,
- Some(next_chunk_start.clone()),
- Some(DeletedFilter::NotDeleted),
- query.max_keys + 1,
- )
- .await?;
+ let objects = io(query.bucket_id, start_key.clone(), count).await?;
+
debug!(
- "List: get range {} (max {}), results: {}",
- next_chunk_start,
- query.max_keys + 1,
+ "List: get range {:?} (max {}), results: {}",
+ start_key,
+ count,
objects.len()
);
- let current_chunk_start = next_chunk_start.clone();
-
- // Iterate on returned objects and add them to the response.
- // If a delimiter is specified, we take care of grouping objects
- // into CommonPrefixes.
- for object in objects.iter() {
- // If we have retrieved an object that doesn't start with
- // the prefix, we know we have finished listing our stuff.
- if !object.key.starts_with(&query.prefix) {
- truncated = false;
- break 'query_loop;
- }
+ let server_more = objects.len() >= count;
- // Exclude the starting key if we have to.
- if object.key == next_chunk_start && next_chunk_exclude_start {
- continue;
- }
+ let prev_req_cursor = cursor.clone();
+ let mut iter = objects.iter().peekable();
- // Find if this object has a currently valid (non-deleted,
- // non-still-uploading) version. If not, skip it.
- let version = match object.versions().iter().find(|x| x.is_data()) {
- Some(v) => v,
- None => continue,
- };
+ // Drop the first key if needed
+		// Only AfterKey requires it, according to the S3 spec and our implementation.
+ match (&cursor, iter.peek()) {
+ (RangeBegin::AfterKey { key }, Some(object)) if &object.key == key => iter.next(),
+ (_, _) => None,
+ };
- // If we don't have space to add this object to our response,
- // we will need to stop here and mark the key of this object
- // as the marker from where
- // we want to start again in the next list call.
- let cannot_add = result_keys.len() + result_common_prefixes.len() >= query.max_keys;
-
- // Determine whether this object should be grouped inside
- // a CommonPrefix because it contains the delimiter,
- // or if it should be returned as an object.
- let common_prefix = match &query.delimiter {
- Some(delimiter) => object.key[query.prefix.len()..]
- .find(delimiter)
- .map(|i| &object.key[..query.prefix.len() + i + delimiter.len()]),
- None => None,
- };
- if let Some(pfx) = common_prefix {
- // In the case where this object must be grouped in a
- // common prefix, handle it here.
- if !result_common_prefixes.contains(pfx) {
- // Determine the first listing key that starts after
- // the common prefix, by finding the next possible
- // string by alphabetical order.
- let mut first_key_after_prefix = pfx.to_string();
- let tail = first_key_after_prefix.pop().unwrap();
- first_key_after_prefix.push(((tail as u8) + 1) as char);
-
- // If this were the end of the chunk,
- // the next chunk should start after this prefix
- next_chunk_start = first_key_after_prefix;
- next_chunk_exclude_start = false;
-
- if cannot_add {
- truncated = true;
- break 'query_loop;
- }
- result_common_prefixes.insert(pfx.to_string());
+ while let Some(object) = iter.peek() {
+ if !object.key.starts_with(&query.prefix) {
+ // If the key is not in the requested prefix, we're done
+ return Ok(None);
+ }
+
+ cursor = match acc.extract(query, &cursor, &mut iter) {
+ ExtractionResult::Extracted { key } => RangeBegin::AfterKey { key },
+ ExtractionResult::SkipTo { key, fallback_key } => {
+ RangeBegin::IncludingKey { key, fallback_key }
}
- last_processed_item = Some(object.key.clone());
- continue;
+ ExtractionResult::FilledAtUpload { key, upload } => {
+ return Ok(Some(RangeBegin::AfterUpload { key, upload }))
+ }
+ ExtractionResult::Filled => return Ok(Some(cursor)),
+ ExtractionResult::NoMore => return Ok(None),
};
+ }
- // This is not a common prefix, we want to add it to our
- // response directly.
- next_chunk_start = object.key.clone();
+ if !server_more {
+ // We did not fully fill the accumulator despite exhausting all the data we have,
+			// so we're done.
+ return Ok(None);
+ }
- if cannot_add {
- truncated = true;
- next_chunk_exclude_start = false;
- break 'query_loop;
- }
+ if prev_req_cursor == cursor {
+			unreachable!("No progress was made in the loop. This is a bug, please report it.");
+ }
+ }
+}
- let meta = match &version.state {
- ObjectVersionState::Complete(ObjectVersionData::Inline(meta, _)) => meta,
- ObjectVersionState::Complete(ObjectVersionData::FirstBlock(meta, _)) => meta,
- _ => unreachable!(),
- };
- let info = match result_keys.get(&object.key) {
- None => ListResultInfo {
- last_modified: version.timestamp,
- size: meta.size,
- etag: meta.etag.to_string(),
+/*
+ * ListQuery logic
+ */
+
+/// Determine the key from which we want to start fetching objects from the database
+///
+/// We choose whether the object at this key must
+/// be included or excluded from the response.
+/// This key can be the prefix in the base case, or intermediate
+/// points in the dataset if we are continuing a previous listing.
+impl ListObjectsQuery {
+ fn build_accumulator(&self) -> Accumulator<String, ObjectInfo> {
+ Accumulator::<String, ObjectInfo>::new(self.common.page_size)
+ }
+
+ fn begin(&self) -> Result<RangeBegin, Error> {
+ if self.is_v2 {
+ match (&self.continuation_token, &self.start_after) {
+ // In V2 mode, the continuation token is defined as an opaque
+ // string in the spec, so we can do whatever we want with it.
+				// In our case, it is defined as either [ or ] (for include
+				// and exclude, respectively), followed by a base64 string
+				// representing the key to start with.
+ (Some(token), _) => match &token[..1] {
+ "[" => Ok(RangeBegin::IncludingKey {
+ key: String::from_utf8(base64::decode(token[1..].as_bytes())?)?,
+ fallback_key: None,
+ }),
+ "]" => Ok(RangeBegin::AfterKey {
+ key: String::from_utf8(base64::decode(token[1..].as_bytes())?)?,
+ }),
+ _ => Err(Error::BadRequest("Invalid continuation token".to_string())),
},
- Some(_lri) => {
- return Err(Error::InternalError(GarageError::Message(format!(
- "Duplicate key?? {} (this is a bug, please report it)",
- object.key
- ))))
+
+ // StartAfter has defined semantics in the spec:
+ // start listing at the first key immediately after.
+ (_, Some(key)) => Ok(RangeBegin::AfterKey {
+ key: key.to_string(),
+ }),
+
+ // In the case where neither is specified, we start
+ // listing at the specified prefix. If an object has this
+ // exact same key, we include it. (@TODO is this correct?)
+ _ => Ok(RangeBegin::IncludingKey {
+ key: self.common.prefix.to_string(),
+ fallback_key: None,
+ }),
+ }
+ } else {
+ match &self.marker {
+ // In V1 mode, the spec defines the Marker value to mean
+ // the same thing as the StartAfter value in V2 mode.
+ Some(key) => Ok(RangeBegin::AfterKey {
+ key: key.to_string(),
+ }),
+ _ => Ok(RangeBegin::IncludingKey {
+ key: self.common.prefix.to_string(),
+ fallback_key: None,
+ }),
+ }
+ }
+ }
+}
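
The begin() method above documents the ListObjectsV2 continuation-token convention: an opaque string whose first character is '[' (resume including the key) or ']' (resume after the key), followed by the base64-encoded key. A standalone sketch of that convention, assuming the same pre-1.0 base64 crate API (base64::encode / base64::decode) used elsewhere in this patch; the function names are illustrative:

// '[' + base64(key) => resume including `key`; ']' + base64(key) => resume after `key`.
fn encode_token(key: &str, exclude: bool) -> String {
    format!("{}{}", if exclude { "]" } else { "[" }, base64::encode(key.as_bytes()))
}

fn decode_token(token: &str) -> Option<(String, bool)> {
    let exclude = match token.get(..1)? {
        "[" => false,
        "]" => true,
        _ => return None, // invalid token
    };
    let key = String::from_utf8(base64::decode(&token[1..]).ok()?).ok()?;
    Some((key, exclude))
}

fn main() {
    let token = encode_token("a/b/c", true);
    assert_eq!(decode_token(&token), Some(("a/b/c".to_string(), true)));
}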
+
+impl ListMultipartUploadsQuery {
+ fn build_accumulator(&self) -> Accumulator<Uuid, UploadInfo> {
+ Accumulator::<Uuid, UploadInfo>::new(self.common.page_size)
+ }
+
+ fn begin(&self) -> Result<RangeBegin, Error> {
+ match (&self.upload_id_marker, &self.key_marker) {
+			// If both the upload id marker and the key marker are set,
+			// the spec specifies that we must start listing uploads INCLUDING the given key,
+			// AFTER the specified upload id (sorted in lexicographic order).
+			// To enable some optimisations, we emulate "IncludingKey" by extending the upload id
+			// semantics: we rely on the hypothesis that S3's upload ids are opaque to clients,
+			// while Garage's are 32-byte hex-encoded UUIDs, which lets us extend this query
+			// with a special "include" upload id.
+ (Some(up_marker), Some(key_marker)) => match &up_marker[..] {
+ "include" => Ok(RangeBegin::IncludingKey {
+ key: key_marker.to_string(),
+ fallback_key: None,
+ }),
+ uuid => Ok(RangeBegin::AfterUpload {
+ key: key_marker.to_string(),
+ upload: s3_put::decode_upload_id(uuid)?,
+ }),
+ },
+
+ // If only the key marker is specified, the spec says that we must start listing
+ // uploads AFTER the specified key.
+ (None, Some(key_marker)) => Ok(RangeBegin::AfterKey {
+ key: key_marker.to_string(),
+ }),
+ _ => Ok(RangeBegin::IncludingKey {
+ key: self.common.prefix.to_string(),
+ fallback_key: None,
+ }),
+ }
+ }
+}
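
The begin() method above resolves the (key_marker, upload_id_marker) pair into a resume point, using the special "include" upload id to emulate inclusive pagination on top of Garage's hex-encoded upload UUIDs. A standalone sketch of that dispatch, with a hypothetical helper and enum mirroring the match above (not part of the patch):

enum Resume {
    IncludingKey(String),
    AfterUpload { key: String, upload_id: String },
    AfterKey(String),
    FromPrefix,
}

// "include" => resume *including* key_marker; any other upload id => resume *after* that
// upload of key_marker; a key marker alone => resume after that key; nothing => the prefix.
fn resume_point(key_marker: Option<&str>, upload_id_marker: Option<&str>) -> Resume {
    match (upload_id_marker, key_marker) {
        (Some("include"), Some(k)) => Resume::IncludingKey(k.to_string()),
        (Some(id), Some(k)) => Resume::AfterUpload { key: k.to_string(), upload_id: id.to_string() },
        (None, Some(k)) => Resume::AfterKey(k.to_string()),
        _ => Resume::FromPrefix,
    }
}

fn main() {
    assert!(matches!(resume_point(Some("a/b"), Some("include")), Resume::IncludingKey(_)));
    assert!(matches!(resume_point(Some("a/b"), None), Resume::AfterKey(_)));
}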
+
+/*
+ * Accumulator logic
+ */
+
+trait ExtractAccumulator {
+ fn extract<'a>(
+ &mut self,
+ query: &ListQueryCommon,
+ cursor: &RangeBegin,
+ iter: &mut Peekable<impl Iterator<Item = &'a Object>>,
+ ) -> ExtractionResult;
+}
+
+struct Accumulator<K, V> {
+ common_prefixes: BTreeSet<String>,
+ keys: BTreeMap<K, V>,
+ max_capacity: usize,
+}
+
+type ObjectAccumulator = Accumulator<String, ObjectInfo>;
+type UploadAccumulator = Accumulator<Uuid, UploadInfo>;
+
+impl<K: std::cmp::Ord, V> Accumulator<K, V> {
+ fn new(page_size: usize) -> Accumulator<K, V> {
+ Accumulator {
+ common_prefixes: BTreeSet::<String>::new(),
+ keys: BTreeMap::<K, V>::new(),
+ max_capacity: page_size,
+ }
+ }
+
+ /// Observe the Object iterator and try to extract a single common prefix
+ ///
+ /// This function can consume an arbitrary number of items as long as they share the same
+ /// common prefix.
+ fn extract_common_prefix<'a>(
+ &mut self,
+ objects: &mut Peekable<impl Iterator<Item = &'a Object>>,
+ query: &ListQueryCommon,
+ ) -> Option<ExtractionResult> {
+ // Get the next object from the iterator
+ let object = objects.peek().expect("This iterator can not be empty as it is checked earlier in the code. This is a logic bug, please report it.");
+
+		// Check if this is a common prefix (requires a delimiter to be set and present in the key)
+ let pfx = match common_prefix(object, query) {
+ Some(p) => p,
+ None => return None,
+ };
+
+ // Try to register this prefix
+ // If not possible, we can return early
+ if !self.try_insert_common_prefix(pfx.to_string()) {
+ return Some(ExtractionResult::Filled);
+ }
+
+ // We consume the whole common prefix from the iterator
+ let mut last_pfx_key = &object.key;
+ loop {
+ last_pfx_key = match objects.peek() {
+ Some(o) if o.key.starts_with(pfx) => &o.key,
+ Some(_) => {
+ return Some(ExtractionResult::Extracted {
+ key: last_pfx_key.to_owned(),
+ })
+ }
+ None => {
+ return match key_after_prefix(pfx) {
+ Some(next) => Some(ExtractionResult::SkipTo {
+ key: next,
+ fallback_key: Some(last_pfx_key.to_owned()),
+ }),
+ None => Some(ExtractionResult::NoMore),
+ }
}
};
- result_keys.insert(object.key.clone(), info);
- last_processed_item = Some(object.key.clone());
- next_chunk_exclude_start = true;
+
+ objects.next();
}
+ }
- // If our database returned less objects than what we were asking for,
- // it means that no more objects are in the bucket. So we stop here.
- if objects.len() < query.max_keys + 1 {
- truncated = false;
- break 'query_loop;
+ fn is_full(&mut self) -> bool {
+ self.keys.len() + self.common_prefixes.len() >= self.max_capacity
+ }
+
+ fn try_insert_common_prefix(&mut self, key: String) -> bool {
+ // If we already have an entry, we can continue
+ if self.common_prefixes.contains(&key) {
+ return true;
}
- // Sanity check: we should have added at least an object
- // or a prefix to our returned result.
- if next_chunk_start == current_chunk_start || last_processed_item.is_none() {
- return Err(Error::InternalError(GarageError::Message(format!(
- "S3 ListObject: made no progress, still starting at {} (this is a bug, please report it)", next_chunk_start))));
+ // Otherwise, we need to check if we can add it
+ match self.is_full() {
+ true => false,
+ false => {
+ self.common_prefixes.insert(key);
+ true
+ }
}
+ }
+
+ fn try_insert_entry(&mut self, key: K, value: V) -> bool {
+		// It is impossible to add the same key twice; doing so would be a bug
+ assert!(!self.keys.contains_key(&key));
- // Loop and fetch more objects
+ match self.is_full() {
+ true => false,
+ false => {
+ self.keys.insert(key, value);
+ true
+ }
+ }
}
+}
- let mut result = s3_xml::ListBucketResult {
- xmlns: (),
- name: s3_xml::Value(query.bucket_name.to_string()),
- prefix: uriencode_maybe(&query.prefix, query.urlencode_resp),
- marker: None,
- next_marker: None,
- start_after: None,
- continuation_token: None,
- next_continuation_token: None,
- max_keys: s3_xml::IntValue(query.max_keys as i64),
- delimiter: query
- .delimiter
- .as_ref()
- .map(|x| uriencode_maybe(x, query.urlencode_resp)),
- encoding_type: match query.urlencode_resp {
- true => Some(s3_xml::Value("url".to_string())),
- false => None,
- },
- key_count: Some(s3_xml::IntValue(
- result_keys.len() as i64 + result_common_prefixes.len() as i64,
- )),
- is_truncated: s3_xml::Value(format!("{}", truncated)),
- contents: vec![],
- common_prefixes: vec![],
- };
+impl ExtractAccumulator for ObjectAccumulator {
+ fn extract<'a>(
+ &mut self,
+ query: &ListQueryCommon,
+ _cursor: &RangeBegin,
+ objects: &mut Peekable<impl Iterator<Item = &'a Object>>,
+ ) -> ExtractionResult {
+ if let Some(e) = self.extract_common_prefix(objects, query) {
+ return e;
+ }
- if query.is_v2 {
- if let Some(ct) = &query.continuation_token {
- result.continuation_token = Some(s3_xml::Value(ct.to_string()));
+ let object = objects.next().expect("This iterator can not be empty as it is checked earlier in the code. This is a logic bug, please report it.");
+
+ let version = match object.versions().iter().find(|x| x.is_data()) {
+ Some(v) => v,
+ None => unreachable!(
+ "Expect to have objects having data due to earlier filtering. This is a logic bug."
+ ),
+ };
+
+ let meta = match &version.state {
+ ObjectVersionState::Complete(ObjectVersionData::Inline(meta, _)) => meta,
+ ObjectVersionState::Complete(ObjectVersionData::FirstBlock(meta, _)) => meta,
+ _ => unreachable!(),
+ };
+ let info = ObjectInfo {
+ last_modified: version.timestamp,
+ size: meta.size,
+ etag: meta.etag.to_string(),
+ };
+
+ match self.try_insert_entry(object.key.clone(), info) {
+ true => ExtractionResult::Extracted {
+ key: object.key.clone(),
+ },
+ false => ExtractionResult::Filled,
}
- if let Some(sa) = &query.start_after {
- result.start_after = Some(uriencode_maybe(sa, query.urlencode_resp));
+ }
+}
+
+impl ExtractAccumulator for UploadAccumulator {
+ /// Observe the iterator, process a single key, and try to extract one or more upload entries
+ ///
+ /// This function processes a single object from the iterator that can contain an arbitrary
+ /// number of versions, and thus "uploads".
+ fn extract<'a>(
+ &mut self,
+ query: &ListQueryCommon,
+ cursor: &RangeBegin,
+ objects: &mut Peekable<impl Iterator<Item = &'a Object>>,
+ ) -> ExtractionResult {
+ if let Some(e) = self.extract_common_prefix(objects, query) {
+ return e;
}
- if truncated {
- let b64 = base64::encode(next_chunk_start.as_bytes());
- let nct = if next_chunk_exclude_start {
- format!("]{}", b64)
- } else {
- format!("[{}", b64)
+
+ // Get the next object from the iterator
+ let object = objects.next().expect("This iterator can not be empty as it is checked earlier in the code. This is a logic bug, please report it.");
+
+ let mut uploads_for_key = object
+ .versions()
+ .iter()
+ .filter(|x| x.is_uploading())
+ .collect::<Vec<&ObjectVersion>>();
+
+ // S3 logic requires lexicographically sorted upload ids.
+ uploads_for_key.sort_unstable_by_key(|e| e.uuid);
+
+ // Skip results if an upload marker is provided
+ if let RangeBegin::AfterUpload { upload, .. } = cursor {
+ // Because our data are sorted, we can use a binary search to find the UUID
+ // or to find where it should have been added. Once this position is found,
+ // we use it to discard the first part of the array.
+ let idx = match uploads_for_key.binary_search_by(|e| e.uuid.cmp(upload)) {
+				// We start after the found UUID, so we need to discard the value it points to.
+				// In the worst case, the UUID is the last element, which leads us to an empty array,
+				// but we are never out of bounds.
+ Ok(i) => i + 1,
+				// If the UUID is not found, the upload may have been discarded between the two requests;
+				// the search returns the index where it would have been inserted,
+				// and the value at that index is thus greater than our marker, so we need to keep it.
+ Err(i) => i,
};
- result.next_continuation_token = Some(s3_xml::Value(nct));
+ uploads_for_key = uploads_for_key[idx..].to_vec();
}
- } else {
- // TODO: are these supposed to be urlencoded when encoding-type is URL??
- if let Some(mkr) = &query.marker {
- result.marker = Some(uriencode_maybe(mkr, query.urlencode_resp));
+
+ let mut iter = uploads_for_key.iter();
+
+ // The first entry is a specific case
+ // as it changes our result enum type
+ let first_upload = match iter.next() {
+ Some(u) => u,
+ None => {
+ return ExtractionResult::Extracted {
+ key: object.key.clone(),
+ }
+ }
+ };
+ let first_up_info = UploadInfo {
+ key: object.key.to_string(),
+ timestamp: first_upload.timestamp,
+ };
+ if !self.try_insert_entry(first_upload.uuid, first_up_info) {
+ return ExtractionResult::Filled;
}
- if truncated {
- if let Some(lpi) = last_processed_item {
- result.next_marker = Some(uriencode_maybe(&lpi, query.urlencode_resp));
- } else {
- return Err(Error::InternalError(GarageError::Message(
- "S3 ListObject: last_processed_item is None but the response was truncated, indicating that many items were processed (this is a bug, please report it)".to_string())));
+
+ // We can then collect the remaining uploads in a loop
+ let mut prev_uuid = first_upload.uuid;
+ for upload in iter {
+ let up_info = UploadInfo {
+ key: object.key.to_string(),
+ timestamp: upload.timestamp,
+ };
+
+ // Insert data in our accumulator
+ // If it is full, return information to paginate.
+ if !self.try_insert_entry(upload.uuid, up_info) {
+ return ExtractionResult::FilledAtUpload {
+ key: object.key.clone(),
+ upload: prev_uuid,
+ };
}
+ // Update our last added UUID
+ prev_uuid = upload.uuid;
}
- }
- for (key, info) in result_keys.iter() {
- result.contents.push(s3_xml::ListBucketItem {
- key: uriencode_maybe(key, query.urlencode_resp),
- last_modified: s3_xml::Value(msec_to_rfc3339(info.last_modified)),
- size: s3_xml::IntValue(info.size as i64),
- etag: s3_xml::Value(info.etag.to_string()),
- storage_class: s3_xml::Value("STANDARD".to_string()),
- });
+ // We successfully collected all the uploads
+ ExtractionResult::Extracted {
+ key: object.key.clone(),
+ }
}
+}
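
The upload-skipping logic above leans on the index semantics of binary_search_by: Ok(i) points at the marker itself (so we resume at i + 1), while Err(i) is the insertion point of a missing marker (whose element is already greater, so we keep it). A tiny standalone illustration with arbitrary values, shown here with plain binary_search, which shares the same Ok/Err contract:

fn main() {
    let sorted = vec![10u8, 20, 30, 40];
    let after = |needle: u8| {
        let idx = match sorted.binary_search(&needle) {
            Ok(i) => i + 1, // marker found: drop it and everything before it
            Err(i) => i,    // marker missing: the element here is already greater, keep it
        };
        sorted[idx..].to_vec()
    };
    assert_eq!(after(20), vec![30, 40]); // marker present
    assert_eq!(after(25), vec![30, 40]); // marker disappeared between two requests
    assert!(after(99).is_empty());       // marker past the end: empty slice, never out of bounds
}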
- for pfx in result_common_prefixes.iter() {
- result.common_prefixes.push(s3_xml::CommonPrefix {
- prefix: uriencode_maybe(pfx, query.urlencode_resp),
- });
+/*
+ * Utility functions
+ */
+
+/// Returns the common prefix of the object given the query prefix and delimiter
+fn common_prefix<'a>(object: &'a Object, query: &ListQueryCommon) -> Option<&'a str> {
+ match &query.delimiter {
+ Some(delimiter) => object.key[query.prefix.len()..]
+ .find(delimiter)
+ .map(|i| &object.key[..query.prefix.len() + i + delimiter.len()]),
+ None => None,
}
-
- let xml = s3_xml::to_xml_with_header(&result)?;
-
- Ok(Response::builder()
- .header("Content-Type", "application/xml")
- .body(Body::from(xml.into_bytes()))?)
}
+/// URIencode a value if needed
fn uriencode_maybe(s: &str, yes: bool) -> s3_xml::Value {
if yes {
s3_xml::Value(uri_encode(s, true))
@@ -319,3 +733,285 @@ fn uriencode_maybe(s: &str, yes: bool) -> s3_xml::Value {
s3_xml::Value(s.to_string())
}
}
+
+const UTF8_BEFORE_LAST_CHAR: char = '\u{10FFFE}';
+
+/// Compute the key after the prefix
+fn key_after_prefix(pfx: &str) -> Option<String> {
+ let mut next = pfx.to_string();
+ while !next.is_empty() {
+ let tail = next.pop().unwrap();
+ if tail >= char::MAX {
+ continue;
+ }
+
+		// Circumvent a limitation of RangeFrom that overflows earlier than needed
+ // See: https://doc.rust-lang.org/core/ops/struct.RangeFrom.html
+ let new_tail = if tail == UTF8_BEFORE_LAST_CHAR {
+ char::MAX
+ } else {
+ (tail..).nth(1).unwrap()
+ };
+
+ next.push(new_tail);
+ return Some(next);
+ }
+
+ None
+}
+
+/*
+ * Unit tests of this module
+ */
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use std::iter::FromIterator;
+
+ const TS: u64 = 1641394898314;
+
+ fn bucket() -> Uuid {
+ Uuid::from([0x42; 32])
+ }
+
+ fn query() -> ListMultipartUploadsQuery {
+ ListMultipartUploadsQuery {
+ common: ListQueryCommon {
+ prefix: "".to_string(),
+ delimiter: Some("/".to_string()),
+ page_size: 1000,
+ urlencode_resp: false,
+ bucket_name: "a".to_string(),
+ bucket_id: Uuid::from([0x00; 32]),
+ },
+ key_marker: None,
+ upload_id_marker: None,
+ }
+ }
+
+ fn objs() -> Vec<Object> {
+ vec![
+ Object::new(
+ bucket(),
+ "a/b/c".to_string(),
+ vec![objup_version([0x01; 32])],
+ ),
+ Object::new(bucket(), "d".to_string(), vec![objup_version([0x01; 32])]),
+ ]
+ }
+
+ fn objup_version(uuid: [u8; 32]) -> ObjectVersion {
+ ObjectVersion {
+ uuid: Uuid::from(uuid),
+ timestamp: TS,
+ state: ObjectVersionState::Uploading(ObjectVersionHeaders {
+ content_type: "text/plain".to_string(),
+ other: BTreeMap::<String, String>::new(),
+ }),
+ }
+ }
+
+ #[test]
+ fn test_key_after_prefix() {
+ assert_eq!(UTF8_BEFORE_LAST_CHAR as u32, (char::MAX as u32) - 1);
+ assert_eq!(key_after_prefix("a/b/").unwrap().as_str(), "a/b0");
+ assert_eq!(key_after_prefix("€").unwrap().as_str(), "₭");
+ assert_eq!(
+ key_after_prefix("􏿽").unwrap().as_str(),
+ String::from(char::from_u32(0x10FFFE).unwrap())
+ );
+
+ // When the last character is the biggest UTF8 char
+ let a = String::from_iter(['a', char::MAX].iter());
+ assert_eq!(key_after_prefix(a.as_str()).unwrap().as_str(), "b");
+
+ // When all characters are the biggest UTF8 char
+ let b = String::from_iter([char::MAX; 3].iter());
+ assert!(key_after_prefix(b.as_str()).is_none());
+
+		// Check that the UTF-16 surrogate range is skipped
+ let c = String::from('\u{D7FF}');
+ assert_eq!(
+ key_after_prefix(c.as_str()).unwrap().as_str(),
+ String::from('\u{E000}')
+ );
+
+ // Check the character before the biggest one
+ let d = String::from('\u{10FFFE}');
+ assert_eq!(
+ key_after_prefix(d.as_str()).unwrap().as_str(),
+ String::from(char::MAX)
+ );
+ }
+
+ #[test]
+ fn test_common_prefixes() {
+ let mut query = query();
+ let objs = objs();
+
+ query.common.prefix = "a/".to_string();
+ assert_eq!(
+ common_prefix(&objs.get(0).unwrap(), &query.common),
+ Some("a/b/")
+ );
+
+ query.common.prefix = "a/b/".to_string();
+ assert_eq!(common_prefix(&objs.get(0).unwrap(), &query.common), None);
+ }
+
+ #[test]
+ fn test_extract_common_prefix() {
+ let mut query = query();
+ query.common.prefix = "a/".to_string();
+ let objs = objs();
+ let mut acc = UploadAccumulator::new(query.common.page_size);
+
+ let mut iter = objs.iter().peekable();
+ match acc.extract_common_prefix(&mut iter, &query.common) {
+ Some(ExtractionResult::Extracted { key }) => assert_eq!(key, "a/b/c".to_string()),
+ _ => panic!("wrong result"),
+ }
+ assert_eq!(acc.common_prefixes.len(), 1);
+ assert_eq!(acc.common_prefixes.iter().next().unwrap(), "a/b/");
+ }
+
+ #[test]
+ fn test_extract_upload() {
+ let objs = vec![
+ Object::new(
+ bucket(),
+ "b".to_string(),
+ vec![
+ objup_version([0x01; 32]),
+ objup_version([0x80; 32]),
+ objup_version([0x8f; 32]),
+ objup_version([0xdd; 32]),
+ ],
+ ),
+ Object::new(bucket(), "c".to_string(), vec![]),
+ ];
+
+ let mut acc = UploadAccumulator::new(2);
+ let mut start = RangeBegin::AfterUpload {
+ key: "b".to_string(),
+ upload: Uuid::from([0x01; 32]),
+ };
+
+ let mut iter = objs.iter().peekable();
+
+ // Check the case where we skip some uploads
+ match acc.extract(&(query().common), &start, &mut iter) {
+ ExtractionResult::FilledAtUpload { key, upload } => {
+ assert_eq!(key, "b");
+ assert_eq!(upload, Uuid::from([0x8f; 32]));
+ }
+ _ => panic!("wrong result"),
+ };
+
+ assert_eq!(acc.keys.len(), 2);
+ assert_eq!(
+ acc.keys.get(&Uuid::from([0x80; 32])).unwrap(),
+ &UploadInfo {
+ timestamp: TS,
+ key: "b".to_string()
+ }
+ );
+ assert_eq!(
+ acc.keys.get(&Uuid::from([0x8f; 32])).unwrap(),
+ &UploadInfo {
+ timestamp: TS,
+ key: "b".to_string()
+ }
+ );
+
+ acc = UploadAccumulator::new(2);
+ start = RangeBegin::AfterUpload {
+ key: "b".to_string(),
+ upload: Uuid::from([0xff; 32]),
+ };
+ iter = objs.iter().peekable();
+
+ // Check the case where we skip all the uploads
+ match acc.extract(&(query().common), &start, &mut iter) {
+ ExtractionResult::Extracted { key } if key.as_str() == "b" => (),
+ _ => panic!("wrong result"),
+ };
+ }
+
+ #[tokio::test]
+ async fn test_fetch_uploads_no_result() -> Result<(), Error> {
+ let query = query();
+ let mut acc = query.build_accumulator();
+ let page = fetch_list_entries(
+ &query.common,
+ query.begin()?,
+ &mut acc,
+ |_, _, _| async move { Ok(vec![]) },
+ )
+ .await?;
+ assert_eq!(page, None);
+ assert_eq!(acc.common_prefixes.len(), 0);
+ assert_eq!(acc.keys.len(), 0);
+
+ Ok(())
+ }
+
+ #[tokio::test]
+ async fn test_fetch_uploads_basic() -> Result<(), Error> {
+ let query = query();
+ let mut acc = query.build_accumulator();
+ let mut fake_io = |_, _, _| async move { Ok(objs()) };
+ let page =
+ fetch_list_entries(&query.common, query.begin()?, &mut acc, &mut fake_io).await?;
+ assert_eq!(page, None);
+ assert_eq!(acc.common_prefixes.len(), 1);
+ assert_eq!(acc.keys.len(), 1);
+ assert!(acc.common_prefixes.contains("a/"));
+
+ Ok(())
+ }
+
+ #[tokio::test]
+ async fn test_fetch_uploads_advanced() -> Result<(), Error> {
+ let mut query = query();
+ query.common.page_size = 2;
+
+ let mut fake_io = |_, k: Option<String>, _| async move {
+ Ok(match k.as_deref() {
+ Some("") => vec![
+ Object::new(bucket(), "b/a".to_string(), vec![objup_version([0x01; 32])]),
+ Object::new(bucket(), "b/b".to_string(), vec![objup_version([0x01; 32])]),
+ Object::new(bucket(), "b/c".to_string(), vec![objup_version([0x01; 32])]),
+ ],
+ Some("b0") => vec![
+ Object::new(bucket(), "c/a".to_string(), vec![objup_version([0x01; 32])]),
+ Object::new(bucket(), "c/b".to_string(), vec![objup_version([0x01; 32])]),
+ Object::new(bucket(), "c/c".to_string(), vec![objup_version([0x02; 32])]),
+ ],
+ Some("c0") => vec![Object::new(
+ bucket(),
+ "d".to_string(),
+ vec![objup_version([0x01; 32])],
+ )],
+ _ => panic!("wrong value {:?}", k),
+ })
+ };
+
+ let mut acc = query.build_accumulator();
+ let page =
+ fetch_list_entries(&query.common, query.begin()?, &mut acc, &mut fake_io).await?;
+ assert_eq!(
+ page,
+ Some(RangeBegin::IncludingKey {
+ key: "c0".to_string(),
+ fallback_key: Some("c/c".to_string())
+ })
+ );
+ assert_eq!(acc.common_prefixes.len(), 2);
+ assert_eq!(acc.keys.len(), 0);
+ assert!(acc.common_prefixes.contains("b/"));
+ assert!(acc.common_prefixes.contains("c/"));
+
+ Ok(())
+ }
+}
diff --git a/src/api/s3_put.rs b/src/api/s3_put.rs
index bb92c252..d7ee5893 100644
--- a/src/api/s3_put.rs
+++ b/src/api/s3_put.rs
@@ -610,7 +610,7 @@ pub(crate) fn get_headers(req: &Request<Body>) -> Result<ObjectVersionHeaders, E
})
}
-fn decode_upload_id(id: &str) -> Result<Uuid, Error> {
+pub fn decode_upload_id(id: &str) -> Result<Uuid, Error> {
let id_bin = hex::decode(id).map_err(|_| Error::NoSuchUpload)?;
if id_bin.len() != 32 {
return Err(Error::NoSuchUpload);
diff --git a/src/api/s3_router.rs b/src/api/s3_router.rs
index 234f77f0..a8ac0086 100644
--- a/src/api/s3_router.rs
+++ b/src/api/s3_router.rs
@@ -350,7 +350,7 @@ pub enum Endpoint {
delimiter: Option<char>,
encoding_type: Option<String>,
key_marker: Option<String>,
- max_uploads: Option<u64>,
+ max_uploads: Option<usize>,
prefix: Option<String>,
upload_id_marker: Option<String>,
},
diff --git a/src/api/s3_xml.rs b/src/api/s3_xml.rs
index 9b5a0202..98c63d57 100644
--- a/src/api/s3_xml.rs
+++ b/src/api/s3_xml.rs
@@ -142,6 +142,60 @@ pub struct CompleteMultipartUploadResult {
}
#[derive(Debug, Serialize, PartialEq)]
+pub struct Initiator {
+ #[serde(rename = "DisplayName")]
+ pub display_name: Value,
+ #[serde(rename = "ID")]
+ pub id: Value,
+}
+
+#[derive(Debug, Serialize, PartialEq)]
+pub struct ListMultipartItem {
+ #[serde(rename = "Initiated")]
+ pub initiated: Value,
+ #[serde(rename = "Initiator")]
+ pub initiator: Initiator,
+ #[serde(rename = "Key")]
+ pub key: Value,
+ #[serde(rename = "UploadId")]
+ pub upload_id: Value,
+ #[serde(rename = "Owner")]
+ pub owner: Owner,
+ #[serde(rename = "StorageClass")]
+ pub storage_class: Value,
+}
+
+#[derive(Debug, Serialize, PartialEq)]
+pub struct ListMultipartUploadsResult {
+ #[serde(serialize_with = "xmlns_tag")]
+ pub xmlns: (),
+ #[serde(rename = "Bucket")]
+ pub bucket: Value,
+ #[serde(rename = "KeyMarker")]
+ pub key_marker: Option<Value>,
+ #[serde(rename = "UploadIdMarker")]
+ pub upload_id_marker: Option<Value>,
+ #[serde(rename = "NextKeyMarker")]
+ pub next_key_marker: Option<Value>,
+ #[serde(rename = "NextUploadIdMarker")]
+ pub next_upload_id_marker: Option<Value>,
+ #[serde(rename = "Prefix")]
+ pub prefix: Value,
+ #[serde(rename = "Delimiter")]
+ pub delimiter: Option<Value>,
+ #[serde(rename = "MaxUploads")]
+ pub max_uploads: IntValue,
+ #[serde(rename = "IsTruncated")]
+ pub is_truncated: Value,
+ #[serde(rename = "Upload")]
+ pub upload: Vec<ListMultipartItem>,
+ #[serde(rename = "CommonPrefixes")]
+ pub common_prefixes: Vec<CommonPrefix>,
+ #[serde(rename = "EncodingType")]
+ pub encoding_type: Option<Value>,
+}
+
+#[derive(Debug, Serialize, PartialEq)]
pub struct ListBucketItem {
#[serde(rename = "Key")]
pub key: Value,
@@ -433,6 +487,58 @@ mod tests {
}
#[test]
+ fn list_multipart_uploads_result() -> Result<(), ApiError> {
+ let result = ListMultipartUploadsResult {
+ xmlns: (),
+ bucket: Value("example-bucket".to_string()),
+ key_marker: None,
+ next_key_marker: None,
+ upload_id_marker: None,
+ encoding_type: None,
+ next_upload_id_marker: None,
+ upload: vec![],
+ delimiter: Some(Value("/".to_string())),
+ prefix: Value("photos/2006/".to_string()),
+ max_uploads: IntValue(1000),
+ is_truncated: Value("false".to_string()),
+ common_prefixes: vec![
+ CommonPrefix {
+ prefix: Value("photos/2006/February/".to_string()),
+ },
+ CommonPrefix {
+ prefix: Value("photos/2006/January/".to_string()),
+ },
+ CommonPrefix {
+ prefix: Value("photos/2006/March/".to_string()),
+ },
+ ],
+ };
+
+ assert_eq!(
+ to_xml_with_header(&result)?,
+ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\
+<ListMultipartUploadsResult xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\">\
+ <Bucket>example-bucket</Bucket>\
+ <Prefix>photos/2006/</Prefix>\
+ <Delimiter>/</Delimiter>\
+ <MaxUploads>1000</MaxUploads>\
+ <IsTruncated>false</IsTruncated>\
+ <CommonPrefixes>\
+ <Prefix>photos/2006/February/</Prefix>\
+ </CommonPrefixes>\
+ <CommonPrefixes>\
+ <Prefix>photos/2006/January/</Prefix>\
+ </CommonPrefixes>\
+ <CommonPrefixes>\
+ <Prefix>photos/2006/March/</Prefix>\
+ </CommonPrefixes>\
+</ListMultipartUploadsResult>"
+ );
+
+ Ok(())
+ }
+
+ #[test]
fn list_objects_v1_1() -> Result<(), ApiError> {
let result = ListBucketResult {
xmlns: (),
diff --git a/src/garage/admin.rs b/src/garage/admin.rs
index f315c4dc..4a53792d 100644
--- a/src/garage/admin.rs
+++ b/src/garage/admin.rs
@@ -21,6 +21,7 @@ use garage_model::garage::Garage;
use garage_model::helper::error::{Error, OkOrBadRequest};
use garage_model::key_table::*;
use garage_model::migrate::Migrate;
+use garage_model::object_table::ObjectFilter;
use garage_model::permission::*;
use crate::cli::*;
@@ -209,7 +210,7 @@ impl AdminRpcHandler {
let objects = self
.garage
.object_table
- .get_range(&bucket_id, None, Some(DeletedFilter::NotDeleted), 10)
+ .get_range(&bucket_id, None, Some(ObjectFilter::IsData), 10)
.await?;
if !objects.is_empty() {
return Err(Error::BadRequest(format!(
diff --git a/src/model/object_table.rs b/src/model/object_table.rs
index 0c6c3a6d..da53878e 100644
--- a/src/model/object_table.rs
+++ b/src/model/object_table.rs
@@ -218,13 +218,19 @@ pub struct ObjectTable {
pub version_table: Arc<Table<VersionTable, TableShardedReplication>>,
}
+#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
+pub enum ObjectFilter {
+ IsData,
+ IsUploading,
+}
+
impl TableSchema for ObjectTable {
const TABLE_NAME: &'static str = "object";
type P = Uuid;
type S = String;
type E = Object;
- type Filter = DeletedFilter;
+ type Filter = ObjectFilter;
fn updated(&self, old: Option<Self::E>, new: Option<Self::E>) {
let version_table = self.version_table.clone();
@@ -254,8 +260,10 @@ impl TableSchema for ObjectTable {
}
fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool {
- let deleted = !entry.versions.iter().any(|v| v.is_data());
- filter.apply(deleted)
+ match filter {
+ ObjectFilter::IsData => entry.versions.iter().any(|v| v.is_data()),
+ ObjectFilter::IsUploading => entry.versions.iter().any(|v| v.is_uploading()),
+ }
}
fn try_migrate(bytes: &[u8]) -> Option<Self::E> {