Diffstat (limited to 'src/api/s3/get.rs')
-rw-r--r--  src/api/s3/get.rs  523
1 file changed, 523 insertions, 0 deletions
diff --git a/src/api/s3/get.rs b/src/api/s3/get.rs
new file mode 100644
index 00000000..2a99551a
--- /dev/null
+++ b/src/api/s3/get.rs
@@ -0,0 +1,523 @@
+//! Functions related to GET and HEAD requests
+use std::sync::Arc;
+use std::time::{Duration, UNIX_EPOCH};
+
+use futures::future;
+use futures::stream::{self, StreamExt};
+use http::header::{
+ ACCEPT_RANGES, CONTENT_LENGTH, CONTENT_RANGE, CONTENT_TYPE, ETAG, IF_MODIFIED_SINCE,
+ IF_NONE_MATCH, LAST_MODIFIED, RANGE,
+};
+use hyper::{Body, Request, Response, StatusCode};
+use tokio::sync::mpsc;
+
+use garage_rpc::rpc_helper::{netapp::stream::ByteStream, OrderTag};
+use garage_table::EmptyKey;
+use garage_util::data::*;
+use garage_util::error::OkOrMessage;
+
+use garage_model::garage::Garage;
+use garage_model::s3::object_table::*;
+use garage_model::s3::version_table::*;
+
+use crate::s3::error::*;
+
+const X_AMZ_MP_PARTS_COUNT: &str = "x-amz-mp-parts-count";
+
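+/// Build a response builder pre-filled with the metadata headers of an object
+/// version: Content-Type, Last-Modified, Accept-Ranges, the quoted ETag (when
+/// one is stored) and any user-defined headers saved with the object.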
+fn object_headers(
+ version: &ObjectVersion,
+ version_meta: &ObjectVersionMeta,
+) -> http::response::Builder {
+ debug!("Version meta: {:?}", version_meta);
+
+ let date = UNIX_EPOCH + Duration::from_millis(version.timestamp);
+ let date_str = httpdate::fmt_http_date(date);
+
+ let mut resp = Response::builder()
+ .header(CONTENT_TYPE, version_meta.headers.content_type.to_string())
+ .header(LAST_MODIFIED, date_str)
+ .header(ACCEPT_RANGES, "bytes".to_string());
+
+ if !version_meta.etag.is_empty() {
+ resp = resp.header(ETAG, format!("\"{}\"", version_meta.etag));
+ }
+
+ for (k, v) in version_meta.headers.other.iter() {
+ resp = resp.header(k, v.to_string());
+ }
+
+ resp
+}
+
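+/// Handle the If-None-Match and If-Modified-Since conditional headers and
+/// return a 304 Not Modified response when the client's cached copy is still
+/// up to date; return None when a full response must be produced.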
+fn try_answer_cached(
+ version: &ObjectVersion,
+ version_meta: &ObjectVersionMeta,
+ req: &Request<Body>,
+) -> Option<Response<Body>> {
+ // <trinity> It is possible, and is even usually the case, [that both If-None-Match and
+ // If-Modified-Since] are present in a request. In this situation If-None-Match takes
+ // precedence and If-Modified-Since is ignored (as per Section 6 "Precedence" of RFC 7232).
+ // The rationale is that ETag-based matching is more accurate: it has no issue with
+ // sub-second precision, for instance (in case of very fast updates).
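+ // For example, `If-None-Match: "abc", "def"` matches when the stored ETag is
+ // abc or def; the wildcard value `"*"` matches whatever ETag is stored.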
+ let cached = if let Some(none_match) = req.headers().get(IF_NONE_MATCH) {
+ let none_match = none_match.to_str().ok()?;
+ let expected = format!("\"{}\"", version_meta.etag);
+ let found = none_match
+ .split(',')
+ .map(str::trim)
+ .any(|etag| etag == expected || etag == "\"*\"");
+ found
+ } else if let Some(modified_since) = req.headers().get(IF_MODIFIED_SINCE) {
+ let modified_since = modified_since.to_str().ok()?;
+ let client_date = httpdate::parse_http_date(modified_since).ok()?;
+ let server_date = UNIX_EPOCH + Duration::from_millis(version.timestamp);
+ client_date >= server_date
+ } else {
+ false
+ };
+
+ if cached {
+ Some(
+ Response::builder()
+ .status(StatusCode::NOT_MODIFIED)
+ .body(Body::empty())
+ .unwrap(),
+ )
+ } else {
+ None
+ }
+}
+
+/// Handle HEAD request
+pub async fn handle_head(
+ garage: Arc<Garage>,
+ req: &Request<Body>,
+ bucket_id: Uuid,
+ key: &str,
+ part_number: Option<u64>,
+) -> Result<Response<Body>, Error> {
+ let object = garage
+ .object_table
+ .get(&bucket_id, &key.to_string())
+ .await?
+ .ok_or(Error::NoSuchKey)?;
+
+ let object_version = object
+ .versions()
+ .iter()
+ .rev()
+ .find(|v| v.is_data())
+ .ok_or(Error::NoSuchKey)?;
+
+ let version_data = match &object_version.state {
+ ObjectVersionState::Complete(c) => c,
+ _ => unreachable!(),
+ };
+
+ let version_meta = match version_data {
+ ObjectVersionData::Inline(meta, _) => meta,
+ ObjectVersionData::FirstBlock(meta, _) => meta,
+ _ => unreachable!(),
+ };
+
+ if let Some(cached) = try_answer_cached(object_version, version_meta, req) {
+ return Ok(cached);
+ }
+
+ if let Some(pn) = part_number {
+ match version_data {
+ ObjectVersionData::Inline(_, bytes) => {
+ if pn != 1 {
+ return Err(Error::InvalidPart);
+ }
+ Ok(object_headers(object_version, version_meta)
+ .header(CONTENT_LENGTH, format!("{}", bytes.len()))
+ .header(
+ CONTENT_RANGE,
+ format!("bytes 0-{}/{}", bytes.len() - 1, bytes.len()),
+ )
+ .header(X_AMZ_MP_PARTS_COUNT, "1")
+ .status(StatusCode::PARTIAL_CONTENT)
+ .body(Body::empty())?)
+ }
+ ObjectVersionData::FirstBlock(_, _) => {
+ let version = garage
+ .version_table
+ .get(&object_version.uuid, &EmptyKey)
+ .await?
+ .ok_or(Error::NoSuchKey)?;
+
+ let (part_offset, part_end) =
+ calculate_part_bounds(&version, pn).ok_or(Error::InvalidPart)?;
+ let n_parts = version.parts_etags.items().len();
+
+ Ok(object_headers(object_version, version_meta)
+ .header(CONTENT_LENGTH, format!("{}", part_end - part_offset))
+ .header(
+ CONTENT_RANGE,
+ format!(
+ "bytes {}-{}/{}",
+ part_offset,
+ part_end - 1,
+ version_meta.size
+ ),
+ )
+ .header(X_AMZ_MP_PARTS_COUNT, format!("{}", n_parts))
+ .status(StatusCode::PARTIAL_CONTENT)
+ .body(Body::empty())?)
+ }
+ _ => unreachable!(),
+ }
+ } else {
+ Ok(object_headers(object_version, version_meta)
+ .header(CONTENT_LENGTH, format!("{}", version_meta.size))
+ .status(StatusCode::OK)
+ .body(Body::empty())?)
+ }
+}
+
+/// Handle GET request
+pub async fn handle_get(
+ garage: Arc<Garage>,
+ req: &Request<Body>,
+ bucket_id: Uuid,
+ key: &str,
+ part_number: Option<u64>,
+) -> Result<Response<Body>, Error> {
+ let object = garage
+ .object_table
+ .get(&bucket_id, &key.to_string())
+ .await?
+ .ok_or(Error::NoSuchKey)?;
+
+ let last_v = object
+ .versions()
+ .iter()
+ .rev()
+ .find(|v| v.is_complete())
+ .ok_or(Error::NoSuchKey)?;
+
+ let last_v_data = match &last_v.state {
+ ObjectVersionState::Complete(x) => x,
+ _ => unreachable!(),
+ };
+ let last_v_meta = match last_v_data {
+ ObjectVersionData::DeleteMarker => return Err(Error::NoSuchKey),
+ ObjectVersionData::Inline(meta, _) => meta,
+ ObjectVersionData::FirstBlock(meta, _) => meta,
+ };
+
+ if let Some(cached) = try_answer_cached(last_v, last_v_meta, req) {
+ return Ok(cached);
+ }
+
+ match (part_number, parse_range_header(req, last_v_meta.size)?) {
+ (Some(_), Some(_)) => {
+ return Err(Error::bad_request(
+ "Cannot specify both partNumber and Range header",
+ ));
+ }
+ (Some(pn), None) => {
+ return handle_get_part(garage, last_v, last_v_data, last_v_meta, pn).await;
+ }
+ (None, Some(range)) => {
+ return handle_get_range(
+ garage,
+ last_v,
+ last_v_data,
+ last_v_meta,
+ range.start,
+ range.start + range.length,
+ )
+ .await;
+ }
+ (None, None) => (),
+ }
+
+ let resp_builder = object_headers(last_v, last_v_meta)
+ .header(CONTENT_LENGTH, format!("{}", last_v_meta.size))
+ .status(StatusCode::OK);
+
+ match &last_v_data {
+ ObjectVersionData::DeleteMarker => unreachable!(),
+ ObjectVersionData::Inline(_, bytes) => {
+ let body: Body = Body::from(bytes.to_vec());
+ Ok(resp_builder.body(body)?)
+ }
+ ObjectVersionData::FirstBlock(_, first_block_hash) => {
+ let (tx, rx) = mpsc::channel(2);
+
+ let order_stream = OrderTag::stream();
+ let first_block_hash = *first_block_hash;
+ let version_uuid = last_v.uuid;
+
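+ // Stream the object body block by block: a background task pushes one
+ // ByteStream per block into a channel of capacity 2 (so at most two blocks
+ // are buffered ahead of the consumer), while the version metadata needed to
+ // list the remaining blocks is fetched concurrently with block 0. The
+ // OrderTag tells the RPC layer to deliver the block streams in sequence.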
+ tokio::spawn(async move {
+ match async {
+ let garage2 = garage.clone();
+ let version_fut = tokio::spawn(async move {
+ garage2.version_table.get(&version_uuid, &EmptyKey).await
+ });
+
+ let stream_block_0 = garage
+ .block_manager
+ .rpc_get_block_streaming(&first_block_hash, Some(order_stream.order(0)))
+ .await?;
+ tx.send(stream_block_0)
+ .await
+ .ok_or_message("channel closed")?;
+
+ let version = version_fut.await.unwrap()?.ok_or(Error::NoSuchKey)?;
+ for (i, (_, vb)) in version.blocks.items().iter().enumerate().skip(1) {
+ let stream_block_i = garage
+ .block_manager
+ .rpc_get_block_streaming(&vb.hash, Some(order_stream.order(i as u64)))
+ .await?;
+ tx.send(stream_block_i)
+ .await
+ .ok_or_message("channel closed")?;
+ }
+
+ Ok::<(), Error>(())
+ }
+ .await
+ {
+ Ok(()) => (),
+ Err(e) => {
+ let err = std::io::Error::new(
+ std::io::ErrorKind::Other,
+ format!("Error while getting object data: {}", e),
+ );
+ let _ = tx
+ .send(Box::pin(stream::once(future::ready(Err(err)))))
+ .await;
+ }
+ }
+ });
+
+ let body_stream = tokio_stream::wrappers::ReceiverStream::new(rx).flatten();
+
+ let body = hyper::body::Body::wrap_stream(body_stream);
+ Ok(resp_builder.body(body)?)
+ }
+ }
+}
+
+async fn handle_get_range(
+ garage: Arc<Garage>,
+ version: &ObjectVersion,
+ version_data: &ObjectVersionData,
+ version_meta: &ObjectVersionMeta,
+ begin: u64,
+ end: u64,
+) -> Result<Response<Body>, Error> {
+ let resp_builder = object_headers(version, version_meta)
+ .header(CONTENT_LENGTH, format!("{}", end - begin))
+ .header(
+ CONTENT_RANGE,
+ format!("bytes {}-{}/{}", begin, end - 1, version_meta.size),
+ )
+ .status(StatusCode::PARTIAL_CONTENT);
+
+ match &version_data {
+ ObjectVersionData::DeleteMarker => unreachable!(),
+ ObjectVersionData::Inline(_meta, bytes) => {
+ if end as usize <= bytes.len() {
+ let body: Body = Body::from(bytes[begin as usize..end as usize].to_vec());
+ Ok(resp_builder.body(body)?)
+ } else {
+ Err(Error::internal_error(
+ "Requested range not present in inline bytes when it should have been",
+ ))
+ }
+ }
+ ObjectVersionData::FirstBlock(_meta, _first_block_hash) => {
+ let version = garage
+ .version_table
+ .get(&version.uuid, &EmptyKey)
+ .await?
+ .ok_or(Error::NoSuchKey)?;
+
+ let body = body_from_blocks_range(garage, version.blocks.items(), begin, end);
+ Ok(resp_builder.body(body)?)
+ }
+ }
+}
+
+async fn handle_get_part(
+ garage: Arc<Garage>,
+ object_version: &ObjectVersion,
+ version_data: &ObjectVersionData,
+ version_meta: &ObjectVersionMeta,
+ part_number: u64,
+) -> Result<Response<Body>, Error> {
+ let resp_builder =
+ object_headers(object_version, version_meta).status(StatusCode::PARTIAL_CONTENT);
+
+ match version_data {
+ ObjectVersionData::Inline(_, bytes) => {
+ if part_number != 1 {
+ return Err(Error::InvalidPart);
+ }
+ Ok(resp_builder
+ .header(CONTENT_LENGTH, format!("{}", bytes.len()))
+ .header(
+ CONTENT_RANGE,
+ format!("bytes {}-{}/{}", 0, bytes.len() - 1, bytes.len()),
+ )
+ .header(X_AMZ_MP_PARTS_COUNT, "1")
+ .body(Body::from(bytes.to_vec()))?)
+ }
+ ObjectVersionData::FirstBlock(_, _) => {
+ let version = garage
+ .version_table
+ .get(&object_version.uuid, &EmptyKey)
+ .await?
+ .ok_or(Error::NoSuchKey)?;
+
+ let (begin, end) =
+ calculate_part_bounds(&version, part_number).ok_or(Error::InvalidPart)?;
+ let n_parts = version.parts_etags.items().len();
+
+ let body = body_from_blocks_range(garage, version.blocks.items(), begin, end);
+
+ Ok(resp_builder
+ .header(CONTENT_LENGTH, format!("{}", end - begin))
+ .header(
+ CONTENT_RANGE,
+ format!("bytes {}-{}/{}", begin, end - 1, version_meta.size),
+ )
+ .header(X_AMZ_MP_PARTS_COUNT, format!("{}", n_parts))
+ .body(body)?)
+ }
+ _ => unreachable!(),
+ }
+}
+
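+/// Parse the Range header of the request, if any. Returns Ok(None) when no
+/// Range header is present or when several ranges are requested (in which
+/// case the whole object is served), and an error when the header is invalid
+/// or unsatisfiable for an object of `total_size` bytes.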
+fn parse_range_header(
+ req: &Request<Body>,
+ total_size: u64,
+) -> Result<Option<http_range::HttpRange>, Error> {
+ let range = match req.headers().get(RANGE) {
+ Some(range) => {
+ let range_str = range.to_str()?;
+ let mut ranges =
+ http_range::HttpRange::parse(range_str, total_size).map_err(|e| (e, total_size))?;
+ if ranges.len() > 1 {
+ // garage does not support multi-range requests yet, so we respond with the entire
+ // object when multiple ranges are requested
+ None
+ } else {
+ ranges.pop()
+ }
+ }
+ None => None,
+ };
+ Ok(range)
+}
+
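+/// Compute the byte range (begin, end) occupied by part `part_number` within
+/// the whole object, by walking the version's blocks in order and summing the
+/// sizes of the blocks that belong to that part; returns None when the part
+/// does not exist. For instance, with three parts of 5 MiB, 5 MiB and 1 MiB,
+/// part 2 spans (5 MiB, 10 MiB).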
+fn calculate_part_bounds(v: &Version, part_number: u64) -> Option<(u64, u64)> {
+ let mut offset = 0;
+ for (i, (bk, bv)) in v.blocks.items().iter().enumerate() {
+ if bk.part_number == part_number {
+ let size: u64 = v.blocks.items()[i..]
+ .iter()
+ .take_while(|(k, _)| k.part_number == part_number)
+ .map(|(_, v)| v.size)
+ .sum();
+ return Some((offset, offset + size));
+ }
+ offset += bv.size;
+ }
+ None
+}
+
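+/// Build a streaming response body for the byte range [begin, end) of an
+/// object, given the full list of its blocks.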
+fn body_from_blocks_range(
+ garage: Arc<Garage>,
+ all_blocks: &[(VersionBlockKey, VersionBlock)],
+ begin: u64,
+ end: u64,
+) -> Body {
+ // We will store here the list of blocks that have an intersection with the requested
+ // range, as well as their "true offset", which is their actual offset in the complete
+ // file (whereas block.offset designates the offset of the block WITHIN THE PART
+ // block.part_number, which is not the same in the case of a multipart upload)
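+ // For example, if a two-part upload has 8 MiB parts stored as 1 MiB blocks,
+ // the first block of part 2 has block.offset == 0 but a true offset of 8 MiB.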
+ let mut blocks: Vec<(VersionBlock, u64)> = Vec::with_capacity(std::cmp::min(
+ all_blocks.len(),
+ 4 + ((end - begin) / std::cmp::max(all_blocks[0].1.size as u64, 1024)) as usize,
+ ));
+ let mut block_offset: u64 = 0;
+ for (_, b) in all_blocks.iter() {
+ if block_offset >= end {
+ break;
+ }
+ // Keep only blocks that have an intersection with the requested range
+ if block_offset < end && block_offset + b.size > begin {
+ blocks.push((*b, block_offset));
+ }
+ block_offset += b.size as u64;
+ }
+
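+ // Fetch the selected blocks as ordered byte streams, at most two at a time
+ // (`buffered(2)` keeps one block fetch in flight ahead of the one being
+ // served), and trim each chunk to the requested [begin, end) range in the
+ // scan below.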
+ let order_stream = OrderTag::stream();
+ let body_stream = futures::stream::iter(blocks)
+ .enumerate()
+ .map(move |(i, (block, block_offset))| {
+ let garage = garage.clone();
+ async move {
+ garage
+ .block_manager
+ .rpc_get_block_streaming(&block.hash, Some(order_stream.order(i as u64)))
+ .await
+ .unwrap_or_else(|e| error_stream(i, e))
+ .scan(block_offset, move |chunk_offset, chunk| {
+ let r = match chunk {
+ Ok(chunk_bytes) => {
+ let chunk_len = chunk_bytes.len() as u64;
+ let r = if *chunk_offset >= end {
+ // The current chunk is after the part we want to read.
+ // Returning None here will stop the scan, the rest of the
+ // stream will be ignored
+ None
+ } else if *chunk_offset + chunk_len <= begin {
+ // The current chunk is before the part we want to read.
+ // We return a None that will be removed by the filter_map
+ // below.
+ Some(None)
+ } else {
+ // The chunk has an intersection with the requested range
+ let start_in_chunk = if *chunk_offset > begin {
+ 0
+ } else {
+ begin - *chunk_offset
+ };
+ let end_in_chunk = if *chunk_offset + chunk_len < end {
+ chunk_len
+ } else {
+ end - *chunk_offset
+ };
+ Some(Some(Ok(chunk_bytes
+ .slice(start_in_chunk as usize..end_in_chunk as usize))))
+ };
+ *chunk_offset += chunk_bytes.len() as u64;
+ r
+ }
+ Err(e) => Some(Some(Err(e))),
+ };
+ futures::future::ready(r)
+ })
+ .filter_map(futures::future::ready)
+ }
+ })
+ .buffered(2)
+ .flatten();
+
+ hyper::body::Body::wrap_stream(body_stream)
+}
+
+fn error_stream(i: usize, e: garage_util::error::Error) -> ByteStream {
+ Box::pin(futures::stream::once(async move {
+ Err(std::io::Error::new(
+ std::io::ErrorKind::Other,
+ format!("Could not get block {}: {}", i, e),
+ ))
+ }))
+}