aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alex <alex@adnab.me> 2021-04-06 22:18:41 +0200
committer Alex <alex@adnab.me> 2021-04-06 22:18:41 +0200
commit 7380f3855ca11d6ca0c55c2132f478e52f3fe9b8 (patch)
tree ead4cd90d954db614d4c3e60ef06c2844108d6d6
parent c4c4b7dedc7b0faca508a0d4a921cb2cc97ba560 (diff)
parent 6cbc8d6ec93b832a301a5402f1b1ae70b07a2be3 (diff)
download garage-7380f3855ca11d6ca0c55c2132f478e52f3fe9b8.tar.gz
garage-7380f3855ca11d6ca0c55c2132f478e52f3fe9b8.zip
Merge pull request 'Use content defined chunking' (#43) from trinity-1686a/garage:content-defined-chunking into main
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/43
-rw-r--r-- Cargo.lock 88
-rw-r--r-- src/api/Cargo.toml 2
-rw-r--r-- src/api/s3_put.rs 34
3 files changed, 104 insertions, 20 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 5dc83dfa..c97968ef 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -222,6 +222,12 @@ dependencies = [
]
[[package]]
+name = "fastcdc"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5afa29be46b12c8c380b997def8d1ac77c2665da93eb0a768fab0bf4db79333f"
+
+[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -365,7 +371,7 @@ dependencies = [
"hex",
"log",
"pretty_env_logger",
- "rand",
+ "rand 0.8.3",
"rmp-serde",
"serde",
"sled",
@@ -383,6 +389,7 @@ dependencies = [
"chrono",
"crypto-mac 0.10.0",
"err-derive",
+ "fastcdc",
"futures",
"futures-util",
"garage_model",
@@ -397,6 +404,7 @@ dependencies = [
"log",
"md-5",
"percent-encoding",
+ "rand 0.7.3",
"roxmltree",
"sha2",
"tokio",
@@ -415,7 +423,7 @@ dependencies = [
"garage_util",
"hex",
"log",
- "rand",
+ "rand 0.8.3",
"rmp-serde",
"serde",
"serde_bytes",
@@ -459,7 +467,7 @@ dependencies = [
"garage_util",
"hexdump",
"log",
- "rand",
+ "rand 0.8.3",
"rmp-serde",
"serde",
"serde_bytes",
@@ -479,7 +487,7 @@ dependencies = [
"http",
"hyper",
"log",
- "rand",
+ "rand 0.8.3",
"rmp-serde",
"rustls",
"serde",
@@ -531,13 +539,24 @@ dependencies = [
[[package]]
name = "getrandom"
+version = "0.1.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi 0.9.0+wasi-snapshot-preview1",
+]
+
+[[package]]
+name = "getrandom"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8"
dependencies = [
"cfg-if",
"libc",
- "wasi",
+ "wasi 0.10.2+wasi-snapshot-preview1",
]
[[package]]
@@ -1045,14 +1064,37 @@ dependencies = [
[[package]]
name = "rand"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
+dependencies = [
+ "getrandom 0.1.16",
+ "libc",
+ "rand_chacha 0.2.2",
+ "rand_core 0.5.1",
+ "rand_hc 0.2.0",
+]
+
+[[package]]
+name = "rand"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e"
dependencies = [
"libc",
- "rand_chacha",
- "rand_core",
- "rand_hc",
+ "rand_chacha 0.3.0",
+ "rand_core 0.6.2",
+ "rand_hc 0.3.0",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.5.1",
]
[[package]]
@@ -1062,7 +1104,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d"
dependencies = [
"ppv-lite86",
- "rand_core",
+ "rand_core 0.6.2",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
+dependencies = [
+ "getrandom 0.1.16",
]
[[package]]
@@ -1071,7 +1122,16 @@ version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7"
dependencies = [
- "getrandom",
+ "getrandom 0.2.2",
+]
+
+[[package]]
+name = "rand_hc"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
+dependencies = [
+ "rand_core 0.5.1",
]
[[package]]
@@ -1080,7 +1140,7 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73"
dependencies = [
- "rand_core",
+ "rand_core 0.6.2",
]
[[package]]
@@ -1583,6 +1643,12 @@ dependencies = [
[[package]]
name = "wasi"
+version = "0.9.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
+
+[[package]]
+name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
diff --git a/src/api/Cargo.toml b/src/api/Cargo.toml
index 0b824ca3..b328f671 100644
--- a/src/api/Cargo.toml
+++ b/src/api/Cargo.toml
@@ -22,10 +22,12 @@ bytes = "1.0"
chrono = "0.4"
crypto-mac = "0.10"
err-derive = "0.3"
+fastcdc = "1.0.5"
hex = "0.4"
hmac = "0.10"
log = "0.4"
md-5 = "0.9"
+rand = "0.7"
sha2 = "0.9"
futures = "0.3"
diff --git a/src/api/s3_put.rs b/src/api/s3_put.rs
index c4e3b818..d023bcef 100644
--- a/src/api/s3_put.rs
+++ b/src/api/s3_put.rs
@@ -2,6 +2,7 @@ use std::collections::{BTreeMap, VecDeque};
use std::fmt::Write;
use std::sync::Arc;
+use fastcdc::{Chunk, FastCDC};
use futures::stream::*;
use hyper::{Body, Request, Response};
use md5::{digest::generic_array::*, Digest as Md5Digest, Md5};
@@ -268,21 +269,28 @@ async fn put_block_meta(
struct BodyChunker {
body: Body,
read_all: bool,
- block_size: usize,
+ min_block_size: usize,
+ avg_block_size: usize,
+ max_block_size: usize,
buf: VecDeque<u8>,
}
impl BodyChunker {
fn new(body: Body, block_size: usize) -> Self {
+ let min_block_size = block_size / 4 * 3;
+ let avg_block_size = block_size;
+ let max_block_size = block_size * 2;
Self {
body,
read_all: false,
- block_size,
- buf: VecDeque::with_capacity(2 * block_size),
+ min_block_size,
+ avg_block_size,
+ max_block_size,
+ buf: VecDeque::with_capacity(2 * max_block_size),
}
}
async fn next(&mut self) -> Result<Option<Vec<u8>>, GarageError> {
- while !self.read_all && self.buf.len() < self.block_size {
+ while !self.read_all && self.buf.len() < self.max_block_size {
if let Some(block) = self.body.next().await {
let bytes = block?;
trace!("Body next: {} bytes", bytes.len());
@@ -293,12 +301,20 @@ impl BodyChunker {
}
if self.buf.len() == 0 {
Ok(None)
- } else if self.buf.len() <= self.block_size {
- let block = self.buf.drain(..).collect::<Vec<u8>>();
- Ok(Some(block))
} else {
- let block = self.buf.drain(..self.block_size).collect::<Vec<u8>>();
- Ok(Some(block))
+ let mut iter = FastCDC::with_eof(
+ self.buf.make_contiguous(),
+ self.min_block_size,
+ self.avg_block_size,
+ self.max_block_size,
+ self.read_all,
+ );
+ if let Some(Chunk { length, .. }) = iter.next() {
+ let block = self.buf.drain(..length).collect::<Vec<u8>>();
+ Ok(Some(block))
+ } else {
+ unreachable!("FastCDC returned not chunk")
+ }
}
}
}