aboutsummaryrefslogtreecommitdiff
path: root/src/util
diff options
context:
space:
mode:
authorAlex <alex@adnab.me>2022-09-12 16:38:43 +0200
committerAlex <alex@adnab.me>2022-09-12 16:38:43 +0200
commit309d7aef3f05657e2b969ab72442b2f2c350da03 (patch)
tree448704ae3f07a10f86fcb5d40347ad70cdd81498 /src/util
parent03c40a0b24dd5bd2a51d3cd3df0ca1a42fb2d328 (diff)
parentf91fab8582728f176f446a4a2e039d22f752167b (diff)
downloadgarage-309d7aef3f05657e2b969ab72442b2f2c350da03.tar.gz
garage-309d7aef3f05657e2b969ab72442b2f2c350da03.zip
Merge pull request 'performance improvements' (#342) from lx-perf-improvements into main
Performance improvements included in this PR: - [x] Use `Bytes` in a few places where appropriate, instead of `Vec<u8>`, to reduce the number of copies - [x] StreamChunker now accumulates incoming slices in a `Vec<Bytes>` instead of a `VecDeque<u8>`. Replaces calls to `.extend()` and `.drain()` that were quite costly by a simple `concat()` on a vec of slices which is much more optimized - [x] Hashing (b2, sha256, md5) is now done on a Tokio thread dedicated to CPU-intensive tasks, using `spawn_blocking` - [x] Block manager now uses 256 independent locks instead of one big lock for writing, which reduces contention when writing several/many objects in parallel - [x] Better LMDB defaults: we now set flags `NoSync` and `NoMetaSync` to avoid `fsync` at each transaction (extremely slow). Also increased number of LMDB readers to accommodate more intensive workloads Other changes included in this PR: - [x] Update to hashing and MAC crates: md5 and sha2 from 0.9 to 0.10, hmac from 0.10 to 0.12 - [x] switch to `tracing_subscriber` for logs, which provides timing for each event Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/342
Diffstat (limited to 'src/util')
-rw-r--r--src/util/Cargo.toml4
-rw-r--r--src/util/async_hash.rs61
-rw-r--r--src/util/lib.rs1
3 files changed, 65 insertions, 1 deletions
diff --git a/src/util/Cargo.toml b/src/util/Cargo.toml
index 163c1b77..d5c194e8 100644
--- a/src/util/Cargo.toml
+++ b/src/util/Cargo.toml
@@ -19,6 +19,8 @@ garage_db = { version = "0.8.0", path = "../db" }
arc-swap = "1.0"
async-trait = "0.1"
blake2 = "0.9"
+bytes = "1.0"
+digest = "0.10"
err-derive = "0.3"
git-version = "0.3.4"
xxhash-rust = { version = "0.8", default-features = false, features = ["xxh3"] }
@@ -26,7 +28,7 @@ hex = "0.4"
lazy_static = "1.4"
tracing = "0.1.30"
rand = "0.8"
-sha2 = "0.9"
+sha2 = "0.10"
chrono = "0.4"
rmp-serde = "0.15"
diff --git a/src/util/async_hash.rs b/src/util/async_hash.rs
new file mode 100644
index 00000000..5631ea6b
--- /dev/null
+++ b/src/util/async_hash.rs
@@ -0,0 +1,61 @@
+use bytes::Bytes;
+use digest::Digest;
+
+use tokio::sync::mpsc;
+use tokio::task::JoinHandle;
+
+use crate::data::*;
+
+/// Compute the sha256 of a slice,
+/// spawning on a tokio thread for CPU-intensive processing.
+/// The argument has to be an owned Bytes, as it is moved out to a new thread.
+pub async fn async_sha256sum(data: Bytes) -> Hash {
+ tokio::task::spawn_blocking(move || sha256sum(&data))
+ .await
+ .unwrap()
+}
+
+/// Compute the blake2sum of a slice,
+/// spawning on a tokio thread for CPU-intensive processing.
+/// The argument has to be an owned Bytes, as it is moved out to a new thread.
+pub async fn async_blake2sum(data: Bytes) -> Hash {
+ tokio::task::spawn_blocking(move || blake2sum(&data))
+ .await
+ .unwrap()
+}
+
+// ----
+
+pub struct AsyncHasher<D: Digest> {
+ sendblk: mpsc::Sender<Bytes>,
+ task: JoinHandle<digest::Output<D>>,
+}
+
+impl<D: Digest> AsyncHasher<D> {
+ pub fn new() -> Self {
+ let (sendblk, mut recvblk) = mpsc::channel::<Bytes>(1);
+ let task = tokio::task::spawn_blocking(move || {
+ let mut digest = D::new();
+ while let Some(blk) = recvblk.blocking_recv() {
+ digest.update(&blk[..]);
+ }
+ digest.finalize()
+ });
+ Self { sendblk, task }
+ }
+
+ pub async fn update(&self, b: Bytes) {
+ self.sendblk.send(b).await.unwrap();
+ }
+
+ pub async fn finalize(self) -> digest::Output<D> {
+ drop(self.sendblk);
+ self.task.await.unwrap()
+ }
+}
+
+impl<D: Digest> Default for AsyncHasher<D> {
+ fn default() -> Self {
+ Self::new()
+ }
+}
diff --git a/src/util/lib.rs b/src/util/lib.rs
index 47c85c3a..264cc192 100644
--- a/src/util/lib.rs
+++ b/src/util/lib.rs
@@ -3,6 +3,7 @@
#[macro_use]
extern crate tracing;
+pub mod async_hash;
pub mod background;
pub mod config;
pub mod crdt;