diff options
author | Alex <alex@adnab.me> | 2022-09-12 16:38:43 +0200 |
---|---|---|
committer | Alex <alex@adnab.me> | 2022-09-12 16:38:43 +0200 |
commit | 309d7aef3f05657e2b969ab72442b2f2c350da03 (patch) | |
tree | 448704ae3f07a10f86fcb5d40347ad70cdd81498 /src/util | |
parent | 03c40a0b24dd5bd2a51d3cd3df0ca1a42fb2d328 (diff) | |
parent | f91fab8582728f176f446a4a2e039d22f752167b (diff) | |
download | garage-309d7aef3f05657e2b969ab72442b2f2c350da03.tar.gz garage-309d7aef3f05657e2b969ab72442b2f2c350da03.zip |
Merge pull request 'performance improvements' (#342) from lx-perf-improvements into main
Performance improvements included in this PR:
- [x] Use `Bytes` in a few places where appropriate, instead of `Vec<u8>`, to reduce the number of copies
- [x] StreamChunker now accumulates incoming slices in a `Vec<Bytes>` instead of a `VecDeque<u8>`. This replaces calls to `.extend()` and `.drain()`, which were quite costly, with a simple `concat()` on a vec of slices, which is much better optimized
- [x] Hashing (b2, sha256, md5) is now done on a Tokio thread dedicated to cpu-intensive tasks, using `spawn_blocking`
- [x] Block manager now uses 256 independent locks instead of one big lock for writing, which reduces contention when writing several/many objects in parallel
- [x] Better LMDB defaults: we now set the flags `NoSync` and `NoMetaSync` to avoid an `fsync` at each transaction (extremely slow). Also increased the number of LMDB readers to accommodate more intensive workloads
Other changes included in this PR:
- [x] Update to hashing and MAC crates: md5 and sha2 from 0.9 to 0.10, hmac from 0.10 to 0.12
- [x] Switch to `tracing_subscriber` for logs, which makes it possible to see the timing of each event
Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/342
Diffstat (limited to 'src/util')
-rw-r--r-- | src/util/Cargo.toml | 4 | ||||
-rw-r--r-- | src/util/async_hash.rs | 61 | ||||
-rw-r--r-- | src/util/lib.rs | 1 |
3 files changed, 65 insertions, 1 deletions
diff --git a/src/util/Cargo.toml b/src/util/Cargo.toml index 163c1b77..d5c194e8 100644 --- a/src/util/Cargo.toml +++ b/src/util/Cargo.toml @@ -19,6 +19,8 @@ garage_db = { version = "0.8.0", path = "../db" } arc-swap = "1.0" async-trait = "0.1" blake2 = "0.9" +bytes = "1.0" +digest = "0.10" err-derive = "0.3" git-version = "0.3.4" xxhash-rust = { version = "0.8", default-features = false, features = ["xxh3"] } @@ -26,7 +28,7 @@ hex = "0.4" lazy_static = "1.4" tracing = "0.1.30" rand = "0.8" -sha2 = "0.9" +sha2 = "0.10" chrono = "0.4" rmp-serde = "0.15" diff --git a/src/util/async_hash.rs b/src/util/async_hash.rs new file mode 100644 index 00000000..5631ea6b --- /dev/null +++ b/src/util/async_hash.rs @@ -0,0 +1,61 @@ +use bytes::Bytes; +use digest::Digest; + +use tokio::sync::mpsc; +use tokio::task::JoinHandle; + +use crate::data::*; + +/// Compute the sha256 of a slice, +/// spawning on a tokio thread for CPU-intensive processing +/// The argument has to be an owned Bytes, as it is moved out to a new thread. +pub async fn async_sha256sum(data: Bytes) -> Hash { + tokio::task::spawn_blocking(move || sha256sum(&data)) + .await + .unwrap() +} + +/// Compute the blake2sum of a slice, +/// spawning on a tokio thread for CPU-intensive processing. +/// The argument has to be an owned Bytes, as it is moved out to a new thread. 
+pub async fn async_blake2sum(data: Bytes) -> Hash { + tokio::task::spawn_blocking(move || blake2sum(&data)) + .await + .unwrap() +} + +// ---- + +pub struct AsyncHasher<D: Digest> { + sendblk: mpsc::Sender<Bytes>, + task: JoinHandle<digest::Output<D>>, +} + +impl<D: Digest> AsyncHasher<D> { + pub fn new() -> Self { + let (sendblk, mut recvblk) = mpsc::channel::<Bytes>(1); + let task = tokio::task::spawn_blocking(move || { + let mut digest = D::new(); + while let Some(blk) = recvblk.blocking_recv() { + digest.update(&blk[..]); + } + digest.finalize() + }); + Self { sendblk, task } + } + + pub async fn update(&self, b: Bytes) { + self.sendblk.send(b).await.unwrap(); + } + + pub async fn finalize(self) -> digest::Output<D> { + drop(self.sendblk); + self.task.await.unwrap() + } +} + +impl<D: Digest> Default for AsyncHasher<D> { + fn default() -> Self { + Self::new() + } +} diff --git a/src/util/lib.rs b/src/util/lib.rs index 47c85c3a..264cc192 100644 --- a/src/util/lib.rs +++ b/src/util/lib.rs @@ -3,6 +3,7 @@ #[macro_use] extern crate tracing; +pub mod async_hash; pub mod background; pub mod config; pub mod crdt; |