aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2022-07-18 18:40:57 +0200
committerAlex Auvolat <alex@adnab.me>2022-07-29 12:24:48 +0200
commit2f111e6b3d772b10c8ed6279ce0c82d22852afd1 (patch)
treec9f4673aa34c12bda389b523753968a9bb92e0ee /src
parent1b2e1296eb99630e969e585ede0424072adc2d0c (diff)
downloadgarage-2f111e6b3d772b10c8ed6279ce0c82d22852afd1.tar.gz
garage-2f111e6b3d772b10c8ed6279ce0c82d22852afd1.zip
Performance improvements:
- reduce contention on mutation_lock by having 256 of them - better lmdb defaults
Diffstat (limited to 'src')
-rw-r--r--src/api/s3/put.rs30
-rw-r--r--src/block/manager.rs27
-rw-r--r--src/model/garage.rs15
-rw-r--r--src/util/async_hash.rs15
4 files changed, 59 insertions, 28 deletions
diff --git a/src/api/s3/put.rs b/src/api/s3/put.rs
index fbfa6f0d..a182f04d 100644
--- a/src/api/s3/put.rs
+++ b/src/api/s3/put.rs
@@ -8,6 +8,11 @@ use hyper::{Request, Response};
use md5::{digest::generic_array::*, Digest as Md5Digest, Md5};
use sha2::Sha256;
+use opentelemetry::{
+ trace::{FutureExt as OtelFutureExt, TraceContextExt, Tracer},
+ Context,
+};
+
use garage_table::*;
use garage_util::async_hash::*;
use garage_util::data::*;
@@ -279,12 +284,21 @@ async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
first_block_hash: Hash,
chunker: &mut StreamChunker<S>,
) -> Result<(u64, GenericArray<u8, typenum::U16>, Hash), Error> {
+ let tracer = opentelemetry::global::tracer("garage");
+
let first_block = Bytes::from(first_block);
let md5hasher = AsyncHasher::<Md5>::new();
let sha256hasher = AsyncHasher::<Sha256>::new();
- md5hasher.update(first_block.clone());
- sha256hasher.update(first_block.clone());
+
+ futures::future::join(
+ md5hasher.update(first_block.clone()),
+ sha256hasher.update(first_block.clone()),
+ )
+ .with_context(Context::current_with_span(
+ tracer.start("Hash first block (md5, sha256)"),
+ ))
+ .await;
let mut next_offset = first_block.len();
let mut put_curr_version_block = put_block_meta(
@@ -307,9 +321,15 @@ async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
)?;
if let Some(block) = next_block {
let block = Bytes::from(block);
- md5hasher.update(block.clone());
- sha256hasher.update(block.clone());
- let block_hash = async_blake2sum(block.clone()).await;
+ let (_, _, block_hash) = futures::future::join3(
+ md5hasher.update(block.clone()),
+ sha256hasher.update(block.clone()),
+ async_blake2sum(block.clone()),
+ )
+ .with_context(Context::current_with_span(
+ tracer.start("Hash block (md5, sha256, blake2)"),
+ ))
+ .await;
let block_len = block.len();
put_curr_version_block = put_block_meta(
garage,
diff --git a/src/block/manager.rs b/src/block/manager.rs
index 890c247d..be53ec6e 100644
--- a/src/block/manager.rs
+++ b/src/block/manager.rs
@@ -93,7 +93,7 @@ pub struct BlockManager {
compression_level: Option<i32>,
background_tranquility: u32,
- mutation_lock: Mutex<BlockManagerLocked>,
+ mutation_lock: [Mutex<BlockManagerLocked>; 256],
pub(crate) rc: BlockRc,
@@ -150,8 +150,6 @@ impl BlockManager {
.netapp
.endpoint("garage_block/manager.rs/Rpc".to_string());
- let manager_locked = BlockManagerLocked();
-
let metrics = BlockManagerMetrics::new(resync_queue.clone(), resync_errors.clone());
let block_manager = Arc::new(Self {
@@ -159,7 +157,7 @@ impl BlockManager {
data_dir,
compression_level,
background_tranquility,
- mutation_lock: Mutex::new(manager_locked),
+ mutation_lock: [(); 256].map(|_| Mutex::new(BlockManagerLocked())),
rc,
resync_queue,
resync_notify: Notify::new(),
@@ -313,14 +311,21 @@ impl BlockManager {
/// Write a block to disk
async fn write_block(&self, hash: &Hash, data: &DataBlock) -> Result<BlockRpc, Error> {
+ let tracer = opentelemetry::global::tracer("garage");
+
let write_size = data.inner_buffer().len() as u64;
- let res = self
- .mutation_lock
+ let res = self.mutation_lock[hash.as_slice()[0] as usize]
.lock()
+ .with_context(Context::current_with_span(
+ tracer.start("Acquire mutation_lock"),
+ ))
.await
.write_block(hash, data, self)
.bound_record_duration(&self.metrics.block_write_duration)
+ .with_context(Context::current_with_span(
+ tracer.start("BlockManagerLocked::write_block"),
+ ))
.await?;
self.metrics.bytes_written.add(write_size);
@@ -370,7 +375,7 @@ impl BlockManager {
if data.verify(*hash).is_err() {
self.metrics.corruption_counter.add(1);
- self.mutation_lock
+ self.mutation_lock[hash.as_slice()[0] as usize]
.lock()
.await
.move_block_to_corrupted(hash, self)
@@ -384,8 +389,7 @@ impl BlockManager {
/// Check if this node should have a block, but don't actually have it
async fn need_block(&self, hash: &Hash) -> Result<bool, Error> {
- let BlockStatus { exists, needed } = self
- .mutation_lock
+ let BlockStatus { exists, needed } = self.mutation_lock[hash.as_slice()[0] as usize]
.lock()
.await
.check_block_status(hash, self)
@@ -608,8 +612,7 @@ impl BlockManager {
}
async fn resync_block(&self, hash: &Hash) -> Result<(), Error> {
- let BlockStatus { exists, needed } = self
- .mutation_lock
+ let BlockStatus { exists, needed } = self.mutation_lock[hash.as_slice()[0] as usize]
.lock()
.await
.check_block_status(hash, self)
@@ -694,7 +697,7 @@ impl BlockManager {
who.len()
);
- self.mutation_lock
+ self.mutation_lock[hash.as_slice()[0] as usize]
.lock()
.await
.delete_if_unneeded(hash, self)
diff --git a/src/model/garage.rs b/src/model/garage.rs
index 15769a17..0d239df6 100644
--- a/src/model/garage.rs
+++ b/src/model/garage.rs
@@ -104,11 +104,16 @@ impl Garage {
std::fs::create_dir_all(&db_path).expect("Unable to create LMDB data directory");
let map_size = garage_db::lmdb_adapter::recommended_map_size();
- let db = db::lmdb_adapter::heed::EnvOpenOptions::new()
- .max_dbs(100)
- .map_size(map_size)
- .open(&db_path)
- .expect("Unable to open LMDB DB");
+ use db::lmdb_adapter::heed;
+ let mut env_builder = heed::EnvOpenOptions::new();
+ env_builder.max_dbs(100);
+ env_builder.max_readers(500);
+ env_builder.map_size(map_size);
+ unsafe {
+ env_builder.flag(heed::flags::Flags::MdbNoSync);
+ env_builder.flag(heed::flags::Flags::MdbNoMetaSync);
+ }
+ let db = env_builder.open(&db_path).expect("Unable to open LMDB DB");
db::lmdb_adapter::LmdbDb::init(db)
}
e => {
diff --git a/src/util/async_hash.rs b/src/util/async_hash.rs
index 67776eb9..be0535de 100644
--- a/src/util/async_hash.rs
+++ b/src/util/async_hash.rs
@@ -1,7 +1,7 @@
use bytes::Bytes;
use digest::Digest;
-use tokio::sync::mpsc;
+use tokio::sync::{mpsc, oneshot};
use tokio::task::JoinHandle;
use crate::data::*;
@@ -27,25 +27,28 @@ pub async fn async_blake2sum(data: Bytes) -> Hash {
// ----
pub struct AsyncHasher<D: Digest> {
- sendblk: mpsc::UnboundedSender<Bytes>,
+ sendblk: mpsc::UnboundedSender<(Bytes, oneshot::Sender<()>)>,
task: JoinHandle<digest::Output<D>>,
}
impl<D: Digest> AsyncHasher<D> {
pub fn new() -> Self {
- let (sendblk, mut recvblk) = mpsc::unbounded_channel::<Bytes>();
+ let (sendblk, mut recvblk) = mpsc::unbounded_channel::<(Bytes, oneshot::Sender<()>)>();
let task = tokio::task::spawn_blocking(move || {
let mut digest = D::new();
- while let Some(blk) = recvblk.blocking_recv() {
+ while let Some((blk, ch)) = recvblk.blocking_recv() {
digest.update(&blk[..]);
+ let _ = ch.send(());
}
digest.finalize()
});
Self { sendblk, task }
}
- pub fn update(&self, b: Bytes) {
- self.sendblk.send(b).unwrap()
+ pub async fn update(&self, b: Bytes) {
+ let (tx, rx) = oneshot::channel();
+ self.sendblk.send((b, tx)).unwrap();
+ let _ = rx.await;
}
pub async fn finalize(self) -> digest::Output<D> {