From 56592e18538b379ccaaa7b7c1990a599ac83b191 Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Mon, 19 Sep 2022 20:12:19 +0200
Subject: RPC performance changes

- configurable ping timeout
- single, much higher, configurable RPC timeout
- no more concurrency semaphore
---
 src/block/manager.rs | 18 +++++++++---------
 src/block/resync.rs  | 14 ++------------
 2 files changed, 11 insertions(+), 21 deletions(-)

(limited to 'src/block')

diff --git a/src/block/manager.rs b/src/block/manager.rs
index ec694fc8..7f439b96 100644
--- a/src/block/manager.rs
+++ b/src/block/manager.rs
@@ -41,9 +41,6 @@ use crate::resync::*;
 /// Size under which data will be stored inlined in database instead of as files
 pub const INLINE_THRESHOLD: usize = 3072;
 
-// Timeout for RPCs that read and write blocks to remote nodes
-pub(crate) const BLOCK_RW_TIMEOUT: Duration = Duration::from_secs(60);
-
 // The delay between the moment when the reference counter
 // drops to zero, and the moment where we allow ourselves
 // to delete the block locally.
@@ -183,7 +180,7 @@ impl BlockManager {
 				};
 				return Ok((header, stream));
 			}
-			_ = tokio::time::sleep(BLOCK_RW_TIMEOUT) => {
+			_ = tokio::time::sleep(self.system.rpc.rpc_timeout()) => {
 				debug!("Node {:?} didn't return block in time, trying next.", node);
 			}
 		};
@@ -235,7 +232,7 @@ impl BlockManager {
 					}
 				}
 			}
-			_ = tokio::time::sleep(BLOCK_RW_TIMEOUT) => {
+			_ = tokio::time::sleep(self.system.rpc.rpc_timeout()) => {
 				debug!("Node {:?} didn't return block in time, trying next.", node);
 			}
 		};
@@ -300,8 +297,7 @@ impl BlockManager {
 				&who[..],
 				put_block_rpc,
 				RequestStrategy::with_priority(PRIO_NORMAL | PRIO_SECONDARY)
-					.with_quorum(self.replication.write_quorum())
-					.with_timeout(BLOCK_RW_TIMEOUT),
+					.with_quorum(self.replication.write_quorum()),
 			)
 			.await?;
 
@@ -336,7 +332,10 @@ impl BlockManager {
 		// we will fecth it from someone.
 		let this = self.clone();
 		tokio::spawn(async move {
-			if let Err(e) = this.resync.put_to_resync(&hash, 2 * BLOCK_RW_TIMEOUT) {
+			if let Err(e) = this
+				.resync
+				.put_to_resync(&hash, 2 * this.system.rpc.rpc_timeout())
+			{
 				error!("Block {:?} could not be put in resync queue: {}.", hash, e);
 			}
 		});
@@ -444,7 +443,8 @@ impl BlockManager {
 			Ok(c) => c,
 			Err(e) => {
 				// Not found but maybe we should have had it ??
-				self.resync.put_to_resync(hash, 2 * BLOCK_RW_TIMEOUT)?;
+				self.resync
+					.put_to_resync(hash, 2 * self.system.rpc.rpc_timeout())?;
 				return Err(Into::into(e));
 			}
 		};
diff --git a/src/block/resync.rs b/src/block/resync.rs
index bde3e98c..ada3ac54 100644
--- a/src/block/resync.rs
+++ b/src/block/resync.rs
@@ -33,14 +33,6 @@ use garage_table::replication::TableReplication;
 
 use crate::manager::*;
 
-// Timeout for RPCs that ask other nodes whether they need a copy
-// of a given block before we delete it locally
-// The timeout here is relatively low because we don't want to block
-// the entire resync loop when some nodes are not responding.
-// Nothing will be deleted if the nodes don't answer the queries,
-// we will just retry later.
-const NEED_BLOCK_QUERY_TIMEOUT: Duration = Duration::from_secs(15);
-
 // The delay between the time where a resync operation fails
 // and the time when it is retried, with exponential backoff
 // (multiplied by 2, 4, 8, 16, etc. for every consecutive failure).
@@ -346,8 +338,7 @@ impl BlockResyncManager {
 				&manager.endpoint,
 				&who,
 				BlockRpc::NeedBlockQuery(*hash),
-				RequestStrategy::with_priority(PRIO_BACKGROUND)
-					.with_timeout(NEED_BLOCK_QUERY_TIMEOUT),
+				RequestStrategy::with_priority(PRIO_BACKGROUND),
 			)
 			.await?;
 
@@ -394,8 +385,7 @@ impl BlockResyncManager {
 				&need_nodes[..],
 				put_block_message,
 				RequestStrategy::with_priority(PRIO_BACKGROUND)
-					.with_quorum(need_nodes.len())
-					.with_timeout(BLOCK_RW_TIMEOUT),
+					.with_quorum(need_nodes.len()),
 			)
 			.await
 			.err_context("PutBlock RPC")?;
-- 
cgit v1.2.3
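For context, the pattern applied at every call site above is to drop the hard-coded per-purpose timeout constants (BLOCK_RW_TIMEOUT, NEED_BLOCK_QUERY_TIMEOUT) and derive all waiting periods from the single configurable RPC timeout exposed through the RPC helper. The following is only a minimal standalone sketch of that pattern; the RpcHelper struct and the 30-second value are assumptions for illustration, not the actual Garage implementation (the patch does not show the helper's internals or the configured default).

use std::time::Duration;

// Hypothetical stand-in for the RPC helper reached through `self.system.rpc`
// in the patch: the timeout is stored once, instead of living in a
// per-call-site constant such as BLOCK_RW_TIMEOUT.
struct RpcHelper {
	rpc_timeout: Duration,
}

impl RpcHelper {
	// Accessor named after the `rpc_timeout()` calls introduced by the patch.
	fn rpc_timeout(&self) -> Duration {
		self.rpc_timeout
	}
}

fn main() {
	// Assumed value: the patch does not show what the configured timeout is.
	let rpc = RpcHelper {
		rpc_timeout: Duration::from_secs(30),
	};

	// Call sites derive their delays from the one configured value, e.g. the
	// resync queue delay becomes twice the RPC timeout, as in the patch.
	let resync_delay = 2 * rpc.rpc_timeout();
	println!("resync retry delay: {:?}", resync_delay);
}

With this arrangement, raising or lowering the configured RPC timeout adjusts every dependent delay consistently, which is why the per-site timeout constants and the .with_timeout(...) calls could be removed.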