From 56592e18538b379ccaaa7b7c1990a599ac83b191 Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Mon, 19 Sep 2022 20:12:19 +0200
Subject: RPC performance changes

- configurable ping timeout
- single, much higher, configurable RPC timeout
- no more concurrency semaphore
---
 src/block/manager.rs | 18 +++++++++---------
 src/block/resync.rs  | 14 ++------------
 2 files changed, 11 insertions(+), 21 deletions(-)

(limited to 'src/block')

diff --git a/src/block/manager.rs b/src/block/manager.rs
index ec694fc8..7f439b96 100644
--- a/src/block/manager.rs
+++ b/src/block/manager.rs
@@ -41,9 +41,6 @@ use crate::resync::*;
 /// Size under which data will be stored inlined in database instead of as files
 pub const INLINE_THRESHOLD: usize = 3072;
 
-// Timeout for RPCs that read and write blocks to remote nodes
-pub(crate) const BLOCK_RW_TIMEOUT: Duration = Duration::from_secs(60);
-
 // The delay between the moment when the reference counter
 // drops to zero, and the moment where we allow ourselves
 // to delete the block locally.
@@ -183,7 +180,7 @@ impl BlockManager {
 				};
 				return Ok((header, stream));
 			}
-			_ = tokio::time::sleep(BLOCK_RW_TIMEOUT) => {
+			_ = tokio::time::sleep(self.system.rpc.rpc_timeout()) => {
 				debug!("Node {:?} didn't return block in time, trying next.", node);
 			}
 		};
@@ -235,7 +232,7 @@ impl BlockManager {
 					}
 				}
 			}
-			_ = tokio::time::sleep(BLOCK_RW_TIMEOUT) => {
+			_ = tokio::time::sleep(self.system.rpc.rpc_timeout()) => {
 				debug!("Node {:?} didn't return block in time, trying next.", node);
 			}
 		};
@@ -300,8 +297,7 @@ impl BlockManager {
 				&who[..],
 				put_block_rpc,
 				RequestStrategy::with_priority(PRIO_NORMAL | PRIO_SECONDARY)
-					.with_quorum(self.replication.write_quorum())
-					.with_timeout(BLOCK_RW_TIMEOUT),
+					.with_quorum(self.replication.write_quorum()),
 			)
 			.await?;
 
@@ -336,7 +332,10 @@ impl BlockManager {
 		// we will fecth it from someone.
 		let this = self.clone();
 		tokio::spawn(async move {
-			if let Err(e) = this.resync.put_to_resync(&hash, 2 * BLOCK_RW_TIMEOUT) {
+			if let Err(e) = this
+				.resync
+				.put_to_resync(&hash, 2 * this.system.rpc.rpc_timeout())
+			{
 				error!("Block {:?} could not be put in resync queue: {}.", hash, e);
 			}
 		});
@@ -444,7 +443,8 @@ impl BlockManager {
 			Ok(c) => c,
 			Err(e) => {
 				// Not found but maybe we should have had it ??
-				self.resync.put_to_resync(hash, 2 * BLOCK_RW_TIMEOUT)?;
+				self.resync
+					.put_to_resync(hash, 2 * self.system.rpc.rpc_timeout())?;
 				return Err(Into::into(e));
 			}
 		};
diff --git a/src/block/resync.rs b/src/block/resync.rs
index bde3e98c..ada3ac54 100644
--- a/src/block/resync.rs
+++ b/src/block/resync.rs
@@ -33,14 +33,6 @@ use garage_table::replication::TableReplication;
 
 use crate::manager::*;
 
-// Timeout for RPCs that ask other nodes whether they need a copy
-// of a given block before we delete it locally
-// The timeout here is relatively low because we don't want to block
-// the entire resync loop when some nodes are not responding.
-// Nothing will be deleted if the nodes don't answer the queries,
-// we will just retry later.
-const NEED_BLOCK_QUERY_TIMEOUT: Duration = Duration::from_secs(15);
-
 // The delay between the time where a resync operation fails
 // and the time when it is retried, with exponential backoff
 // (multiplied by 2, 4, 8, 16, etc. for every consecutive failure).
@@ -346,8 +338,7 @@ impl BlockResyncManager {
 				&manager.endpoint,
 				&who,
 				BlockRpc::NeedBlockQuery(*hash),
-				RequestStrategy::with_priority(PRIO_BACKGROUND)
-					.with_timeout(NEED_BLOCK_QUERY_TIMEOUT),
+				RequestStrategy::with_priority(PRIO_BACKGROUND),
 			)
 			.await?;
 
@@ -394,8 +385,7 @@ impl BlockResyncManager {
 				&need_nodes[..],
 				put_block_message,
 				RequestStrategy::with_priority(PRIO_BACKGROUND)
-					.with_quorum(need_nodes.len())
-					.with_timeout(BLOCK_RW_TIMEOUT),
+					.with_quorum(need_nodes.len()),
 			)
 			.await
 			.err_context("PutBlock RPC")?;
-- 
cgit v1.2.3
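For context, the pattern applied at every call site above is to drop the hard-coded per-purpose timeout constants (BLOCK_RW_TIMEOUT, NEED_BLOCK_QUERY_TIMEOUT) and derive all waiting periods from the single configurable RPC timeout exposed through the RPC helper. The following is only a minimal standalone sketch of that pattern; the RpcHelper struct and the 30-second value are assumptions for illustration, not the actual Garage implementation (the patch does not show the helper's internals or the configured default).

use std::time::Duration;

// Hypothetical stand-in for the RPC helper reached through `self.system.rpc`
// in the patch: the timeout is stored once, instead of living in a
// per-call-site constant such as BLOCK_RW_TIMEOUT.
struct RpcHelper {
	rpc_timeout: Duration,
}

impl RpcHelper {
	// Accessor named after the `rpc_timeout()` calls introduced by the patch.
	fn rpc_timeout(&self) -> Duration {
		self.rpc_timeout
	}
}

fn main() {
	// Assumed value: the patch does not show what the configured timeout is.
	let rpc = RpcHelper {
		rpc_timeout: Duration::from_secs(30),
	};

	// Call sites derive their delays from the one configured value, e.g. the
	// resync queue delay becomes twice the RPC timeout, as in the patch.
	let resync_delay = 2 * rpc.rpc_timeout();
	println!("resync retry delay: {:?}", resync_delay);
}

With this arrangement, raising or lowering the configured RPC timeout adjusts every dependent delay consistently, which is why the per-site timeout constants and the .with_timeout(...) calls could be removed.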