From 8e7e680afe39f48fe15f365c9ef3fee57596e119 Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Fri, 22 Jul 2022 15:20:00 +0200
Subject: First adaptation to WIP netapp with streaming body

---
 src/rpc/rpc_helper.rs | 71 ++++++++++++++++++++++-----------------------
 1 file changed, 30 insertions(+), 41 deletions(-)

(limited to 'src/rpc/rpc_helper.rs')

diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs
index 34717d3b..079cdc70 100644
--- a/src/rpc/rpc_helper.rs
+++ b/src/rpc/rpc_helper.rs
@@ -15,9 +15,9 @@ use opentelemetry::{
     Context,
 };
-pub use netapp::endpoint::{Endpoint, EndpointHandler, Message as Rpc};
+pub use netapp::endpoint::{Endpoint, EndpointHandler};
+pub use netapp::message::{Message as Rpc, *};
 use netapp::peering::fullmesh::FullMeshPeeringStrategy;
-pub use netapp::proto::*;
 pub use netapp::{NetApp, NodeID};
 use garage_util::background::BackgroundRunner;
@@ -30,10 +30,8 @@ use crate::ring::Ring;
 const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
-// Try to never have more than 200MB of outgoing requests
-// buffered at the same time. Other requests are queued until
-// space is freed.
-const REQUEST_BUFFER_SIZE: usize = 200 * 1024 * 1024;
+// Don't allow more than 100 concurrent outgoing RPCs.
+const MAX_CONCURRENT_REQUESTS: usize = 100;
 /// Strategy to apply when making RPC
 #[derive(Copy, Clone)]
@@ -95,7 +93,7 @@ impl RpcHelper {
         background: Arc,
         ring: watch::Receiver>,
     ) -> Self {
-        let sem = Arc::new(Semaphore::new(REQUEST_BUFFER_SIZE));
+        let sem = Arc::new(Semaphore::new(MAX_CONCURRENT_REQUESTS));
         let metrics = RpcMetrics::new(sem.clone());
@@ -109,29 +107,16 @@ impl RpcHelper {
         }))
     }
-    pub async fn call(
+    pub async fn call(
         &self,
         endpoint: &Endpoint,
         to: Uuid,
-        msg: M,
-        strat: RequestStrategy,
-    ) -> Result
-    where
-        M: Rpc>,
-        H: EndpointHandler,
-    {
-        self.call_arc(endpoint, to, Arc::new(msg), strat).await
-    }
-
-    pub async fn call_arc(
-        &self,
-        endpoint: &Endpoint,
-        to: Uuid,
-        msg: Arc,
+        msg: N,
         strat: RequestStrategy,
     ) -> Result
     where
         M: Rpc>,
+        N: IntoReq + Send,
         H: EndpointHandler,
     {
         let metric_tags = [
@@ -140,11 +125,10 @@ impl RpcHelper {
             KeyValue::new("to", format!("{:?}", to)),
         ];
-        let msg_size = rmp_to_vec_all_named(&msg)?.len() as u32;
         let permit = self
             .0
             .request_buffer_semaphore
-            .acquire_many(msg_size)
+            .acquire()
             .record_duration(&self.0.metrics.rpc_queueing_time, &metric_tags)
             .await?;
@@ -152,7 +136,7 @@ impl RpcHelper {
         let node_id = to.into();
         let rpc_call = endpoint
-            .call(&node_id, msg, strat.rs_priority)
+            .call_streaming(&node_id, msg, strat.rs_priority)
             .record_duration(&self.0.metrics.rpc_duration, &metric_tags);
         select! {
@@ -162,7 +146,7 @@ impl RpcHelper {
             if res.is_err() {
                 self.0.metrics.rpc_netapp_error_counter.add(1, &metric_tags);
             }
-            let res = res?;
+            let res = res?.into_msg();
             if res.is_err() {
                 self.0.metrics.rpc_garage_error_counter.add(1, &metric_tags);
@@ -178,37 +162,41 @@ impl RpcHelper {
         }
     }
-    pub async fn call_many(
+    pub async fn call_many(
         &self,
         endpoint: &Endpoint,
         to: &[Uuid],
-        msg: M,
+        msg: N,
         strat: RequestStrategy,
-    ) -> Vec<(Uuid, Result)>
+    ) -> Result)>, Error>
     where
         M: Rpc>,
+        N: IntoReq,
        H: EndpointHandler,
     {
-        let msg = Arc::new(msg);
+        let msg = msg.into_req().map_err(netapp::error::Error::from)?;
+
         let resps = join_all(
             to.iter()
-                .map(|to| self.call_arc(endpoint, *to, msg.clone(), strat)),
+                .map(|to| self.call(endpoint, *to, msg.clone(), strat)),
         )
         .await;
-        to.iter()
+        Ok(to
+            .iter()
             .cloned()
             .zip(resps.into_iter())
-            .collect::>()
+            .collect::>())
     }
-    pub async fn broadcast(
+    pub async fn broadcast(
         &self,
         endpoint: &Endpoint,
-        msg: M,
+        msg: N,
         strat: RequestStrategy,
-    ) -> Vec<(Uuid, Result)>
+    ) -> Result)>, Error>
     where
         M: Rpc>,
+        N: IntoReq,
         H: EndpointHandler,
     {
         let to = self
@@ -262,20 +250,21 @@ impl RpcHelper {
         .await
     }
-    async fn try_call_many_internal(
+    async fn try_call_many_internal(
         &self,
         endpoint: &Arc>,
         to: &[Uuid],
-        msg: M,
+        msg: N,
         strategy: RequestStrategy,
         quorum: usize,
     ) -> Result, Error>
     where
         M: Rpc> + 'static,
+        N: IntoReq,
         H: EndpointHandler + 'static,
         S: Send + 'static,
     {
-        let msg = Arc::new(msg);
+        let msg = msg.into_req().map_err(netapp::error::Error::from)?;
         // Build future for each request
         // They are not started now: they are added below in a FuturesUnordered
@@ -285,7 +274,7 @@ impl RpcHelper {
             let msg = msg.clone();
             let endpoint2 = endpoint.clone();
             (to, async move {
-                self2.call_arc(&endpoint2, to, msg, strategy).await
+                self2.call(&endpoint2, to, msg, strategy).await
             })
         });
-- cgit v1.2.3
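The main behavioural change in the patch above is the admission control on outgoing RPCs: instead of budgeting 200 MB of serialized request bytes with acquire_many(msg_size), the helper now takes one semaphore permit per in-flight request. A minimal standalone sketch of that pattern (not taken from the patch; the sleep stands in for an actual RPC and the counts are illustrative):

    use std::sync::Arc;
    use std::time::Duration;
    use tokio::sync::Semaphore;

    #[tokio::main]
    async fn main() {
        // Same idea as MAX_CONCURRENT_REQUESTS: a plain counting semaphore.
        let sem = Arc::new(Semaphore::new(100));

        let handles: Vec<_> = (0..300u32)
            .map(|i| {
                let sem = sem.clone();
                tokio::spawn(async move {
                    // Each outgoing request holds one permit for its whole duration,
                    // so at most 100 of them run concurrently; the rest wait here.
                    let _permit = sem.acquire().await.unwrap();
                    tokio::time::sleep(Duration::from_millis(10)).await;
                    i
                })
            })
            .collect();

        for h in handles {
            h.await.unwrap();
        }
    }
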
From 605a630333c8ee60c55fe011a375c01277bba173 Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Fri, 22 Jul 2022 18:20:27 +0200
Subject: Use streaming in block manager

---
 src/rpc/rpc_helper.rs | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'src/rpc/rpc_helper.rs')

diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs
index 079cdc70..6e098446 100644
--- a/src/rpc/rpc_helper.rs
+++ b/src/rpc/rpc_helper.rs
@@ -15,10 +15,13 @@ use opentelemetry::{
     Context,
 };
-pub use netapp::endpoint::{Endpoint, EndpointHandler};
-pub use netapp::message::{Message as Rpc, *};
+pub use netapp::endpoint::{Endpoint, EndpointHandler, StreamingEndpointHandler};
+use netapp::message::IntoReq;
+pub use netapp::message::{
+    Message as Rpc, Req, RequestPriority, Resp, PRIO_BACKGROUND, PRIO_HIGH, PRIO_NORMAL,
+};
 use netapp::peering::fullmesh::FullMeshPeeringStrategy;
-pub use netapp::{NetApp, NodeID};
+pub use netapp::{self, NetApp, NodeID};
 use garage_util::background::BackgroundRunner;
 use garage_util::data::*;
@@ -117,7 +120,7 @@ impl RpcHelper {
     where
         M: Rpc>,
         N: IntoReq + Send,
-        H: EndpointHandler,
+        H: StreamingEndpointHandler,
     {
         let metric_tags = [
             KeyValue::new("rpc_endpoint", endpoint.path().to_string()),
@@ -172,7 +175,7 @@ impl RpcHelper {
     where
         M: Rpc>,
         N: IntoReq,
-        H: EndpointHandler,
+        H: StreamingEndpointHandler,
     {
         let msg = msg.into_req().map_err(netapp::error::Error::from)?;
@@ -197,7 +200,7 @@ impl RpcHelper {
     where
         M: Rpc>,
         N: IntoReq,
-        H: EndpointHandler,
+        H: StreamingEndpointHandler,
     {
         let to = self
             .0
@@ -211,16 +214,17 @@ impl RpcHelper {
     /// Make a RPC call to multiple servers, returning either a Vec of responses,
     /// or an error if quorum could not be reached due to too many errors
-    pub async fn try_call_many(
+    pub async fn try_call_many(
         &self,
         endpoint: &Arc>,
         to: &[Uuid],
-        msg: M,
+        msg: N,
         strategy: RequestStrategy,
     ) -> Result, Error>
     where
         M: Rpc> + 'static,
-        H: EndpointHandler + 'static,
+        N: IntoReq,
+        H: StreamingEndpointHandler + 'static,
         S: Send + 'static,
     {
         let quorum = strategy.rs_quorum.unwrap_or(to.len());
@@ -261,7 +265,7 @@ impl RpcHelper {
     where
         M: Rpc> + 'static,
         N: IntoReq,
-        H: EndpointHandler + 'static,
+        H: StreamingEndpointHandler + 'static,
         S: Send + 'static,
     {
         let msg = msg.into_req().map_err(netapp::error::Error::from)?;
-- cgit v1.2.3

From e935861854deed5d1ca66767fc51d9849201a4dd Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Mon, 25 Jul 2022 18:19:35 +0200
Subject: Factor out node request order selection logic & use in manager

---
 src/rpc/rpc_helper.rs | 95 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 56 insertions(+), 39 deletions(-)

(limited to 'src/rpc/rpc_helper.rs')

diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs
index 6e098446..ddabd636 100644
--- a/src/rpc/rpc_helper.rs
+++ b/src/rpc/rpc_helper.rs
@@ -292,47 +292,19 @@ impl RpcHelper {
     // to reach a quorum, priorizing nodes with the lowest latency.
     // When there are errors, we start new requests to compensate.
-        // Retrieve some status variables that we will use to sort requests
-        let peer_list = self.0.fullmesh.get_peer_list();
-        let ring: Arc = self.0.ring.borrow().clone();
-        let our_zone = match ring.layout.node_role(&self.0.our_node_id) {
-            Some(pc) => &pc.zone,
-            None => "",
-        };
-
-        // Augment requests with some information used to sort them.
-        // The tuples are as follows:
-        // (is another node?, is another zone?, latency, node ID, request future)
-        // We store all of these tuples in a vec that we can sort.
-        // By sorting this vec, we priorize ourself, then nodes in the same zone,
-        // and within a same zone we priorize nodes with the lowest latency.
-        let mut requests = requests
-            .map(|(to, fut)| {
-                let peer_zone = match ring.layout.node_role(&to) {
-                    Some(pc) => &pc.zone,
-                    None => "",
-                };
-                let peer_avg_ping = peer_list
-                    .iter()
-                    .find(|x| x.id.as_ref() == to.as_slice())
-                    .and_then(|pi| pi.avg_ping)
-                    .unwrap_or_else(|| Duration::from_secs(1));
-                (
-                    to != self.0.our_node_id,
-                    peer_zone != our_zone,
-                    peer_avg_ping,
-                    to,
-                    fut,
-                )
-            })
+        // Reorder requests to priorize closeness / low latency
+        let request_order = self.request_order(to);
+        let mut ord_requests = vec![(); request_order.len()]
+            .into_iter()
+            .map(|_| None)
             .collect::>();
-
-        // Sort requests by (priorize ourself, priorize same zone, priorize low latency)
-        requests
-            .sort_by_key(|(diffnode, diffzone, ping, _to, _fut)| (*diffnode, *diffzone, *ping));
+        for (to, fut) in requests {
+            let i = request_order.iter().position(|x| *x == to).unwrap();
+            ord_requests[i] = Some((to, fut));
+        }
         // Make an iterator to take requests in their sorted order
-        let mut requests = requests.into_iter();
+        let mut requests = ord_requests.into_iter().map(Option::unwrap);
         // resp_stream will contain all of the requests that are currently in flight.
         // (for the moment none, they will be added in the loop below)
@@ -343,7 +315,7 @@ impl RpcHelper {
         // If the current set of requests that are running is not enough to possibly
         // reach quorum, start some new requests.
         while successes.len() + resp_stream.len() < quorum {
-            if let Some((_, _, _, req_to, fut)) = requests.next() {
+            if let Some((req_to, fut)) = requests.next() {
                 let tracer = opentelemetry::global::tracer("garage");
                 let span = tracer.start(format!("RPC to {:?}", req_to));
                 resp_stream.push(tokio::spawn(
@@ -413,4 +385,49 @@ impl RpcHelper {
             Err(Error::Quorum(quorum, successes.len(), to.len(), errors))
         }
     }
+
+    pub fn request_order(&self, nodes: &[Uuid]) -> Vec {
+        // Retrieve some status variables that we will use to sort requests
+        let peer_list = self.0.fullmesh.get_peer_list();
+        let ring: Arc = self.0.ring.borrow().clone();
+        let our_zone = match ring.layout.node_role(&self.0.our_node_id) {
+            Some(pc) => &pc.zone,
+            None => "",
+        };
+
+        // Augment requests with some information used to sort them.
+        // The tuples are as follows:
+        // (is another node?, is another zone?, latency, node ID, request future)
+        // We store all of these tuples in a vec that we can sort.
+        // By sorting this vec, we priorize ourself, then nodes in the same zone,
+        // and within a same zone we priorize nodes with the lowest latency.
+        let mut nodes = nodes
+            .iter()
+            .map(|to| {
+                let peer_zone = match ring.layout.node_role(&to) {
+                    Some(pc) => &pc.zone,
+                    None => "",
+                };
+                let peer_avg_ping = peer_list
+                    .iter()
+                    .find(|x| x.id.as_ref() == to.as_slice())
+                    .and_then(|pi| pi.avg_ping)
+                    .unwrap_or_else(|| Duration::from_secs(1));
+                (
+                    *to != self.0.our_node_id,
+                    peer_zone != our_zone,
+                    peer_avg_ping,
+                    *to,
+                )
+            })
+            .collect::>();
+
+        // Sort requests by (priorize ourself, priorize same zone, priorize low latency)
+        nodes.sort_by_key(|(diffnode, diffzone, ping, _to)| (*diffnode, *diffzone, *ping));
+
+        nodes
+            .into_iter()
+            .map(|(_, _, _, to)| to)
+            .collect::>()
+    }
 }
-- cgit v1.2.3
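The request_order() function extracted above reduces node selection to sorting by the tuple (is another node?, is another zone?, average ping): tuple comparison then prefers ourself first, then nodes in our own zone, then the lowest latency. A self-contained sketch of the same ordering, with small integers and static strings standing in for node Uuids, layout roles and the fullmesh ping data:

    use std::time::Duration;

    struct Peer {
        id: u8,                     // stand-in for a node Uuid
        zone: &'static str,         // stand-in for the layout's node_role().zone
        avg_ping: Option<Duration>, // stand-in for the fullmesh peer list ping
    }

    fn request_order(our_id: u8, our_zone: &str, peers: &[Peer]) -> Vec<u8> {
        let mut keyed: Vec<_> = peers
            .iter()
            .map(|p| {
                // Unknown latency is treated pessimistically, like the
                // Duration::from_secs(1) fallback in the real code.
                let ping = p.avg_ping.unwrap_or(Duration::from_secs(1));
                ((p.id != our_id, p.zone != our_zone, ping), p.id)
            })
            .collect();
        // (false, false, _) sorts first: ourself, then same zone, then low ping.
        keyed.sort_by_key(|(key, _)| *key);
        keyed.into_iter().map(|(_, id)| id).collect()
    }

    fn main() {
        let peers = [
            Peer { id: 1, zone: "dc1", avg_ping: Some(Duration::from_millis(50)) },
            Peer { id: 2, zone: "dc2", avg_ping: Some(Duration::from_millis(5)) },
            Peer { id: 3, zone: "dc1", avg_ping: Some(Duration::from_millis(10)) },
        ];
        // Viewed from node 3 in dc1: itself first, then node 1 (same zone), then node 2.
        assert_eq!(request_order(3, "dc1", &peers), vec![3, 1, 2]);
    }
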
From 322dafc761295df45c081183c5fc059a750a3249 Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Mon, 29 Aug 2022 17:32:45 +0200
Subject: Try to fix clippy

---
 src/rpc/rpc_helper.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/rpc/rpc_helper.rs')

diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs
index ddabd636..216fffd4 100644
--- a/src/rpc/rpc_helper.rs
+++ b/src/rpc/rpc_helper.rs
@@ -404,7 +404,7 @@ impl RpcHelper {
         let mut nodes = nodes
             .iter()
             .map(|to| {
-                let peer_zone = match ring.layout.node_role(&to) {
+                let peer_zone = match ring.layout.node_role(to) {
                     Some(pc) => &pc.zone,
                     None => "",
                 };
-- cgit v1.2.3

From bc977f9a7a7a5bd87ccf5fe96d64b397591f8ba0 Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Thu, 1 Sep 2022 12:58:20 +0200
Subject: Update to Netapp with OrderTag support and exploit OrderTags

---
 src/rpc/rpc_helper.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/rpc/rpc_helper.rs')

diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs
index 216fffd4..6c79c502 100644
--- a/src/rpc/rpc_helper.rs
+++ b/src/rpc/rpc_helper.rs
@@ -18,7 +18,7 @@ use opentelemetry::{
 pub use netapp::endpoint::{Endpoint, EndpointHandler, StreamingEndpointHandler};
 use netapp::message::IntoReq;
 pub use netapp::message::{
-    Message as Rpc, Req, RequestPriority, Resp, PRIO_BACKGROUND, PRIO_HIGH, PRIO_NORMAL,
+    Message as Rpc, OrderTag, Req, RequestPriority, Resp, PRIO_BACKGROUND, PRIO_HIGH, PRIO_NORMAL,
 };
 use netapp::peering::fullmesh::FullMeshPeeringStrategy;
 pub use netapp::{self, NetApp, NodeID};
-- cgit v1.2.3

From df094bd8075332bb765b8b44c9b19cf2485e9ca8 Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Thu, 1 Sep 2022 16:30:44 +0200
Subject: Less strict timeouts

---
 src/rpc/rpc_helper.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/rpc/rpc_helper.rs')

diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs
index 6c79c502..e9575261 100644
--- a/src/rpc/rpc_helper.rs
+++ b/src/rpc/rpc_helper.rs
@@ -31,7 +31,7 @@ use garage_util::metrics::RecordDuration;
 use crate::metrics::RpcMetrics;
 use crate::ring::Ring;
-const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
+const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
 // Don't allow more than 100 concurrent outgoing RPCs.
 const MAX_CONCURRENT_REQUESTS: usize = 100;
-- cgit v1.2.3

From 99b532b85bf35b5acf621c229fb991825f3d994c Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Thu, 1 Sep 2022 16:35:43 +0200
Subject: Apply PRIO_SECONDARY to block data transfers

---
 src/rpc/rpc_helper.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/rpc/rpc_helper.rs')

diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs
index e9575261..aa204c5e 100644
--- a/src/rpc/rpc_helper.rs
+++ b/src/rpc/rpc_helper.rs
@@ -18,7 +18,7 @@ use opentelemetry::{
 pub use netapp::endpoint::{Endpoint, EndpointHandler, StreamingEndpointHandler};
 use netapp::message::IntoReq;
 pub use netapp::message::{
-    Message as Rpc, OrderTag, Req, RequestPriority, Resp, PRIO_BACKGROUND, PRIO_HIGH, PRIO_NORMAL,
+    Message as Rpc, OrderTag, Req, RequestPriority, Resp, PRIO_BACKGROUND, PRIO_HIGH, PRIO_NORMAL, PRIO_SECONDARY
 };
 use netapp::peering::fullmesh::FullMeshPeeringStrategy;
 pub use netapp::{self, NetApp, NodeID};
-- cgit v1.2.3

From 1ef87ac4cb676113e86fc16a9eb27546d9a737bd Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Fri, 2 Sep 2022 13:38:29 +0200
Subject: cargo fmt

---
 src/rpc/rpc_helper.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/rpc/rpc_helper.rs')

diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs
index aa204c5e..19abb4c5 100644
--- a/src/rpc/rpc_helper.rs
+++ b/src/rpc/rpc_helper.rs
@@ -18,7 +18,8 @@ use opentelemetry::{
 pub use netapp::endpoint::{Endpoint, EndpointHandler, StreamingEndpointHandler};
 use netapp::message::IntoReq;
 pub use netapp::message::{
-    Message as Rpc, OrderTag, Req, RequestPriority, Resp, PRIO_BACKGROUND, PRIO_HIGH, PRIO_NORMAL, PRIO_SECONDARY
+    Message as Rpc, OrderTag, Req, RequestPriority, Resp, PRIO_BACKGROUND, PRIO_HIGH, PRIO_NORMAL,
+    PRIO_SECONDARY,
 };
 use netapp::peering::fullmesh::FullMeshPeeringStrategy;
 pub use netapp::{self, NetApp, NodeID};
-- cgit v1.2.3

From 56592e18538b379ccaaa7b7c1990a599ac83b191 Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Mon, 19 Sep 2022 20:12:19 +0200
Subject: RPC performance changes

- configurable ping timeout
- single, much higher, configurable RPC timeout
- no more concurrency semaphore
---
 src/rpc/rpc_helper.rs | 57 +++++++++++++++++++++++++--------------------------
 1 file changed, 28 insertions(+), 29 deletions(-)

(limited to 'src/rpc/rpc_helper.rs')

diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs
index 19abb4c5..857ed620 100644
--- a/src/rpc/rpc_helper.rs
+++ b/src/rpc/rpc_helper.rs
@@ -7,7 +7,7 @@ use futures::stream::futures_unordered::FuturesUnordered;
 use futures::stream::StreamExt;
 use futures_util::future::FutureExt;
 use tokio::select;
-use tokio::sync::{watch, Semaphore};
+use tokio::sync::watch;
 use opentelemetry::KeyValue;
 use opentelemetry::{
@@ -32,32 +32,30 @@ use garage_util::metrics::RecordDuration;
 use crate::metrics::RpcMetrics;
 use crate::ring::Ring;
-const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
-
-// Don't allow more than 100 concurrent outgoing RPCs.
-const MAX_CONCURRENT_REQUESTS: usize = 100;
+// Default RPC timeout = 5 minutes
+const DEFAULT_TIMEOUT: Duration = Duration::from_secs(300);
 /// Strategy to apply when making RPC
 #[derive(Copy, Clone)]
 pub struct RequestStrategy {
-    /// Max time to wait for reponse
-    pub rs_timeout: Duration,
     /// Min number of response to consider the request successful
     pub rs_quorum: Option,
     /// Should requests be dropped after enough response are received
     pub rs_interrupt_after_quorum: bool,
     /// Request priority
     pub rs_priority: RequestPriority,
+    /// Deactivate timeout for this request
+    pub rs_no_timeout: bool,
 }
 impl RequestStrategy {
     /// Create a RequestStrategy with default timeout and not interrupting when quorum reached
     pub fn with_priority(prio: RequestPriority) -> Self {
         RequestStrategy {
-            rs_timeout: DEFAULT_TIMEOUT,
             rs_quorum: None,
             rs_interrupt_after_quorum: false,
             rs_priority: prio,
+            rs_no_timeout: false,
         }
     }
     /// Set quorum to be reached for request
@@ -65,17 +63,17 @@ impl RequestStrategy {
         self.rs_quorum = Some(quorum);
         self
     }
-    /// Set timeout of the strategy
-    pub fn with_timeout(mut self, timeout: Duration) -> Self {
-        self.rs_timeout = timeout;
-        self
-    }
     /// Set if requests can be dropped after quorum has been reached
     /// In general true for read requests, and false for write
     pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self {
         self.rs_interrupt_after_quorum = interrupt;
         self
     }
+    /// Deactivate timeout for this request
+    pub fn without_timeout(mut self) -> Self {
+        self.rs_no_timeout = true;
+        self
+    }
 }
 #[derive(Clone)]
@@ -86,8 +84,8 @@ struct RpcHelperInner {
     fullmesh: Arc,
     background: Arc,
     ring: watch::Receiver>,
-    request_buffer_semaphore: Arc,
     metrics: RpcMetrics,
+    rpc_timeout: Duration,
 }
 impl RpcHelper {
@@ -96,21 +94,24 @@ impl RpcHelper {
         fullmesh: Arc,
         background: Arc,
         ring: watch::Receiver>,
+        rpc_timeout: Option,
     ) -> Self {
-        let sem = Arc::new(Semaphore::new(MAX_CONCURRENT_REQUESTS));
-
-        let metrics = RpcMetrics::new(sem.clone());
+        let metrics = RpcMetrics::new();
         Self(Arc::new(RpcHelperInner {
             our_node_id,
             fullmesh,
             background,
             ring,
-            request_buffer_semaphore: sem,
             metrics,
+            rpc_timeout: rpc_timeout.unwrap_or(DEFAULT_TIMEOUT),
         }))
     }
+    pub fn rpc_timeout(&self) -> Duration {
+        self.0.rpc_timeout
+    }
+
     pub async fn call(
         &self,
         endpoint: &Endpoint,
@@ -129,13 +130,6 @@ impl RpcHelper {
             KeyValue::new("to", format!("{:?}", to)),
         ];
-        let permit = self
-            .0
-            .request_buffer_semaphore
-            .acquire()
-            .record_duration(&self.0.metrics.rpc_queueing_time, &metric_tags)
-            .await?;
-
         self.0.metrics.rpc_counter.add(1, &metric_tags);
         let node_id = to.into();
@@ -143,10 +137,16 @@ impl RpcHelper {
             .call_streaming(&node_id, msg, strat.rs_priority)
             .record_duration(&self.0.metrics.rpc_duration, &metric_tags);
+        let timeout = async {
+            if strat.rs_no_timeout {
+                futures::future::pending().await
+            } else {
+                tokio::time::sleep(self.0.rpc_timeout).await
+            }
+        };
+
         select! {
             res = rpc_call => {
-                drop(permit);
-
                 if res.is_err() {
                     self.0.metrics.rpc_netapp_error_counter.add(1, &metric_tags);
                 }
@@ -158,8 +158,7 @@ impl RpcHelper {
                 Ok(res?)
             }
-            _ = tokio::time::sleep(strat.rs_timeout) => {
-                drop(permit);
+            () = timeout => {
                 self.0.metrics.rpc_timeout_counter.add(1, &metric_tags);
                 Err(Error::Timeout)
             }
-- cgit v1.2.3
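With the per-request rs_timeout field gone, call() now races the RPC future against either the globally configured rpc_timeout or, when without_timeout() was used, a future that never resolves. A standalone sketch of that select! pattern (not from the patch; the function name and the Option<Duration> argument are illustrative):

    use std::future::Future;
    use std::time::Duration;

    async fn with_optional_timeout<T>(
        fut: impl Future<Output = T>,
        timeout: Option<Duration>,
    ) -> Option<T> {
        let sleep = async {
            match timeout {
                Some(t) => tokio::time::sleep(t).await,
                // No timeout configured: wait forever, as without_timeout() does.
                None => futures::future::pending().await,
            }
        };
        tokio::select! {
            res = fut => Some(res),
            () = sleep => None, // RpcHelper maps this arm to Error::Timeout
        }
    }

    #[tokio::main]
    async fn main() {
        let slow = async {
            tokio::time::sleep(Duration::from_secs(2)).await;
            42
        };
        // The 100 ms budget expires first, so we get None back.
        assert_eq!(with_optional_timeout(slow, Some(Duration::from_millis(100))).await, None);
    }
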
From ded444f6c96f8ab991e762f65760b42e4d64246c Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Tue, 20 Sep 2022 16:01:41 +0200
Subject: Ability to have custom timeouts in request strategy (not used)

---
 src/rpc/rpc_helper.rs | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

(limited to 'src/rpc/rpc_helper.rs')

diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs
index 857ed620..949aced6 100644
--- a/src/rpc/rpc_helper.rs
+++ b/src/rpc/rpc_helper.rs
@@ -44,8 +44,15 @@ pub struct RequestStrategy {
     pub rs_interrupt_after_quorum: bool,
     /// Request priority
     pub rs_priority: RequestPriority,
-    /// Deactivate timeout for this request
-    pub rs_no_timeout: bool,
+    /// Custom timeout for this request
+    rs_timeout: Timeout,
+}
+
+#[derive(Copy, Clone)]
+enum Timeout {
+    None,
+    Default,
+    Custom(Duration),
 }
 impl RequestStrategy {
@@ -55,7 +62,7 @@ impl RequestStrategy {
             rs_quorum: None,
             rs_interrupt_after_quorum: false,
             rs_priority: prio,
-            rs_no_timeout: false,
+            rs_timeout: Timeout::Default,
         }
     }
     /// Set quorum to be reached for request
@@ -71,7 +78,12 @@ impl RequestStrategy {
     }
     /// Deactivate timeout for this request
     pub fn without_timeout(mut self) -> Self {
-        self.rs_no_timeout = true;
+        self.rs_timeout = Timeout::None;
+        self
+    }
+    /// Set custom timeout for this request
+    pub fn with_custom_timeout(mut self, timeout: Duration) -> Self {
+        self.rs_timeout = Timeout::Custom(timeout);
         self
     }
 }
@@ -138,10 +150,10 @@ impl RpcHelper {
             .record_duration(&self.0.metrics.rpc_duration, &metric_tags);
         let timeout = async {
-            if strat.rs_no_timeout {
-                futures::future::pending().await
-            } else {
-                tokio::time::sleep(self.0.rpc_timeout).await
+            match strat.rs_timeout {
+                Timeout::None => futures::future::pending().await,
+                Timeout::Default => tokio::time::sleep(self.0.rpc_timeout).await,
+                Timeout::Custom(t) => tokio::time::sleep(t).await,
             }
         };
@@ -412,7 +424,7 @@ impl RpcHelper {
             .iter()
             .find(|x| x.id.as_ref() == to.as_slice())
             .and_then(|pi| pi.avg_ping)
-            .unwrap_or_else(|| Duration::from_secs(1));
+            .unwrap_or_else(|| Duration::from_secs(10));
             (
                 *to != self.0.our_node_id,
                 peer_zone != our_zone,
-- cgit v1.2.3
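After this last patch, callers choose timeout behaviour through the RequestStrategy builder rather than passing a Duration with every request: the configured rpc_timeout applies by default, and with_custom_timeout() / without_timeout() are per-request overrides. A hypothetical call site (rpc_helper, endpoint, nodes and msg are placeholders, not taken from these patches) might look like:

    // Tighter bound than the configured default, e.g. for an interactive read path.
    let strategy = RequestStrategy::with_priority(PRIO_NORMAL)
        .with_quorum(2)
        .interrupt_after_quorum(true)
        .with_custom_timeout(Duration::from_secs(30));

    let responses = rpc_helper
        .try_call_many(&endpoint, &nodes[..], msg, strategy)
        .await?;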