From fa7c7780243e461d9b95eb18d8eff992dca8ae5b Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 16 Feb 2024 10:50:41 +0100 Subject: [reconnect-only-current] filter nodes to reconnect to. Do not try reconnecting to nodes received from consul/kubernetes discovery if they are not currently in the layout. --- src/rpc/system.rs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'src/rpc') diff --git a/src/rpc/system.rs b/src/rpc/system.rs index de44e656..778f7fae 100644 --- a/src/rpc/system.rs +++ b/src/rpc/system.rs @@ -725,15 +725,18 @@ impl System { async fn discovery_loop(self: &Arc<Self>, mut stop_signal: watch::Receiver<bool>) { while !*stop_signal.borrow() { - let not_configured = self.ring.borrow().layout.check().is_err(); - let no_peers = self.peering.get_peer_list().len() < self.replication_factor; - let expected_n_nodes = self.ring.borrow().layout.num_nodes(); - let bad_peers = self + let n_connected = self .peering .get_peer_list() .iter() - .filter(|p| p.is_up()) - .count() != expected_n_nodes; + .filter(|x| x.is_up()) + .count(); + + let not_configured = self.ring.borrow().layout.check().is_err(); + let no_peers = n_connected < self.replication_factor; + + let expected_n_nodes = self.ring.borrow().layout.num_nodes(); + let bad_peers = n_connected != expected_n_nodes; if not_configured || no_peers || bad_peers { info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers); @@ -780,6 +783,14 @@ impl System { } } + if !not_configured && !no_peers { + // If the layout is configured, and we already have some connections + // to other nodes in the cluster, we can skip trying to connect to + // nodes that are not in the cluster layout. + let ring = self.ring.borrow(); + ping_list.retain(|(id, _)| ring.layout.node_ids().contains(&(*id).into())); + } + for (node_id, node_addr) in ping_list { let self2 = self.clone(); tokio::spawn(async move { -- cgit v1.2.3