author | Alex Auvolat <alex@adnab.me> | 2024-02-16 10:50:41 +0100 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2024-02-20 11:07:10 +0100 |
commit | e91576677e712c07cf9c47b1a0d2cc4d2d1d37cf (patch) | |
tree | b1b826d55ca11785986eb1fa2f6cd32107202833 /src | |
parent | 7be3f15e45fcfff10a45302a040c2919a3ba8ccd (diff) | |
[reconnect-only-current] filter nodes to reconnect to
Do not try reconnecting to nodes received from Consul/Kubernetes discovery if they are not currently in the cluster layout.
Diffstat (limited to 'src')
-rw-r--r-- | src/rpc/system.rs | 21 |
1 file changed, 16 insertions, 5 deletions
```diff
diff --git a/src/rpc/system.rs b/src/rpc/system.rs
index de44e656..14a101ca 100644
--- a/src/rpc/system.rs
+++ b/src/rpc/system.rs
@@ -725,15 +725,18 @@ impl System {
 
 	async fn discovery_loop(self: &Arc<Self>, mut stop_signal: watch::Receiver<bool>) {
 		while !*stop_signal.borrow() {
-			let not_configured = self.ring.borrow().layout.check().is_err();
-			let no_peers = self.peering.get_peer_list().len() < self.replication_factor;
-			let expected_n_nodes = self.ring.borrow().layout.num_nodes();
-			let bad_peers = self
+			let n_connected = self
 				.peering
 				.get_peer_list()
 				.iter()
 				.filter(|p| p.is_up())
-				.count() != expected_n_nodes;
+				.count();
+
+			let not_configured = self.ring.borrow().layout.check().is_err();
+			let no_peers = n_connected < self.replication_factor;
+
+			let expected_n_nodes = self.ring.borrow().layout.num_nodes();
+			let bad_peers = n_connected != expected_n_nodes;
 
 			if not_configured || no_peers || bad_peers {
 				info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
@@ -780,6 +783,14 @@ impl System {
 				}
 			}
 
+			if !not_configured && !no_peers {
+				// If the layout is configured, and we already have some connections
+				// to other nodes in the cluster, we can skip trying to connect to
+				// nodes that are not in the cluster layout.
+				let ring = self.ring.borrow();
+				ping_list.retain(|(id, _)| ring.layout.node_ids().contains(&(*id).into()));
+			}
+
 			for (node_id, node_addr) in ping_list {
 				let self2 = self.clone();
 				tokio::spawn(async move {
```
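For readers who prefer not to parse the diff, here is a minimal, self-contained sketch of the same idea: once the layout is configured and some peers are already connected, discovery candidates whose IDs are not part of the cluster layout are dropped before any reconnection attempt. The types `NodeId` and `LayoutView` and the function `filter_ping_list` are hypothetical stand-ins for illustration, not Garage's actual API.

```rust
// Sketch of the "filter nodes to reconnect to" step, with hypothetical types.
use std::collections::HashSet;
use std::net::SocketAddr;

// Stand-in for Garage's node identifier type (hypothetical).
type NodeId = u64;

// Hypothetical simplified view of the cluster layout: just the set of node IDs.
struct LayoutView {
    node_ids: HashSet<NodeId>,
}

impl LayoutView {
    fn contains(&self, id: &NodeId) -> bool {
        self.node_ids.contains(id)
    }
}

// Keep only discovery candidates that are in the layout, but only when the
// layout is configured and we already have enough connected peers; otherwise
// try everything, as in the patch above.
fn filter_ping_list(
    mut ping_list: Vec<(NodeId, SocketAddr)>,
    layout: &LayoutView,
    not_configured: bool,
    no_peers: bool,
) -> Vec<(NodeId, SocketAddr)> {
    if !not_configured && !no_peers {
        // Same retain-by-membership pattern as in the patch.
        ping_list.retain(|(id, _)| layout.contains(id));
    }
    ping_list
}

fn main() {
    let layout = LayoutView {
        node_ids: [1, 2, 3].into_iter().collect(),
    };
    let candidates = vec![
        (1, "10.0.0.1:3901".parse().unwrap()),
        // Stale node still advertised by Consul/Kubernetes discovery
        // but no longer in the layout: it should be skipped.
        (9, "10.0.0.9:3901".parse().unwrap()),
    ];
    let to_ping = filter_ping_list(candidates, &layout, false, false);
    assert_eq!(to_ping.len(), 1);
    println!("will try reconnecting to {} node(s)", to_ping.len());
}
```

Using `Vec::retain` here mirrors the patch: the candidate list is filtered in place, without allocating a new vector.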