diff options
author | Alex Auvolat <alex@adnab.me> | 2024-02-16 10:50:41 +0100 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2024-02-16 11:04:14 +0100 |
commit | fa7c7780243e461d9b95eb18d8eff992dca8ae5b (patch) | |
tree | 97d7366d629f006fbd627195fc758aae043c1f5a | |
parent | 7be3f15e45fcfff10a45302a040c2919a3ba8ccd (diff) | |
download | garage-fa7c7780243e461d9b95eb18d8eff992dca8ae5b.tar.gz garage-fa7c7780243e461d9b95eb18d8eff992dca8ae5b.zip |
[reconnect-only-current] filter nodes to reconnect to (branch: reconnect-only-current)
Do not try reconnecting to nodes received from Consul/Kubernetes
discovery if they are not currently in the layout.
-rw-r--r-- | src/rpc/system.rs | 23 |
1 file changed, 17 insertions, 6 deletions
diff --git a/src/rpc/system.rs b/src/rpc/system.rs index de44e656..778f7fae 100644 --- a/src/rpc/system.rs +++ b/src/rpc/system.rs @@ -725,15 +725,18 @@ impl System { async fn discovery_loop(self: &Arc<Self>, mut stop_signal: watch::Receiver<bool>) { while !*stop_signal.borrow() { - let not_configured = self.ring.borrow().layout.check().is_err(); - let no_peers = self.peering.get_peer_list().len() < self.replication_factor; - let expected_n_nodes = self.ring.borrow().layout.num_nodes(); - let bad_peers = self + let n_connected = self .peering .get_peer_list() .iter() - .filter(|p| p.is_up()) - .count() != expected_n_nodes; + .filter(|x| x.is_up()) + .count(); + + let not_configured = self.ring.borrow().layout.check().is_err(); + let no_peers = n_connected < self.replication_factor; + + let expected_n_nodes = self.ring.borrow().layout.num_nodes(); + let bad_peers = n_connected != expected_n_nodes; if not_configured || no_peers || bad_peers { info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers); @@ -780,6 +783,14 @@ impl System { } } + if !not_configured && !no_peers { + // If the layout is configured, and we already have some connections + // to other nodes in the cluster, we can skip trying to connect to + // nodes that are not in the cluster layout. + let ring = self.ring.borrow(); + ping_list.retain(|(id, _)| ring.layout.node_ids().contains(&(*id).into())); + } + for (node_id, node_addr) in ping_list { let self2 = self.clone(); tokio::spawn(async move { |