author    Alex Auvolat <alex@adnab.me>  2024-02-16 10:50:41 +0100
committer Alex Auvolat <alex@adnab.me>  2024-02-20 11:07:10 +0100
commit    e91576677e712c07cf9c47b1a0d2cc4d2d1d37cf (patch)
tree      b1b826d55ca11785986eb1fa2f6cd32107202833 /src
parent    7be3f15e45fcfff10a45302a040c2919a3ba8ccd (diff)
[reconnect-only-current] filter nodes to reconnect to
Do not try reconnecting to nodes received from Consul/Kubernetes discovery if they are not currently in the layout.
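A minimal sketch of the filtering idea, outside the real System type: given the candidate peers returned by discovery and the set of node IDs currently in the layout, keep only the candidates whose ID appears in the layout. The NodeId alias, the (NodeId, SocketAddr) pairs, and the HashSet of layout IDs below are simplified stand-ins for the actual Garage types, not the real API.

    use std::collections::HashSet;
    use std::net::SocketAddr;

    // Simplified stand-in for Garage's node identifier type (assumption).
    type NodeId = [u8; 32];

    // Keep only discovered peers that are part of the current cluster layout.
    // `candidates` is what consul/kubernetes discovery returned; `layout_ids`
    // is the set of node IDs currently present in the layout.
    fn filter_to_layout(candidates: &mut Vec<(NodeId, SocketAddr)>, layout_ids: &HashSet<NodeId>) {
        candidates.retain(|(id, _addr)| layout_ids.contains(id));
    }

In the patch below, this filtering is only applied when the layout is already configured and at least replication_factor peers are connected, so a fresh or isolated node still tries every address it discovers.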
Diffstat (limited to 'src')
-rw-r--r--  src/rpc/system.rs  21
1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/src/rpc/system.rs b/src/rpc/system.rs
index de44e656..14a101ca 100644
--- a/src/rpc/system.rs
+++ b/src/rpc/system.rs
@@ -725,15 +725,18 @@ impl System {
 	async fn discovery_loop(self: &Arc<Self>, mut stop_signal: watch::Receiver<bool>) {
 		while !*stop_signal.borrow() {
-			let not_configured = self.ring.borrow().layout.check().is_err();
-			let no_peers = self.peering.get_peer_list().len() < self.replication_factor;
-			let expected_n_nodes = self.ring.borrow().layout.num_nodes();
-			let bad_peers = self
+			let n_connected = self
 				.peering
 				.get_peer_list()
 				.iter()
 				.filter(|p| p.is_up())
-				.count() != expected_n_nodes;
+				.count();
+
+			let not_configured = self.ring.borrow().layout.check().is_err();
+			let no_peers = n_connected < self.replication_factor;
+
+			let expected_n_nodes = self.ring.borrow().layout.num_nodes();
+			let bad_peers = n_connected != expected_n_nodes;
 			if not_configured || no_peers || bad_peers {
 				info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
@@ -780,6 +783,14 @@ impl System {
 				}
 			}
+			if !not_configured && !no_peers {
+				// If the layout is configured, and we already have some connections
+				// to other nodes in the cluster, we can skip trying to connect to
+				// nodes that are not in the cluster layout.
+				let ring = self.ring.borrow();
+				ping_list.retain(|(id, _)| ring.layout.node_ids().contains(&(*id).into()));
+			}
+
 			for (node_id, node_addr) in ping_list {
 				let self2 = self.clone();
 				tokio::spawn(async move {