aboutsummaryrefslogtreecommitdiff
path: root/src/rpc/system.rs
diff options
context:
space:
mode:
author: Alex Auvolat <alex@adnab.me> 2024-02-16 10:50:41 +0100
committer: Alex Auvolat <alex@adnab.me> 2024-02-16 11:04:14 +0100
commitfa7c7780243e461d9b95eb18d8eff992dca8ae5b (patch)
tree97d7366d629f006fbd627195fc758aae043c1f5a /src/rpc/system.rs
parent7be3f15e45fcfff10a45302a040c2919a3ba8ccd (diff)
downloadgarage-reconnect-only-current.tar.gz
garage-reconnect-only-current.zip
[reconnect-only-current] filter nodes to reconnect to (branch: reconnect-only-current)
Do not try reconnecting to nodes received from Consul/Kubernetes discovery if they are not currently in the layout.
Diffstat (limited to 'src/rpc/system.rs')
-rw-r--r-- src/rpc/system.rs 23
1 files changed, 17 insertions, 6 deletions
diff --git a/src/rpc/system.rs b/src/rpc/system.rs
index de44e656..778f7fae 100644
--- a/src/rpc/system.rs
+++ b/src/rpc/system.rs
@@ -725,15 +725,18 @@ impl System {
async fn discovery_loop(self: &Arc<Self>, mut stop_signal: watch::Receiver<bool>) {
while !*stop_signal.borrow() {
- let not_configured = self.ring.borrow().layout.check().is_err();
- let no_peers = self.peering.get_peer_list().len() < self.replication_factor;
- let expected_n_nodes = self.ring.borrow().layout.num_nodes();
- let bad_peers = self
+ let n_connected = self
.peering
.get_peer_list()
.iter()
- .filter(|p| p.is_up())
- .count() != expected_n_nodes;
+ .filter(|x| x.is_up())
+ .count();
+
+ let not_configured = self.ring.borrow().layout.check().is_err();
+ let no_peers = n_connected < self.replication_factor;
+
+ let expected_n_nodes = self.ring.borrow().layout.num_nodes();
+ let bad_peers = n_connected != expected_n_nodes;
if not_configured || no_peers || bad_peers {
info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
@@ -780,6 +783,14 @@ impl System {
}
}
+ if !not_configured && !no_peers {
+ // If the layout is configured, and we already have some connections
+ // to other nodes in the cluster, we can skip trying to connect to
+ // nodes that are not in the cluster layout.
+ let ring = self.ring.borrow();
+ ping_list.retain(|(id, _)| ring.layout.node_ids().contains(&(*id).into()));
+ }
+
for (node_id, node_addr) in ping_list {
let self2 = self.clone();
tokio::spawn(async move {