aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alex Auvolat <alex@adnab.me> 2023-12-07 10:30:26 +0100
committer Alex Auvolat <alex@adnab.me> 2023-12-07 10:30:26 +0100
commit c8356a91d9bf1d1488ec288099f2a55a1019918f (patch)
tree 1782fc08ed6b2c6b6813c8c617d1be564e9229e9
parent c04dd8788a3764da2f307b1d10c2d56b7b0e4a61 (diff)
download garage-c8356a91d9bf1d1488ec288099f2a55a1019918f.tar.gz
garage-c8356a91d9bf1d1488ec288099f2a55a1019918f.zip
layout updates: fix the set of nodes among which minima are calculated
-rw-r--r-- src/rpc/layout/helper.rs 25
-rw-r--r-- src/rpc/layout/history.rs 8
-rw-r--r-- src/rpc/layout/schema.rs 2
3 files changed, 27 insertions, 8 deletions
diff --git a/src/rpc/layout/helper.rs b/src/rpc/layout/helper.rs
index 5d159f3e..881a039e 100644
--- a/src/rpc/layout/helper.rs
+++ b/src/rpc/layout/helper.rs
@@ -51,20 +51,37 @@ impl LayoutHelper {
pub fn new(mut layout: LayoutHistory, mut ack_lock: HashMap<u64, AtomicUsize>) -> Self {
layout.cleanup_old_versions();
+ let all_nodes = layout.get_all_nodes();
let all_nongateway_nodes = layout.get_all_nongateway_nodes();
- layout.clamp_update_trackers(&all_nongateway_nodes);
+
+ layout.clamp_update_trackers(&all_nodes);
let min_version = layout.min_stored();
+
+ // ack_map_min is the minimum value of ack_map among all nodes
+ // in the cluster (gateway, non-gateway, current and previous layouts).
+ // It is the highest layout version which all of these nodes have
+ // acknowledged, indicating that they are aware of it and are no
+ // longer processing write operations that did not take it into account.
let ack_map_min = layout
.update_trackers
.ack_map
- .min(&all_nongateway_nodes, min_version);
+ .min_among(&all_nodes, min_version);
+
+ // sync_map_min is the minimum value of sync_map among all storage nodes
+ // in the cluster (non-gateway nodes only, current and previous layouts).
+ // It is the highest layout version for which we know that all relevant
+ // storage nodes have fulfilled a sync, and therefore it is safe to
+ // use a read quorum within that layout to ensure consistency.
+ // Gateway nodes are excluded here because they hold no relevant data
+ // (they store the bucket and access key tables, but we don't have
+ // consistency on those).
+ // TODO: this value could take quorums into account instead.
let sync_map_min = layout
.update_trackers
.sync_map
- .min(&all_nongateway_nodes, min_version);
+ .min_among(&all_nongateway_nodes, min_version);
- let all_nodes = layout.get_all_nodes();
let trackers_hash = layout.calculate_trackers_hash();
let staging_hash = layout.calculate_staging_hash();
diff --git a/src/rpc/layout/history.rs b/src/rpc/layout/history.rs
index 7d4a1b48..c448ac24 100644
--- a/src/rpc/layout/history.rs
+++ b/src/rpc/layout/history.rs
@@ -77,14 +77,16 @@ impl LayoutHistory {
}
// If there are old versions that no one is reading from anymore,
- // remove them
+ // remove them from self.versions (keep them in self.old_versions).
+ // ASSUMPTION: we only care about where nodes in the current layout version
+ // are reading from, as we assume older nodes are being discarded.
while self.versions.len() > 1 {
- let all_nongateway_nodes = self.get_all_nongateway_nodes();
+ let current_nodes = &self.current().node_id_vec;
let min_version = self.min_stored();
let sync_ack_map_min = self
.update_trackers
.sync_ack_map
- .min(&all_nongateway_nodes, min_version);
+ .min_among(&current_nodes, min_version);
if self.min_stored() < sync_ack_map_min {
let removed = self.versions.remove(0);
info!(
diff --git a/src/rpc/layout/schema.rs b/src/rpc/layout/schema.rs
index cb36297d..49e84420 100644
--- a/src/rpc/layout/schema.rs
+++ b/src/rpc/layout/schema.rs
@@ -408,7 +408,7 @@ impl UpdateTracker {
}
}
- pub(crate) fn min(&self, storage_nodes: &[Uuid], min_version: u64) -> u64 {
+ pub(crate) fn min_among(&self, storage_nodes: &[Uuid], min_version: u64) -> u64 {
storage_nodes
.iter()
.map(|x| self.0.get(x).copied().unwrap_or(min_version))