aboutsummaryrefslogtreecommitdiff
path: root/src/rpc/layout
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2023-11-11 13:10:59 +0100
committerAlex Auvolat <alex@adnab.me>2023-11-11 13:10:59 +0100
commit9a491fa1372a23e91c793ee1d2b313607752826a (patch)
treec581d11c7b23aef30a9f770b30d494346f83d4a8 /src/rpc/layout
parentdf24bb806d64d5d5e748c35efe3f49ad3dda709e (diff)
downloadgarage-9a491fa1372a23e91c793ee1d2b313607752826a.tar.gz
garage-9a491fa1372a23e91c793ee1d2b313607752826a.zip
layout: fix test
Diffstat (limited to 'src/rpc/layout')
-rw-r--r--src/rpc/layout/history.rs9
-rw-r--r--src/rpc/layout/mod.rs3
-rw-r--r--src/rpc/layout/test.rs159
-rw-r--r--src/rpc/layout/version.rs172
4 files changed, 174 insertions, 169 deletions
diff --git a/src/rpc/layout/history.rs b/src/rpc/layout/history.rs
index cef56647..050f5d0a 100644
--- a/src/rpc/layout/history.rs
+++ b/src/rpc/layout/history.rs
@@ -18,7 +18,7 @@ impl LayoutHistory {
};
let mut ret = LayoutHistory {
- versions: vec![version].into_boxed_slice().into(),
+ versions: vec![version],
update_trackers: Default::default(),
trackers_hash: [0u8; 32].into(),
staging: Lww::raw(0, staging),
@@ -211,6 +211,11 @@ To know the correct value of the new layout version, invoke `garage layout show`
let msg = new_version.calculate_partition_assignment()?;
self.versions.push(new_version);
+ if self.current().check().is_ok() {
+ while self.versions.first().unwrap().check().is_err() {
+ self.versions.remove(0);
+ }
+ }
// Reset the staged layout changes
self.staging.update(LayoutStaging {
@@ -245,7 +250,7 @@ To know the correct value of the new layout version, invoke `garage layout show`
version.check()?;
}
- // TODO: anythign more ?
+ // TODO: anything more ?
Ok(())
}
}
diff --git a/src/rpc/layout/mod.rs b/src/rpc/layout/mod.rs
index cd3764bc..577b32fb 100644
--- a/src/rpc/layout/mod.rs
+++ b/src/rpc/layout/mod.rs
@@ -3,6 +3,9 @@ mod history;
mod schema;
mod version;
+#[cfg(test)]
+mod test;
+
pub mod manager;
// ---- re-exports ----
diff --git a/src/rpc/layout/test.rs b/src/rpc/layout/test.rs
new file mode 100644
index 00000000..0ce090d2
--- /dev/null
+++ b/src/rpc/layout/test.rs
@@ -0,0 +1,159 @@
+use std::cmp::min;
+use std::collections::HashMap;
+
+use garage_util::crdt::Crdt;
+use garage_util::error::*;
+
+use crate::layout::*;
+
+// This function checks that the partition size S computed is at least better than the
+// one given by a very naive algorithm. To do so, we try to run the naive algorithm
+// assuming a partion size of S+1. If we succed, it means that the optimal assignment
+// was not optimal. The naive algorithm is the following :
+// - we compute the max number of partitions associated to every node, capped at the
+// partition number. It gives the number of tokens of every node.
+// - every zone has a number of tokens equal to the sum of the tokens of its nodes.
+// - we cycle over the partitions and associate zone tokens while respecting the
+// zone redundancy constraint.
+// NOTE: the naive algorithm is not optimal. Counter example:
+// take nb_partition = 3 ; replication_factor = 5; redundancy = 4;
+// number of tokens by zone : (A, 4), (B,1), (C,4), (D, 4), (E, 2)
+// With these parameters, the naive algo fails, whereas there is a solution:
+// (A,A,C,D,E) , (A,B,C,D,D) (A,C,C,D,E)
+fn check_against_naive(cl: &LayoutVersion) -> Result<bool, Error> {
+ let over_size = cl.partition_size + 1;
+ let mut zone_token = HashMap::<String, usize>::new();
+
+ let (zones, zone_to_id) = cl.generate_nongateway_zone_ids()?;
+
+ if zones.is_empty() {
+ return Ok(false);
+ }
+
+ for z in zones.iter() {
+ zone_token.insert(z.clone(), 0);
+ }
+ for uuid in cl.nongateway_nodes() {
+ let z = cl.get_node_zone(&uuid)?;
+ let c = cl.get_node_capacity(&uuid)?;
+ zone_token.insert(
+ z.to_string(),
+ zone_token[z] + min(NB_PARTITIONS, (c / over_size) as usize),
+ );
+ }
+
+ // For every partition, we count the number of zone already associated and
+ // the name of the last zone associated
+
+ let mut id_zone_token = vec![0; zones.len()];
+ for (z, t) in zone_token.iter() {
+ id_zone_token[zone_to_id[z]] = *t;
+ }
+
+ let mut nb_token = vec![0; NB_PARTITIONS];
+ let mut last_zone = vec![zones.len(); NB_PARTITIONS];
+
+ let mut curr_zone = 0;
+
+ let redundancy = cl.effective_zone_redundancy();
+
+ for replic in 0..cl.replication_factor {
+ for p in 0..NB_PARTITIONS {
+ while id_zone_token[curr_zone] == 0
+ || (last_zone[p] == curr_zone
+ && redundancy - nb_token[p] <= cl.replication_factor - replic)
+ {
+ curr_zone += 1;
+ if curr_zone >= zones.len() {
+ return Ok(true);
+ }
+ }
+ id_zone_token[curr_zone] -= 1;
+ if last_zone[p] != curr_zone {
+ nb_token[p] += 1;
+ last_zone[p] = curr_zone;
+ }
+ }
+ }
+
+ return Ok(false);
+}
+
+fn show_msg(msg: &Message) {
+ for s in msg.iter() {
+ println!("{}", s);
+ }
+}
+
+fn update_layout(
+ cl: &mut LayoutHistory,
+ node_capacity_vec: &[u64],
+ node_zone_vec: &[&'static str],
+ zone_redundancy: usize,
+) {
+ let staging = cl.staging.get_mut();
+
+ for (i, (capacity, zone)) in node_capacity_vec
+ .iter()
+ .zip(node_zone_vec.iter())
+ .enumerate()
+ {
+ let node_id = [i as u8; 32].into();
+
+ let update = staging.roles.update_mutator(
+ node_id,
+ NodeRoleV(Some(NodeRole {
+ zone: zone.to_string(),
+ capacity: Some(*capacity),
+ tags: (vec![]),
+ })),
+ );
+ staging.roles.merge(&update);
+ }
+ staging.parameters.update(LayoutParameters {
+ zone_redundancy: ZoneRedundancy::AtLeast(zone_redundancy),
+ });
+
+ cl.update_hashes();
+}
+
+#[test]
+fn test_assignment() {
+ let mut node_capacity_vec = vec![4000, 1000, 2000];
+ let mut node_zone_vec = vec!["A", "B", "C"];
+
+ let mut cl = LayoutHistory::new(3);
+ update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 3);
+ let v = cl.current().version;
+ let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
+ show_msg(&msg);
+ assert_eq!(cl.check(), Ok(()));
+ assert!(check_against_naive(cl.current()).unwrap());
+
+ node_capacity_vec = vec![4000, 1000, 1000, 3000, 1000, 1000, 2000, 10000, 2000];
+ node_zone_vec = vec!["A", "B", "C", "C", "C", "B", "G", "H", "I"];
+ update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 2);
+ let v = cl.current().version;
+ let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
+ show_msg(&msg);
+ assert_eq!(cl.check(), Ok(()));
+ assert!(check_against_naive(cl.current()).unwrap());
+
+ node_capacity_vec = vec![4000, 1000, 2000, 7000, 1000, 1000, 2000, 10000, 2000];
+ update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 3);
+ let v = cl.current().version;
+ let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
+ show_msg(&msg);
+ assert_eq!(cl.check(), Ok(()));
+ assert!(check_against_naive(cl.current()).unwrap());
+
+ node_capacity_vec = vec![
+ 4000000, 4000000, 2000000, 7000000, 1000000, 9000000, 2000000, 10000, 2000000,
+ ];
+ update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 1);
+ let v = cl.current().version;
+ let (cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
+ show_msg(&msg);
+ assert_eq!(cl.check(), Ok(()));
+ assert!(check_against_naive(cl.current()).unwrap());
+}
diff --git a/src/rpc/layout/version.rs b/src/rpc/layout/version.rs
index f45a3c35..ffbdf277 100644
--- a/src/rpc/layout/version.rs
+++ b/src/rpc/layout/version.rs
@@ -143,7 +143,7 @@ impl LayoutVersion {
}
/// Given a node uuids, this function returns the label of its zone
- fn get_node_zone(&self, uuid: &Uuid) -> Result<&str, Error> {
+ pub(crate) fn get_node_zone(&self, uuid: &Uuid) -> Result<&str, Error> {
match self.node_role(uuid) {
Some(role) => Ok(&role.zone),
_ => Err(Error::Message(
@@ -162,7 +162,7 @@ impl LayoutVersion {
}
/// Returns the effective value of the zone_redundancy parameter
- fn effective_zone_redundancy(&self) -> usize {
+ pub(crate) fn effective_zone_redundancy(&self) -> usize {
match self.parameters.zone_redundancy {
ZoneRedundancy::AtLeast(v) => v,
ZoneRedundancy::Maximum => {
@@ -472,7 +472,9 @@ impl LayoutVersion {
/// This function generates ids for the zone of the nodes appearing in
/// self.node_id_vec.
- fn generate_nongateway_zone_ids(&self) -> Result<(Vec<String>, HashMap<String, usize>), Error> {
+ pub(crate) fn generate_nongateway_zone_ids(
+ &self,
+ ) -> Result<(Vec<String>, HashMap<String, usize>), Error> {
let mut id_to_zone = Vec::<String>::new();
let mut zone_to_id = HashMap::<String, usize>::new();
@@ -838,167 +840,3 @@ impl LayoutVersion {
Ok(msg)
}
}
-
-// ====================================================================================
-
-#[cfg(test)]
-mod tests {
- use super::{Error, *};
- use std::cmp::min;
-
- // This function checks that the partition size S computed is at least better than the
- // one given by a very naive algorithm. To do so, we try to run the naive algorithm
- // assuming a partion size of S+1. If we succed, it means that the optimal assignment
- // was not optimal. The naive algorithm is the following :
- // - we compute the max number of partitions associated to every node, capped at the
- // partition number. It gives the number of tokens of every node.
- // - every zone has a number of tokens equal to the sum of the tokens of its nodes.
- // - we cycle over the partitions and associate zone tokens while respecting the
- // zone redundancy constraint.
- // NOTE: the naive algorithm is not optimal. Counter example:
- // take nb_partition = 3 ; replication_factor = 5; redundancy = 4;
- // number of tokens by zone : (A, 4), (B,1), (C,4), (D, 4), (E, 2)
- // With these parameters, the naive algo fails, whereas there is a solution:
- // (A,A,C,D,E) , (A,B,C,D,D) (A,C,C,D,E)
- fn check_against_naive(cl: &LayoutVersion) -> Result<bool, Error> {
- let over_size = cl.partition_size + 1;
- let mut zone_token = HashMap::<String, usize>::new();
-
- let (zones, zone_to_id) = cl.generate_nongateway_zone_ids()?;
-
- if zones.is_empty() {
- return Ok(false);
- }
-
- for z in zones.iter() {
- zone_token.insert(z.clone(), 0);
- }
- for uuid in cl.nongateway_nodes() {
- let z = cl.get_node_zone(&uuid)?;
- let c = cl.get_node_capacity(&uuid)?;
- zone_token.insert(
- z.clone(),
- zone_token[&z] + min(NB_PARTITIONS, (c / over_size) as usize),
- );
- }
-
- // For every partition, we count the number of zone already associated and
- // the name of the last zone associated
-
- let mut id_zone_token = vec![0; zones.len()];
- for (z, t) in zone_token.iter() {
- id_zone_token[zone_to_id[z]] = *t;
- }
-
- let mut nb_token = vec![0; NB_PARTITIONS];
- let mut last_zone = vec![zones.len(); NB_PARTITIONS];
-
- let mut curr_zone = 0;
-
- let redundancy = cl.effective_zone_redundancy();
-
- for replic in 0..cl.replication_factor {
- for p in 0..NB_PARTITIONS {
- while id_zone_token[curr_zone] == 0
- || (last_zone[p] == curr_zone
- && redundancy - nb_token[p] <= cl.replication_factor - replic)
- {
- curr_zone += 1;
- if curr_zone >= zones.len() {
- return Ok(true);
- }
- }
- id_zone_token[curr_zone] -= 1;
- if last_zone[p] != curr_zone {
- nb_token[p] += 1;
- last_zone[p] = curr_zone;
- }
- }
- }
-
- return Ok(false);
- }
-
- fn show_msg(msg: &Message) {
- for s in msg.iter() {
- println!("{}", s);
- }
- }
-
- fn update_layout(
- cl: &mut LayoutVersion,
- node_id_vec: &Vec<u8>,
- node_capacity_vec: &Vec<u64>,
- node_zone_vec: &Vec<String>,
- zone_redundancy: usize,
- ) {
- for i in 0..node_id_vec.len() {
- if let Some(x) = FixedBytes32::try_from(&[i as u8; 32]) {
- cl.node_id_vec.push(x);
- }
-
- let update = cl.staging_roles.update_mutator(
- cl.node_id_vec[i],
- NodeRoleV(Some(NodeRole {
- zone: (node_zone_vec[i].to_string()),
- capacity: (Some(node_capacity_vec[i])),
- tags: (vec![]),
- })),
- );
- cl.staging_roles.merge(&update);
- }
- cl.staging_parameters.update(LayoutParameters {
- zone_redundancy: ZoneRedundancy::AtLeast(zone_redundancy),
- });
- cl.staging_hash = cl.calculate_staging_hash();
- }
-
- #[test]
- fn test_assignment() {
- let mut node_id_vec = vec![1, 2, 3];
- let mut node_capacity_vec = vec![4000, 1000, 2000];
- let mut node_zone_vec = vec!["A", "B", "C"]
- .into_iter()
- .map(|x| x.to_string())
- .collect();
-
- let mut cl = LayoutVersion::new(3);
- update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 3);
- let v = cl.version;
- let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
- show_msg(&msg);
- assert_eq!(cl.check(), Ok(()));
- assert!(matches!(check_against_naive(&cl), Ok(true)));
-
- node_id_vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
- node_capacity_vec = vec![4000, 1000, 1000, 3000, 1000, 1000, 2000, 10000, 2000];
- node_zone_vec = vec!["A", "B", "C", "C", "C", "B", "G", "H", "I"]
- .into_iter()
- .map(|x| x.to_string())
- .collect();
- update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 2);
- let v = cl.version;
- let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
- show_msg(&msg);
- assert_eq!(cl.check(), Ok(()));
- assert!(matches!(check_against_naive(&cl), Ok(true)));
-
- node_capacity_vec = vec![4000, 1000, 2000, 7000, 1000, 1000, 2000, 10000, 2000];
- update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 3);
- let v = cl.version;
- let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
- show_msg(&msg);
- assert_eq!(cl.check(), Ok(()));
- assert!(matches!(check_against_naive(&cl), Ok(true)));
-
- node_capacity_vec = vec![
- 4000000, 4000000, 2000000, 7000000, 1000000, 9000000, 2000000, 10000, 2000000,
- ];
- update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 1);
- let v = cl.version;
- let (cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
- show_msg(&msg);
- assert_eq!(cl.check(), Ok(()));
- assert!(matches!(check_against_naive(&cl), Ok(true)));
- }
-}