From 12d1dbfc6b884be488e2d79c0b9e3c47490f5442 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 8 Nov 2023 15:41:24 +0100 Subject: remove Ring and use ClusterLayout everywhere --- src/garage/admin/bucket.rs | 4 ++-- src/garage/admin/mod.rs | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src/garage') diff --git a/src/garage/admin/bucket.rs b/src/garage/admin/bucket.rs index 0781cb8b..34e48292 100644 --- a/src/garage/admin/bucket.rs +++ b/src/garage/admin/bucket.rs @@ -70,7 +70,7 @@ impl AdminRpcHandler { .table .get(&bucket_id, &EmptyKey) .await? - .map(|x| x.filtered_values(&self.garage.system.ring.borrow())) + .map(|x| x.filtered_values(&self.garage.system.layout_watch.borrow())) .unwrap_or_default(); let mpu_counters = self @@ -79,7 +79,7 @@ impl AdminRpcHandler { .table .get(&bucket_id, &EmptyKey) .await? - .map(|x| x.filtered_values(&self.garage.system.ring.borrow())) + .map(|x| x.filtered_values(&self.garage.system.layout_watch.borrow())) .unwrap_or_default(); let mut relevant_keys = HashMap::new(); diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index b6f9c426..006f71cd 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -18,7 +18,7 @@ use garage_util::error::Error as GarageError; use garage_table::replication::*; use garage_table::*; -use garage_rpc::ring::PARTITION_BITS; +use garage_rpc::layout::PARTITION_BITS; use garage_rpc::*; use garage_block::manager::BlockResyncErrorInfo; @@ -126,8 +126,8 @@ impl AdminRpcHandler { opt_to_send.all_nodes = false; let mut failures = vec![]; - let ring = self.garage.system.ring.borrow().clone(); - for node in ring.layout.node_ids().iter() { + let layout = self.garage.system.layout_watch.borrow().clone(); + for node in layout.node_ids().iter() { let node = (*node).into(); let resp = self .endpoint @@ -163,9 +163,9 @@ impl AdminRpcHandler { async fn handle_stats(&self, opt: StatsOpt) -> Result { if opt.all_nodes { let mut ret = String::new(); - let ring = self.garage.system.ring.borrow().clone(); + let layout = self.garage.system.layout_watch.borrow().clone(); - for node in ring.layout.node_ids().iter() { + for node in layout.node_ids().iter() { let mut opt = opt.clone(); opt.all_nodes = false; opt.skip_global = true; @@ -275,7 +275,7 @@ impl AdminRpcHandler { let mut ret = String::new(); // Gather storage node and free space statistics - let layout = &self.garage.system.ring.borrow().layout; + let layout = &self.garage.system.layout_watch.borrow(); let mut node_partition_count = HashMap::::new(); for short_id in layout.ring_assignment_data.iter() { let id = layout.node_id_vec[*short_id as usize]; @@ -440,8 +440,8 @@ impl AdminRpcHandler { ) -> Result { if all_nodes { let mut ret = vec![]; - let ring = self.garage.system.ring.borrow().clone(); - for node in ring.layout.node_ids().iter() { + let layout = self.garage.system.layout_watch.borrow().clone(); + for node in layout.node_ids().iter() { let node = (*node).into(); match self .endpoint @@ -488,8 +488,8 @@ impl AdminRpcHandler { ) -> Result { if all_nodes { let mut ret = vec![]; - let ring = self.garage.system.ring.borrow().clone(); - for node in ring.layout.node_ids().iter() { + let layout = self.garage.system.layout_watch.borrow().clone(); + for node in layout.node_ids().iter() { let node = (*node).into(); match self .endpoint -- cgit v1.2.3 From 4a9c94514f49aa4e9880a8e0f5cf5a52d11ae993 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 8 Nov 2023 16:41:00 +0100 Subject: avoid using layout_watch in System directly --- src/garage/admin/bucket.rs | 4 ++-- src/garage/admin/mod.rs | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/garage') diff --git a/src/garage/admin/bucket.rs b/src/garage/admin/bucket.rs index 34e48292..9e642f57 100644 --- a/src/garage/admin/bucket.rs +++ b/src/garage/admin/bucket.rs @@ -70,7 +70,7 @@ impl AdminRpcHandler { .table .get(&bucket_id, &EmptyKey) .await? - .map(|x| x.filtered_values(&self.garage.system.layout_watch.borrow())) + .map(|x| x.filtered_values(&self.garage.system.cluster_layout())) .unwrap_or_default(); let mpu_counters = self @@ -79,7 +79,7 @@ impl AdminRpcHandler { .table .get(&bucket_id, &EmptyKey) .await? - .map(|x| x.filtered_values(&self.garage.system.layout_watch.borrow())) + .map(|x| x.filtered_values(&self.garage.system.cluster_layout())) .unwrap_or_default(); let mut relevant_keys = HashMap::new(); diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index 006f71cd..c3fa801a 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -126,7 +126,7 @@ impl AdminRpcHandler { opt_to_send.all_nodes = false; let mut failures = vec![]; - let layout = self.garage.system.layout_watch.borrow().clone(); + let layout = self.garage.system.cluster_layout().clone(); for node in layout.node_ids().iter() { let node = (*node).into(); let resp = self @@ -163,7 +163,7 @@ impl AdminRpcHandler { async fn handle_stats(&self, opt: StatsOpt) -> Result { if opt.all_nodes { let mut ret = String::new(); - let layout = self.garage.system.layout_watch.borrow().clone(); + let layout = self.garage.system.cluster_layout().clone(); for node in layout.node_ids().iter() { let mut opt = opt.clone(); @@ -275,7 +275,7 @@ impl AdminRpcHandler { let mut ret = String::new(); // Gather storage node and free space statistics - let layout = &self.garage.system.layout_watch.borrow(); + let layout = &self.garage.system.cluster_layout(); let mut node_partition_count = HashMap::::new(); for short_id in layout.ring_assignment_data.iter() { let id = layout.node_id_vec[*short_id as usize]; @@ -440,7 +440,7 @@ impl AdminRpcHandler { ) -> Result { if all_nodes { let mut ret = vec![]; - let layout = self.garage.system.layout_watch.borrow().clone(); + let layout = self.garage.system.cluster_layout().clone(); for node in layout.node_ids().iter() { let node = (*node).into(); match self @@ -488,7 +488,7 @@ impl AdminRpcHandler { ) -> Result { if all_nodes { let mut ret = vec![]; - let layout = self.garage.system.layout_watch.borrow().clone(); + let layout = self.garage.system.cluster_layout().clone(); for node in layout.node_ids().iter() { let node = (*node).into(); match self -- cgit v1.2.3 From 8dccee3ccfe7793c42203f28c1e91c6f989b6899 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 8 Nov 2023 19:28:36 +0100 Subject: cluster layout: adapt all uses of ClusterLayout to LayoutHistory --- src/garage/admin/mod.rs | 14 +++++++------- src/garage/cli/cmd.rs | 17 +++++++++++------ src/garage/cli/layout.rs | 38 +++++++++++++++++++++----------------- 3 files changed, 39 insertions(+), 30 deletions(-) (limited to 'src/garage') diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index c3fa801a..e3ba6d35 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -127,7 +127,7 @@ impl AdminRpcHandler { let mut failures = vec![]; let layout = self.garage.system.cluster_layout().clone(); - for node in layout.node_ids().iter() { + for node in layout.current().node_ids().iter() { let node = (*node).into(); let resp = self .endpoint @@ -165,7 +165,7 @@ impl AdminRpcHandler { let mut ret = String::new(); let layout = self.garage.system.cluster_layout().clone(); - for node in layout.node_ids().iter() { + for node in layout.current().node_ids().iter() { let mut opt = opt.clone(); opt.all_nodes = false; opt.skip_global = true; @@ -277,8 +277,8 @@ impl AdminRpcHandler { // Gather storage node and free space statistics let layout = &self.garage.system.cluster_layout(); let mut node_partition_count = HashMap::::new(); - for short_id in layout.ring_assignment_data.iter() { - let id = layout.node_id_vec[*short_id as usize]; + for short_id in layout.current().ring_assignment_data.iter() { + let id = layout.current().node_id_vec[*short_id as usize]; *node_partition_count.entry(id).or_default() += 1; } let node_info = self @@ -293,7 +293,7 @@ impl AdminRpcHandler { for (id, parts) in node_partition_count.iter() { let info = node_info.get(id); let status = info.map(|x| &x.status); - let role = layout.roles.get(id).and_then(|x| x.0.as_ref()); + let role = layout.current().roles.get(id).and_then(|x| x.0.as_ref()); let hostname = status.map(|x| x.hostname.as_str()).unwrap_or("?"); let zone = role.map(|x| x.zone.as_str()).unwrap_or("?"); let capacity = role @@ -441,7 +441,7 @@ impl AdminRpcHandler { if all_nodes { let mut ret = vec![]; let layout = self.garage.system.cluster_layout().clone(); - for node in layout.node_ids().iter() { + for node in layout.current().node_ids().iter() { let node = (*node).into(); match self .endpoint @@ -489,7 +489,7 @@ impl AdminRpcHandler { if all_nodes { let mut ret = vec![]; let layout = self.garage.system.cluster_layout().clone(); - for node in layout.node_ids().iter() { + for node in layout.current().node_ids().iter() { let node = (*node).into(); match self .endpoint diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs index 48359614..8be43873 100644 --- a/src/garage/cli/cmd.rs +++ b/src/garage/cli/cmd.rs @@ -62,7 +62,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> let mut healthy_nodes = vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()]; for adv in status.iter().filter(|adv| adv.is_up) { - match layout.roles.get(&adv.id) { + match layout.current().roles.get(&adv.id) { Some(NodeRoleV(Some(cfg))) => { let data_avail = match &adv.status.data_disk_avail { _ if cfg.capacity.is_none() => "N/A".into(), @@ -102,10 +102,15 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> format_table(healthy_nodes); let status_keys = status.iter().map(|adv| adv.id).collect::>(); - let failure_case_1 = status - .iter() - .any(|adv| !adv.is_up && matches!(layout.roles.get(&adv.id), Some(NodeRoleV(Some(_))))); + let failure_case_1 = status.iter().any(|adv| { + !adv.is_up + && matches!( + layout.current().roles.get(&adv.id), + Some(NodeRoleV(Some(_))) + ) + }); let failure_case_2 = layout + .current() .roles .items() .iter() @@ -115,7 +120,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> let mut failed_nodes = vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tLast seen".to_string()]; for adv in status.iter().filter(|adv| !adv.is_up) { - if let Some(NodeRoleV(Some(cfg))) = layout.roles.get(&adv.id) { + if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) { let tf = timeago::Formatter::new(); failed_nodes.push(format!( "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}", @@ -132,7 +137,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> )); } } - for (id, _, role_v) in layout.roles.items().iter() { + for (id, _, role_v) in layout.current().roles.items().iter() { if let NodeRoleV(Some(cfg)) = role_v { if !status_keys.contains(id) { failed_nodes.push(format!( diff --git a/src/garage/cli/layout.rs b/src/garage/cli/layout.rs index ce2b11e0..4a617337 100644 --- a/src/garage/cli/layout.rs +++ b/src/garage/cli/layout.rs @@ -58,17 +58,18 @@ pub async fn cmd_assign_role( status .iter() .map(|adv| adv.id) - .chain(layout.node_ids().iter().cloned()), + .chain(layout.current().node_ids().iter().cloned()), node_id, ) }) .collect::, _>>()?; - let mut roles = layout.roles.clone(); + let mut roles = layout.current().roles.clone(); roles.merge(&layout.staging_roles); for replaced in args.replace.iter() { - let replaced_node = find_matching_node(layout.node_ids().iter().cloned(), replaced)?; + let replaced_node = + find_matching_node(layout.current().node_ids().iter().cloned(), replaced)?; match roles.get(&replaced_node) { Some(NodeRoleV(Some(_))) => { layout @@ -149,7 +150,7 @@ pub async fn cmd_remove_role( ) -> Result<(), Error> { let mut layout = fetch_layout(rpc_cli, rpc_host).await?; - let mut roles = layout.roles.clone(); + let mut roles = layout.current().roles.clone(); roles.merge(&layout.staging_roles); let deleted_node = @@ -174,13 +175,16 @@ pub async fn cmd_show_layout( let layout = fetch_layout(rpc_cli, rpc_host).await?; println!("==== CURRENT CLUSTER LAYOUT ===="); - print_cluster_layout(&layout, "No nodes currently have a role in the cluster.\nSee `garage status` to view available nodes."); + print_cluster_layout(layout.current(), "No nodes currently have a role in the cluster.\nSee `garage status` to view available nodes."); println!(); - println!("Current cluster layout version: {}", layout.version); + println!( + "Current cluster layout version: {}", + layout.current().version + ); let has_role_changes = print_staging_role_changes(&layout); if has_role_changes { - let v = layout.version; + let v = layout.current().version; let res_apply = layout.apply_staged_changes(Some(v + 1)); // this will print the stats of what partitions @@ -189,7 +193,7 @@ pub async fn cmd_show_layout( Ok((layout, msg)) => { println!(); println!("==== NEW CLUSTER LAYOUT AFTER APPLYING CHANGES ===="); - print_cluster_layout(&layout, "No nodes have a role in the new layout."); + print_cluster_layout(layout.current(), "No nodes have a role in the new layout."); println!(); for line in msg.iter() { @@ -266,11 +270,11 @@ pub async fn cmd_config_layout( .parse::() .ok_or_message("invalid zone redundancy value")?; if let ZoneRedundancy::AtLeast(r_int) = r { - if r_int > layout.replication_factor { + if r_int > layout.current().replication_factor { return Err(Error::Message(format!( "The zone redundancy must be smaller or equal to the \ replication factor ({}).", - layout.replication_factor + layout.current().replication_factor ))); } else if r_int < 1 { return Err(Error::Message( @@ -302,7 +306,7 @@ pub async fn cmd_config_layout( pub async fn fetch_layout( rpc_cli: &Endpoint, rpc_host: NodeID, -) -> Result { +) -> Result { match rpc_cli .call(&rpc_host, SystemRpc::PullClusterLayout, PRIO_NORMAL) .await?? @@ -315,7 +319,7 @@ pub async fn fetch_layout( pub async fn send_layout( rpc_cli: &Endpoint, rpc_host: NodeID, - layout: ClusterLayout, + layout: LayoutHistory, ) -> Result<(), Error> { rpc_cli .call( @@ -327,7 +331,7 @@ pub async fn send_layout( Ok(()) } -pub fn print_cluster_layout(layout: &ClusterLayout, empty_msg: &str) { +pub fn print_cluster_layout(layout: &LayoutVersion, empty_msg: &str) { let mut table = vec!["ID\tTags\tZone\tCapacity\tUsable capacity".to_string()]; for (id, _, role) in layout.roles.items().iter() { let role = match &role.0 { @@ -366,13 +370,13 @@ pub fn print_cluster_layout(layout: &ClusterLayout, empty_msg: &str) { } } -pub fn print_staging_role_changes(layout: &ClusterLayout) -> bool { +pub fn print_staging_role_changes(layout: &LayoutHistory) -> bool { let has_role_changes = layout .staging_roles .items() .iter() - .any(|(k, _, v)| layout.roles.get(k) != Some(v)); - let has_layout_changes = *layout.staging_parameters.get() != layout.parameters; + .any(|(k, _, v)| layout.current().roles.get(k) != Some(v)); + let has_layout_changes = *layout.staging_parameters.get() != layout.current().parameters; if has_role_changes || has_layout_changes { println!(); @@ -380,7 +384,7 @@ pub fn print_staging_role_changes(layout: &ClusterLayout) -> bool { if has_role_changes { let mut table = vec!["ID\tTags\tZone\tCapacity".to_string()]; for (id, _, role) in layout.staging_roles.items().iter() { - if layout.roles.get(id) == Some(role) { + if layout.current().roles.get(id) == Some(role) { continue; } if let Some(role) = &role.0 { -- cgit v1.2.3 From 523d2ecb9511f74e144cd116b942d6c1bf0f546d Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 9 Nov 2023 11:19:43 +0100 Subject: layout: use separate CRDT for staged layout changes --- src/garage/cli/cmd.rs | 2 +- src/garage/cli/layout.rs | 47 +++++++++++++++++++++++++++++------------------ src/garage/cli/structs.rs | 6 +++--- 3 files changed, 33 insertions(+), 22 deletions(-) (limited to 'src/garage') diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs index 8be43873..1a054025 100644 --- a/src/garage/cli/cmd.rs +++ b/src/garage/cli/cmd.rs @@ -85,7 +85,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> )); } _ => { - let new_role = match layout.staging_roles.get(&adv.id) { + let new_role = match layout.staging.get().roles.get(&adv.id) { Some(NodeRoleV(Some(_))) => "(pending)", _ => "NO ROLE ASSIGNED", }; diff --git a/src/garage/cli/layout.rs b/src/garage/cli/layout.rs index 4a617337..269d92f4 100644 --- a/src/garage/cli/layout.rs +++ b/src/garage/cli/layout.rs @@ -65,7 +65,7 @@ pub async fn cmd_assign_role( .collect::, _>>()?; let mut roles = layout.current().roles.clone(); - roles.merge(&layout.staging_roles); + roles.merge(&layout.staging.get().roles); for replaced in args.replace.iter() { let replaced_node = @@ -73,7 +73,9 @@ pub async fn cmd_assign_role( match roles.get(&replaced_node) { Some(NodeRoleV(Some(_))) => { layout - .staging_roles + .staging + .get_mut() + .roles .merge(&roles.update_mutator(replaced_node, NodeRoleV(None))); } _ => { @@ -131,7 +133,9 @@ pub async fn cmd_assign_role( }; layout - .staging_roles + .staging + .get_mut() + .roles .merge(&roles.update_mutator(added_node, NodeRoleV(Some(new_entry)))); } @@ -151,13 +155,15 @@ pub async fn cmd_remove_role( let mut layout = fetch_layout(rpc_cli, rpc_host).await?; let mut roles = layout.current().roles.clone(); - roles.merge(&layout.staging_roles); + roles.merge(&layout.staging.get().roles); let deleted_node = find_matching_node(roles.items().iter().map(|(id, _, _)| *id), &args.node_id)?; layout - .staging_roles + .staging + .get_mut() + .roles .merge(&roles.update_mutator(deleted_node, NodeRoleV(None))); send_layout(rpc_cli, rpc_host, layout).await?; @@ -203,16 +209,12 @@ pub async fn cmd_show_layout( println!(); println!(" garage layout apply --version {}", v + 1); println!(); - println!( - "You can also revert all proposed changes with: garage layout revert --version {}", - v + 1) + println!("You can also revert all proposed changes with: garage layout revert"); } Err(e) => { println!("Error while trying to compute the assignment: {}", e); println!("This new layout cannot yet be applied."); - println!( - "You can also revert all proposed changes with: garage layout revert --version {}", - v + 1) + println!("You can also revert all proposed changes with: garage layout revert"); } } } @@ -245,9 +247,15 @@ pub async fn cmd_revert_layout( rpc_host: NodeID, revert_opt: RevertLayoutOpt, ) -> Result<(), Error> { + if !revert_opt.yes { + return Err(Error::Message( + "Please add the --yes flag to run the layout revert operation".into(), + )); + } + let layout = fetch_layout(rpc_cli, rpc_host).await?; - let layout = layout.revert_staged_changes(revert_opt.version)?; + let layout = layout.revert_staged_changes()?; send_layout(rpc_cli, rpc_host, layout).await?; @@ -284,7 +292,9 @@ pub async fn cmd_config_layout( } layout - .staging_parameters + .staging + .get_mut() + .parameters .update(LayoutParameters { zone_redundancy: r }); println!("The zone redundancy parameter has been set to '{}'.", r); did_something = true; @@ -371,19 +381,20 @@ pub fn print_cluster_layout(layout: &LayoutVersion, empty_msg: &str) { } pub fn print_staging_role_changes(layout: &LayoutHistory) -> bool { - let has_role_changes = layout - .staging_roles + let staging = layout.staging.get(); + let has_role_changes = staging + .roles .items() .iter() .any(|(k, _, v)| layout.current().roles.get(k) != Some(v)); - let has_layout_changes = *layout.staging_parameters.get() != layout.current().parameters; + let has_layout_changes = *staging.parameters.get() != layout.current().parameters; if has_role_changes || has_layout_changes { println!(); println!("==== STAGED ROLE CHANGES ===="); if has_role_changes { let mut table = vec!["ID\tTags\tZone\tCapacity".to_string()]; - for (id, _, role) in layout.staging_roles.items().iter() { + for (id, _, role) in staging.roles.items().iter() { if layout.current().roles.get(id) == Some(role) { continue; } @@ -406,7 +417,7 @@ pub fn print_staging_role_changes(layout: &LayoutHistory) -> bool { if has_layout_changes { println!( "Zone redundancy: {}", - layout.staging_parameters.get().zone_redundancy + staging.parameters.get().zone_redundancy ); } true diff --git a/src/garage/cli/structs.rs b/src/garage/cli/structs.rs index aba57551..3badc447 100644 --- a/src/garage/cli/structs.rs +++ b/src/garage/cli/structs.rs @@ -164,9 +164,9 @@ pub struct ApplyLayoutOpt { #[derive(StructOpt, Debug)] pub struct RevertLayoutOpt { - /// Version number of old configuration to which to revert - #[structopt(long = "version")] - pub(crate) version: Option, + /// The revert operation will not be ran unless this flag is added + #[structopt(long = "yes")] + pub(crate) yes: bool, } #[derive(Serialize, Deserialize, StructOpt, Debug)] -- cgit v1.2.3 From 19ef1ec8e7fee3a6c670e6e35dfcc83f0801e604 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 9 Nov 2023 13:34:14 +0100 Subject: layout: more refactoring --- src/garage/cli/layout.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/garage') diff --git a/src/garage/cli/layout.rs b/src/garage/cli/layout.rs index 269d92f4..bffc81d3 100644 --- a/src/garage/cli/layout.rs +++ b/src/garage/cli/layout.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use bytesize::ByteSize; use format_table::format_table; @@ -321,7 +323,7 @@ pub async fn fetch_layout( .call(&rpc_host, SystemRpc::PullClusterLayout, PRIO_NORMAL) .await?? { - SystemRpc::AdvertiseClusterLayout(t) => Ok(t), + SystemRpc::AdvertiseClusterLayout(t) => Ok(Arc::try_unwrap(t).unwrap()), resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), } } @@ -334,7 +336,7 @@ pub async fn send_layout( rpc_cli .call( &rpc_host, - SystemRpc::AdvertiseClusterLayout(layout), + SystemRpc::AdvertiseClusterLayout(Arc::new(layout)), PRIO_NORMAL, ) .await??; -- cgit v1.2.3 From bfb1845fdc981a370539d641a5d80f438f184f07 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 9 Nov 2023 14:12:05 +0100 Subject: layout: refactor to use a RwLock on LayoutHistory --- src/garage/cli/layout.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/garage') diff --git a/src/garage/cli/layout.rs b/src/garage/cli/layout.rs index bffc81d3..269d92f4 100644 --- a/src/garage/cli/layout.rs +++ b/src/garage/cli/layout.rs @@ -1,5 +1,3 @@ -use std::sync::Arc; - use bytesize::ByteSize; use format_table::format_table; @@ -323,7 +321,7 @@ pub async fn fetch_layout( .call(&rpc_host, SystemRpc::PullClusterLayout, PRIO_NORMAL) .await?? { - SystemRpc::AdvertiseClusterLayout(t) => Ok(Arc::try_unwrap(t).unwrap()), + SystemRpc::AdvertiseClusterLayout(t) => Ok(t), resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), } } @@ -336,7 +334,7 @@ pub async fn send_layout( rpc_cli .call( &rpc_host, - SystemRpc::AdvertiseClusterLayout(Arc::new(layout)), + SystemRpc::AdvertiseClusterLayout(layout), PRIO_NORMAL, ) .await??; -- cgit v1.2.3 From bad7cc812ead88e9f334405c5c082d79c14c8898 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 9 Nov 2023 15:42:10 +0100 Subject: layout admin: add missing calls to update_hash --- src/garage/cli/layout.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/garage') diff --git a/src/garage/cli/layout.rs b/src/garage/cli/layout.rs index 269d92f4..15727448 100644 --- a/src/garage/cli/layout.rs +++ b/src/garage/cli/layout.rs @@ -329,8 +329,9 @@ pub async fn fetch_layout( pub async fn send_layout( rpc_cli: &Endpoint, rpc_host: NodeID, - layout: LayoutHistory, + mut layout: LayoutHistory, ) -> Result<(), Error> { + layout.update_hashes(); rpc_cli .call( &rpc_host, -- cgit v1.2.3 From 1aab1f4e688ebc3f3adcb41c817c16c688a3291c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 14 Nov 2023 13:06:16 +0100 Subject: layout: refactoring of all_nodes --- src/garage/admin/mod.rs | 17 +++++++++-------- src/garage/cli/layout.rs | 6 +++--- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'src/garage') diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index e3ba6d35..77918a0f 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -126,8 +126,8 @@ impl AdminRpcHandler { opt_to_send.all_nodes = false; let mut failures = vec![]; - let layout = self.garage.system.cluster_layout().clone(); - for node in layout.current().node_ids().iter() { + let all_nodes = self.garage.system.cluster_layout().all_nodes().to_vec(); + for node in all_nodes.iter() { let node = (*node).into(); let resp = self .endpoint @@ -163,9 +163,9 @@ impl AdminRpcHandler { async fn handle_stats(&self, opt: StatsOpt) -> Result { if opt.all_nodes { let mut ret = String::new(); - let layout = self.garage.system.cluster_layout().clone(); + let all_nodes = self.garage.system.cluster_layout().all_nodes().to_vec(); - for node in layout.current().node_ids().iter() { + for node in all_nodes.iter() { let mut opt = opt.clone(); opt.all_nodes = false; opt.skip_global = true; @@ -275,6 +275,7 @@ impl AdminRpcHandler { let mut ret = String::new(); // Gather storage node and free space statistics + // TODO: not only layout.current() ??? let layout = &self.garage.system.cluster_layout(); let mut node_partition_count = HashMap::::new(); for short_id in layout.current().ring_assignment_data.iter() { @@ -440,8 +441,8 @@ impl AdminRpcHandler { ) -> Result { if all_nodes { let mut ret = vec![]; - let layout = self.garage.system.cluster_layout().clone(); - for node in layout.current().node_ids().iter() { + let all_nodes = self.garage.system.cluster_layout().all_nodes().to_vec(); + for node in all_nodes.iter() { let node = (*node).into(); match self .endpoint @@ -488,8 +489,8 @@ impl AdminRpcHandler { ) -> Result { if all_nodes { let mut ret = vec![]; - let layout = self.garage.system.cluster_layout().clone(); - for node in layout.current().node_ids().iter() { + let all_nodes = self.garage.system.cluster_layout().all_nodes().to_vec(); + for node in all_nodes.iter() { let node = (*node).into(); match self .endpoint diff --git a/src/garage/cli/layout.rs b/src/garage/cli/layout.rs index 15727448..0f01a37a 100644 --- a/src/garage/cli/layout.rs +++ b/src/garage/cli/layout.rs @@ -49,6 +49,7 @@ pub async fn cmd_assign_role( }; let mut layout = fetch_layout(rpc_cli, rpc_host).await?; + let all_nodes = layout.all_nodes().into_owned(); let added_nodes = args .node_ids @@ -58,7 +59,7 @@ pub async fn cmd_assign_role( status .iter() .map(|adv| adv.id) - .chain(layout.current().node_ids().iter().cloned()), + .chain(all_nodes.iter().cloned()), node_id, ) }) @@ -68,8 +69,7 @@ pub async fn cmd_assign_role( roles.merge(&layout.staging.get().roles); for replaced in args.replace.iter() { - let replaced_node = - find_matching_node(layout.current().node_ids().iter().cloned(), replaced)?; + let replaced_node = find_matching_node(all_nodes.iter().cloned(), replaced)?; match roles.get(&replaced_node) { Some(NodeRoleV(Some(_))) => { layout -- cgit v1.2.3 From 46007bf01dd2e5b489d145ca0a5499ffa7257b96 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 15 Nov 2023 12:56:52 +0100 Subject: integration test: print stdout and stderr on subcommand crash --- src/garage/tests/common/ext/process.rs | 44 +++++++++------------------------- src/garage/tests/common/garage.rs | 2 +- 2 files changed, 12 insertions(+), 34 deletions(-) (limited to 'src/garage') diff --git a/src/garage/tests/common/ext/process.rs b/src/garage/tests/common/ext/process.rs index ba533b6c..8e20bf7c 100644 --- a/src/garage/tests/common/ext/process.rs +++ b/src/garage/tests/common/ext/process.rs @@ -14,42 +14,20 @@ impl CommandExt for process::Command { } fn expect_success_status(&mut self, msg: &str) -> process::ExitStatus { - let status = self.status().expect(msg); - status.expect_success(msg); - status + self.expect_success_output(msg).status } fn expect_success_output(&mut self, msg: &str) -> process::Output { let output = self.output().expect(msg); - output.expect_success(msg); - output - } -} - -pub trait OutputExt { - fn expect_success(&self, msg: &str); -} - -impl OutputExt for process::Output { - fn expect_success(&self, msg: &str) { - self.status.expect_success(msg) - } -} - -pub trait ExitStatusExt { - fn expect_success(&self, msg: &str); -} - -impl ExitStatusExt for process::ExitStatus { - fn expect_success(&self, msg: &str) { - if !self.success() { - match self.code() { - Some(code) => panic!( - "Command exited with code {code}: {msg}", - code = code, - msg = msg - ), - None => panic!("Command exited with signal: {msg}", msg = msg), - } + if !output.status.success() { + panic!( + "{}: command {:?} exited with error {:?}\nSTDOUT: {}\nSTDERR: {}", + msg, + self, + output.status.code(), + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); } + output } } diff --git a/src/garage/tests/common/garage.rs b/src/garage/tests/common/garage.rs index d1f0867a..ebc82f37 100644 --- a/src/garage/tests/common/garage.rs +++ b/src/garage/tests/common/garage.rs @@ -96,7 +96,7 @@ api_bind_addr = "127.0.0.1:{admin_port}" .arg("server") .stdout(stdout) .stderr(stderr) - .env("RUST_LOG", "garage=info,garage_api=trace") + .env("RUST_LOG", "garage=debug,garage_api=trace") .spawn() .expect("Could not start garage"); -- cgit v1.2.3 From 393c4d4515e0cdadadc8de8ae2df12e4371cff88 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 15 Nov 2023 14:20:50 +0100 Subject: layout: add helper for cached/external values to centralize recomputation --- src/garage/cli/layout.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/garage') diff --git a/src/garage/cli/layout.rs b/src/garage/cli/layout.rs index 0f01a37a..51774314 100644 --- a/src/garage/cli/layout.rs +++ b/src/garage/cli/layout.rs @@ -49,7 +49,7 @@ pub async fn cmd_assign_role( }; let mut layout = fetch_layout(rpc_cli, rpc_host).await?; - let all_nodes = layout.all_nodes().into_owned(); + let all_nodes = layout.get_all_nodes(); let added_nodes = args .node_ids @@ -331,7 +331,6 @@ pub async fn send_layout( rpc_host: NodeID, mut layout: LayoutHistory, ) -> Result<(), Error> { - layout.update_hashes(); rpc_cli .call( &rpc_host, -- cgit v1.2.3 From 33c8a489b0a9c0e869282bfc19c548f5a3e02e8c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 15 Nov 2023 15:40:44 +0100 Subject: layou: implement ack locking --- src/garage/cli/layout.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/garage') diff --git a/src/garage/cli/layout.rs b/src/garage/cli/layout.rs index 51774314..0be8278f 100644 --- a/src/garage/cli/layout.rs +++ b/src/garage/cli/layout.rs @@ -329,7 +329,7 @@ pub async fn fetch_layout( pub async fn send_layout( rpc_cli: &Endpoint, rpc_host: NodeID, - mut layout: LayoutHistory, + layout: LayoutHistory, ) -> Result<(), Error> { rpc_cli .call( -- cgit v1.2.3 From 539a920313fff010b8a4291aeef58ec9a14ee635 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 27 Nov 2023 13:18:59 +0100 Subject: cli: show when nodes are draining metadata --- src/garage/cli/cmd.rs | 172 +++++++++++++++++++++++++++++++------------------- 1 file changed, 108 insertions(+), 64 deletions(-) (limited to 'src/garage') diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs index 1a054025..c99243b9 100644 --- a/src/garage/cli/cmd.rs +++ b/src/garage/cli/cmd.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::time::Duration; use format_table::format_table; @@ -62,35 +62,69 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> let mut healthy_nodes = vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()]; for adv in status.iter().filter(|adv| adv.is_up) { - match layout.current().roles.get(&adv.id) { - Some(NodeRoleV(Some(cfg))) => { - let data_avail = match &adv.status.data_disk_avail { - _ if cfg.capacity.is_none() => "N/A".into(), - Some((avail, total)) => { - let pct = (*avail as f64) / (*total as f64) * 100.; - let avail = bytesize::ByteSize::b(*avail); - format!("{} ({:.1}%)", avail, pct) - } - None => "?".into(), - }; + if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) { + let data_avail = match &adv.status.data_disk_avail { + _ if cfg.capacity.is_none() => "N/A".into(), + Some((avail, total)) => { + let pct = (*avail as f64) / (*total as f64) * 100.; + let avail = bytesize::ByteSize::b(*avail); + format!("{} ({:.1}%)", avail, pct) + } + None => "?".into(), + }; + healthy_nodes.push(format!( + "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}", + id = adv.id, + host = adv.status.hostname, + addr = adv.addr, + tags = cfg.tags.join(","), + zone = cfg.zone, + capacity = cfg.capacity_string(), + data_avail = data_avail, + )); + } else { + let prev_role = layout + .versions + .iter() + .rev() + .find_map(|x| match x.roles.get(&adv.id) { + Some(NodeRoleV(Some(cfg))) => Some(cfg), + _ => None, + }); + let historic_role = + layout + .old_versions + .iter() + .rev() + .find_map(|x| match x.roles.get(&adv.id) { + Some(NodeRoleV(Some(cfg))) => Some(cfg), + _ => None, + }); + if let Some(cfg) = prev_role { healthy_nodes.push(format!( - "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}", + "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tdraining metadata...", id = adv.id, host = adv.status.hostname, addr = adv.addr, tags = cfg.tags.join(","), zone = cfg.zone, - capacity = cfg.capacity_string(), - data_avail = data_avail, )); - } - _ => { + } else if let Some(cfg) = historic_role { + healthy_nodes.push(format!( + "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tremoved, metadata drained", + id = adv.id, + host = adv.status.hostname, + addr = adv.addr, + tags = cfg.tags.join(","), + zone = cfg.zone, + )); + } else { let new_role = match layout.staging.get().roles.get(&adv.id) { - Some(NodeRoleV(Some(_))) => "(pending)", + Some(NodeRoleV(Some(_))) => "pending...", _ => "NO ROLE ASSIGNED", }; healthy_nodes.push(format!( - "{id:?}\t{h}\t{addr}\t{new_role}", + "{id:?}\t{h}\t{addr}\t\t\t{new_role}", id = adv.id, h = adv.status.hostname, addr = adv.addr, @@ -101,55 +135,65 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> } format_table(healthy_nodes); - let status_keys = status.iter().map(|adv| adv.id).collect::>(); - let failure_case_1 = status.iter().any(|adv| { - !adv.is_up - && matches!( - layout.current().roles.get(&adv.id), - Some(NodeRoleV(Some(_))) - ) - }); - let failure_case_2 = layout - .current() - .roles - .items() + // Determine which nodes are unhealthy and print that to stdout + let status_map = status .iter() - .any(|(id, _, v)| !status_keys.contains(id) && v.0.is_some()); - if failure_case_1 || failure_case_2 { - println!("\n==== FAILED NODES ===="); - let mut failed_nodes = - vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tLast seen".to_string()]; - for adv in status.iter().filter(|adv| !adv.is_up) { - if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) { - let tf = timeago::Formatter::new(); - failed_nodes.push(format!( - "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}", - id = adv.id, - host = adv.status.hostname, - addr = adv.addr, - tags = cfg.tags.join(","), - zone = cfg.zone, - capacity = cfg.capacity_string(), - last_seen = adv - .last_seen_secs_ago - .map(|s| tf.convert(Duration::from_secs(s))) - .unwrap_or_else(|| "never seen".into()), - )); + .map(|adv| (adv.id, adv)) + .collect::>(); + + let tf = timeago::Formatter::new(); + let mut failed_nodes = + vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tLast seen".to_string()]; + let mut listed = HashSet::new(); + for ver in layout.versions.iter().rev() { + for (node, _, role) in ver.roles.items().iter() { + let cfg = match role { + NodeRoleV(Some(role)) if role.capacity.is_some() => role, + _ => continue, + }; + + if listed.contains(node) { + continue; } - } - for (id, _, role_v) in layout.current().roles.items().iter() { - if let NodeRoleV(Some(cfg)) = role_v { - if !status_keys.contains(id) { - failed_nodes.push(format!( - "{id:?}\t??\t??\t[{tags}]\t{zone}\t{capacity}\tnever seen", - id = id, - tags = cfg.tags.join(","), - zone = cfg.zone, - capacity = cfg.capacity_string(), - )); - } + listed.insert(*node); + + let adv = status_map.get(node); + if adv.map(|x| x.is_up).unwrap_or(false) { + continue; } + + // Node is in a layout version, is not a gateway node, and is not up: + // it is in a failed state, add proper line to the output + let (host, addr, last_seen) = match adv { + Some(adv) => ( + adv.status.hostname.as_str(), + adv.addr.to_string(), + adv.last_seen_secs_ago + .map(|s| tf.convert(Duration::from_secs(s))) + .unwrap_or_else(|| "never seen".into()), + ), + None => ("??", "??".into(), "never seen".into()), + }; + let capacity = if ver.version == layout.current().version { + cfg.capacity_string() + } else { + "draining metadata...".to_string() + }; + failed_nodes.push(format!( + "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}", + id = node, + host = host, + addr = addr, + tags = cfg.tags.join(","), + zone = cfg.zone, + capacity = capacity, + last_seen = last_seen, + )); } + } + + if failed_nodes.len() > 1 { + println!("\n==== FAILED NODES ===="); format_table(failed_nodes); } -- cgit v1.2.3 From 11e6fef93ce3ca56584fc99223b71da77d320dd7 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 27 Nov 2023 16:17:41 +0100 Subject: cli: add layout history and layout assume-sync commands --- src/garage/cli/cmd.rs | 14 +++++- src/garage/cli/layout.rs | 111 ++++++++++++++++++++++++++++++++++++++++++++++ src/garage/cli/structs.rs | 16 +++++++ 3 files changed, 139 insertions(+), 2 deletions(-) (limited to 'src/garage') diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs index c99243b9..08ed00cf 100644 --- a/src/garage/cli/cmd.rs +++ b/src/garage/cli/cmd.rs @@ -135,13 +135,14 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> } format_table(healthy_nodes); - // Determine which nodes are unhealthy and print that to stdout + // Determine which nodes are unhealthy and print that to stdout let status_map = status .iter() .map(|adv| (adv.id, adv)) .collect::>(); let tf = timeago::Formatter::new(); + let mut drain_msg = false; let mut failed_nodes = vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tLast seen".to_string()]; let mut listed = HashSet::new(); @@ -163,7 +164,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> } // Node is in a layout version, is not a gateway node, and is not up: - // it is in a failed state, add proper line to the output + // it is in a failed state, add proper line to the output let (host, addr, last_seen) = match adv { Some(adv) => ( adv.status.hostname.as_str(), @@ -177,6 +178,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> let capacity = if ver.version == layout.current().version { cfg.capacity_string() } else { + drain_msg = true; "draining metadata...".to_string() }; failed_nodes.push(format!( @@ -195,6 +197,14 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> if failed_nodes.len() > 1 { println!("\n==== FAILED NODES ===="); format_table(failed_nodes); + if drain_msg { + println!(); + println!("Your cluster is expecting to drain data from nodes that are currently unavailable."); + println!("If these nodes are definitely dead, please review the layout history with"); + println!( + "`garage layout history` and use `garage layout assume-sync` to force progress." + ); + } } if print_staging_role_changes(&layout) { diff --git a/src/garage/cli/layout.rs b/src/garage/cli/layout.rs index 0be8278f..3c7843bd 100644 --- a/src/garage/cli/layout.rs +++ b/src/garage/cli/layout.rs @@ -32,6 +32,10 @@ pub async fn cli_layout_command_dispatch( LayoutOperation::Config(config_opt) => { cmd_config_layout(system_rpc_endpoint, rpc_host, config_opt).await } + LayoutOperation::History => cmd_layout_history(system_rpc_endpoint, rpc_host).await, + LayoutOperation::AssumeSync(assume_sync_opt) => { + cmd_layout_assume_sync(system_rpc_endpoint, rpc_host, assume_sync_opt).await + } } } @@ -311,6 +315,113 @@ pub async fn cmd_config_layout( Ok(()) } +pub async fn cmd_layout_history( + rpc_cli: &Endpoint, + rpc_host: NodeID, +) -> Result<(), Error> { + let layout = fetch_layout(rpc_cli, rpc_host).await?; + let min_stored = layout.min_stored(); + + println!("==== LAYOUT HISTORY ===="); + let mut table = vec!["Version\tStatus\tStorage nodes\tGateway nodes".to_string()]; + for ver in layout + .versions + .iter() + .rev() + .chain(layout.old_versions.iter().rev()) + { + let status = if ver.version == layout.current().version { + "current" + } else if ver.version >= min_stored { + "draining" + } else { + "historical" + }; + table.push(format!( + "#{}\t{}\t{}\t{}", + ver.version, + status, + ver.roles + .items() + .iter() + .filter(|(_, _, x)| matches!(x, NodeRoleV(Some(c)) if c.capacity.is_some())) + .count(), + ver.roles + .items() + .iter() + .filter(|(_, _, x)| matches!(x, NodeRoleV(Some(c)) if c.capacity.is_none())) + .count(), + )); + } + format_table(table); + + println!(); + println!("==== UPDATE TRACKERS ===="); + println!("This is the internal data that Garage stores to know which nodes have what data."); + println!(); + let mut table = vec!["Node\tAck\tSync\tSync_ack".to_string()]; + let all_nodes = layout.get_all_nodes(); + for node in all_nodes.iter() { + table.push(format!( + "{:?}\t#{}\t#{}\t#{}", + node, + layout.update_trackers.ack_map.get(node), + layout.update_trackers.sync_map.get(node), + layout.update_trackers.sync_ack_map.get(node), + )); + } + table[1..].sort(); + format_table(table); + + if layout.versions.len() > 1 { + println!(); + println!( + "If some nodes are not catching up to the latest layout version in the update tracker," + ); + println!("it might be because they are offline or unable to complete a sync successfully."); + println!( + "You may force progress using `garage layout assume-sync --version {}`", + layout.current().version + ); + } + + Ok(()) +} + +pub async fn cmd_layout_assume_sync( + rpc_cli: &Endpoint, + rpc_host: NodeID, + opt: AssumeSyncOpt, +) -> Result<(), Error> { + let mut layout = fetch_layout(rpc_cli, rpc_host).await?; + + let min_v = layout.min_stored(); + if opt.version <= min_v || opt.version > layout.current().version { + return Err(Error::Message(format!( + "Invalid version, you may use the following version numbers: {}", + (min_v + 1..=layout.current().version) + .map(|x| x.to_string()) + .collect::>() + .join(" ") + ))); + } + + let all_nodes = layout.get_all_nodes(); + for node in all_nodes.iter() { + layout.update_trackers.ack_map.set_max(*node, opt.version); + layout.update_trackers.sync_map.set_max(*node, opt.version); + layout + .update_trackers + .sync_ack_map + .set_max(*node, opt.version); + } + + send_layout(rpc_cli, rpc_host, layout).await?; + println!("Success."); + + Ok(()) +} + // --- utility --- pub async fn fetch_layout( diff --git a/src/garage/cli/structs.rs b/src/garage/cli/structs.rs index 3badc447..c4b400f4 100644 --- a/src/garage/cli/structs.rs +++ b/src/garage/cli/structs.rs @@ -112,6 +112,14 @@ pub enum LayoutOperation { /// Revert staged changes to cluster layout #[structopt(name = "revert", version = garage_version())] Revert(RevertLayoutOpt), + + /// View the history of layouts in the cluster + #[structopt(name = "history", version = garage_version())] + History, + + /// Assume all nodes are synchronized up to a certain layout version + #[structopt(name = "assume-sync", version = garage_version())] + AssumeSync(AssumeSyncOpt), } #[derive(StructOpt, Debug)] @@ -169,6 +177,14 @@ pub struct RevertLayoutOpt { pub(crate) yes: bool, } +#[derive(StructOpt, Debug)] +pub struct AssumeSyncOpt { + /// Version number of the layout to assume is currently up-to-date. + /// This will generally be the current layout version. + #[structopt(long = "version")] + pub(crate) version: u64, +} + #[derive(Serialize, Deserialize, StructOpt, Debug)] pub enum BucketOperation { /// List buckets -- cgit v1.2.3 From c539077d30809c9d2232aa0fe107a9652dcb7c26 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 27 Nov 2023 16:20:19 +0100 Subject: cli: remove historic layout info from status --- src/garage/cli/cmd.rs | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'src/garage') diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs index 08ed00cf..4d1306b6 100644 --- a/src/garage/cli/cmd.rs +++ b/src/garage/cli/cmd.rs @@ -91,15 +91,6 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> Some(NodeRoleV(Some(cfg))) => Some(cfg), _ => None, }); - let historic_role = - layout - .old_versions - .iter() - .rev() - .find_map(|x| match x.roles.get(&adv.id) { - Some(NodeRoleV(Some(cfg))) => Some(cfg), - _ => None, - }); if let Some(cfg) = prev_role { healthy_nodes.push(format!( "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tdraining metadata...", @@ -109,15 +100,6 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> tags = cfg.tags.join(","), zone = cfg.zone, )); - } else if let Some(cfg) = historic_role { - healthy_nodes.push(format!( - "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tremoved, metadata drained", - id = adv.id, - host = adv.status.hostname, - addr = adv.addr, - tags = cfg.tags.join(","), - zone = cfg.zone, - )); } else { let new_role = match layout.staging.get().roles.get(&adv.id) { Some(NodeRoleV(Some(_))) => "pending...", -- cgit v1.2.3 From c04dd8788a3764da2f307b1d10c2d56b7b0e4a61 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 28 Nov 2023 14:25:04 +0100 Subject: admin: more info in admin GetClusterStatus --- src/garage/admin/mod.rs | 2 +- src/garage/cli/cmd.rs | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'src/garage') diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index 77918a0f..da4226cf 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -295,7 +295,7 @@ impl AdminRpcHandler { let info = node_info.get(id); let status = info.map(|x| &x.status); let role = layout.current().roles.get(id).and_then(|x| x.0.as_ref()); - let hostname = status.map(|x| x.hostname.as_str()).unwrap_or("?"); + let hostname = status.and_then(|x| x.hostname.as_deref()).unwrap_or("?"); let zone = role.map(|x| x.zone.as_str()).unwrap_or("?"); let capacity = role .map(|x| x.capacity_string()) diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs index 4d1306b6..c7f0ad2b 100644 --- a/src/garage/cli/cmd.rs +++ b/src/garage/cli/cmd.rs @@ -62,6 +62,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> let mut healthy_nodes = vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()]; for adv in status.iter().filter(|adv| adv.is_up) { + let host = adv.status.hostname.as_deref().unwrap_or("?"); if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) { let data_avail = match &adv.status.data_disk_avail { _ if cfg.capacity.is_none() => "N/A".into(), @@ -75,7 +76,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> healthy_nodes.push(format!( "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}", id = adv.id, - host = adv.status.hostname, + host = host, addr = adv.addr, tags = cfg.tags.join(","), zone = cfg.zone, @@ -95,7 +96,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> healthy_nodes.push(format!( "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tdraining metadata...", id = adv.id, - host = adv.status.hostname, + host = host, addr = adv.addr, tags = cfg.tags.join(","), zone = cfg.zone, @@ -108,7 +109,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> healthy_nodes.push(format!( "{id:?}\t{h}\t{addr}\t\t\t{new_role}", id = adv.id, - h = adv.status.hostname, + h = host, addr = adv.addr, new_role = new_role, )); @@ -149,7 +150,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> // it is in a failed state, add proper line to the output let (host, addr, last_seen) = match adv { Some(adv) => ( - adv.status.hostname.as_str(), + adv.status.hostname.as_deref().unwrap_or("?"), adv.addr.to_string(), adv.last_seen_secs_ago .map(|s| tf.convert(Duration::from_secs(s))) -- cgit v1.2.3 From aa59059a910eb6e1e824b84413a66909d697ef8a Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 7 Dec 2023 11:50:00 +0100 Subject: layout cli: safer skip-dead-nodes command --- src/garage/cli/cmd.rs | 23 ++++++++++++++++------- src/garage/cli/layout.rs | 35 +++++++++++++++++++++++++---------- src/garage/cli/structs.rs | 12 ++++++++---- 3 files changed, 49 insertions(+), 21 deletions(-) (limited to 'src/garage') diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs index c7f0ad2b..196c0cb3 100644 --- a/src/garage/cli/cmd.rs +++ b/src/garage/cli/cmd.rs @@ -49,13 +49,7 @@ pub async fn cli_command_dispatch( } pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> Result<(), Error> { - let status = match rpc_cli - .call(&rpc_host, SystemRpc::GetKnownNodes, PRIO_NORMAL) - .await?? - { - SystemRpc::ReturnKnownNodes(nodes) => nodes, - resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), - }; + let status = fetch_status(rpc_cli, rpc_host).await?; let layout = fetch_layout(rpc_cli, rpc_host).await?; println!("==== HEALTHY NODES ===="); @@ -268,3 +262,18 @@ pub async fn cmd_admin( } Ok(()) } + +// ---- utility ---- + +pub async fn fetch_status( + rpc_cli: &Endpoint, + rpc_host: NodeID, +) -> Result, Error> { + match rpc_cli + .call(&rpc_host, SystemRpc::GetKnownNodes, PRIO_NORMAL) + .await?? + { + SystemRpc::ReturnKnownNodes(nodes) => Ok(nodes), + resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), + } +} diff --git a/src/garage/cli/layout.rs b/src/garage/cli/layout.rs index 3c7843bd..cdf77c04 100644 --- a/src/garage/cli/layout.rs +++ b/src/garage/cli/layout.rs @@ -33,8 +33,8 @@ pub async fn cli_layout_command_dispatch( cmd_config_layout(system_rpc_endpoint, rpc_host, config_opt).await } LayoutOperation::History => cmd_layout_history(system_rpc_endpoint, rpc_host).await, - LayoutOperation::AssumeSync(assume_sync_opt) => { - cmd_layout_assume_sync(system_rpc_endpoint, rpc_host, assume_sync_opt).await + LayoutOperation::SkipDeadNodes(assume_sync_opt) => { + cmd_layout_skip_dead_nodes(system_rpc_endpoint, rpc_host, assume_sync_opt).await } } } @@ -388,13 +388,21 @@ pub async fn cmd_layout_history( Ok(()) } -pub async fn cmd_layout_assume_sync( +pub async fn cmd_layout_skip_dead_nodes( rpc_cli: &Endpoint, rpc_host: NodeID, - opt: AssumeSyncOpt, + opt: SkipDeadNodesOpt, ) -> Result<(), Error> { + let status = fetch_status(rpc_cli, rpc_host).await?; let mut layout = fetch_layout(rpc_cli, rpc_host).await?; + if layout.versions.len() == 1 { + return Err(Error::Message( + "This command cannot be called when there is only one live cluster layout version" + .into(), + )); + } + let min_v = layout.min_stored(); if opt.version <= min_v || opt.version > layout.current().version { return Err(Error::Message(format!( @@ -408,12 +416,19 @@ pub async fn cmd_layout_assume_sync( let all_nodes = layout.get_all_nodes(); for node in all_nodes.iter() { - layout.update_trackers.ack_map.set_max(*node, opt.version); - layout.update_trackers.sync_map.set_max(*node, opt.version); - layout - .update_trackers - .sync_ack_map - .set_max(*node, opt.version); + if status.iter().any(|x| x.id == *node && x.is_up) { + continue; + } + + if layout.update_trackers.ack_map.set_max(*node, opt.version) { + println!("Increased the ACK tracker for node {:?}", node); + } + + if opt.allow_missing_data { + if layout.update_trackers.sync_map.set_max(*node, opt.version) { + println!("Increased the SYNC tracker for node {:?}", node); + } + } } send_layout(rpc_cli, rpc_host, layout).await?; diff --git a/src/garage/cli/structs.rs b/src/garage/cli/structs.rs index c4b400f4..6bc3da22 100644 --- a/src/garage/cli/structs.rs +++ b/src/garage/cli/structs.rs @@ -117,9 +117,9 @@ pub enum LayoutOperation { #[structopt(name = "history", version = garage_version())] History, - /// Assume all nodes are synchronized up to a certain layout version - #[structopt(name = "assume-sync", version = garage_version())] - AssumeSync(AssumeSyncOpt), + /// Skip dead nodes when awaiting for a new layout version to be synchronized + #[structopt(name = "skip-dead-nodes", version = garage_version())] + SkipDeadNodes(SkipDeadNodesOpt), } #[derive(StructOpt, Debug)] @@ -178,11 +178,15 @@ pub struct RevertLayoutOpt { } #[derive(StructOpt, Debug)] -pub struct AssumeSyncOpt { +pub struct SkipDeadNodesOpt { /// Version number of the layout to assume is currently up-to-date. /// This will generally be the current layout version. #[structopt(long = "version")] pub(crate) version: u64, + /// Allow the skip even if a quorum of ndoes could not be found for + /// the data among the remaining nodes + #[structopt(long = "allow-missing-data")] + pub(crate) allow_missing_data: bool, } #[derive(Serialize, Deserialize, StructOpt, Debug)] -- cgit v1.2.3 From 9cecea64d4509e95ac9793b29c947e2ecf9bb0b8 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 7 Dec 2023 14:27:53 +0100 Subject: layout: allow sync update tracker to progress with only quorums --- src/garage/cli/layout.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/garage') diff --git a/src/garage/cli/layout.rs b/src/garage/cli/layout.rs index cdf77c04..fac826f5 100644 --- a/src/garage/cli/layout.rs +++ b/src/garage/cli/layout.rs @@ -365,9 +365,9 @@ pub async fn cmd_layout_history( table.push(format!( "{:?}\t#{}\t#{}\t#{}", node, - layout.update_trackers.ack_map.get(node), - layout.update_trackers.sync_map.get(node), - layout.update_trackers.sync_ack_map.get(node), + layout.update_trackers.ack_map.get(node, min_stored), + layout.update_trackers.sync_map.get(node, min_stored), + layout.update_trackers.sync_ack_map.get(node, min_stored), )); } table[1..].sort(); -- cgit v1.2.3 From 7f2541101f15614c79020b35d3d7dab767c32676 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 8 Dec 2023 11:24:23 +0100 Subject: cli: improvements to the layout commands when multiple layouts are live --- src/garage/admin/mod.rs | 3 +-- src/garage/cli/cmd.rs | 4 +-- src/garage/cli/layout.rs | 67 +++++++++++++++++++++++++++++++----------------- src/garage/cli/util.rs | 4 ++- 4 files changed, 49 insertions(+), 29 deletions(-) (limited to 'src/garage') diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index da4226cf..de7851e1 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -274,8 +274,7 @@ impl AdminRpcHandler { fn gather_cluster_stats(&self) -> String { let mut ret = String::new(); - // Gather storage node and free space statistics - // TODO: not only layout.current() ??? + // Gather storage node and free space statistics for current nodes let layout = &self.garage.system.cluster_layout(); let mut node_partition_count = HashMap::::new(); for short_id in layout.current().ring_assignment_data.iter() { diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs index 196c0cb3..fb6dface 100644 --- a/src/garage/cli/cmd.rs +++ b/src/garage/cli/cmd.rs @@ -179,7 +179,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> println!("Your cluster is expecting to drain data from nodes that are currently unavailable."); println!("If these nodes are definitely dead, please review the layout history with"); println!( - "`garage layout history` and use `garage layout assume-sync` to force progress." + "`garage layout history` and use `garage layout skip-dead-nodes` to force progress." ); } } @@ -274,6 +274,6 @@ pub async fn fetch_status( .await?? { SystemRpc::ReturnKnownNodes(nodes) => Ok(nodes), - resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), + resp => Err(Error::unexpected_rpc_message(resp)), } } diff --git a/src/garage/cli/layout.rs b/src/garage/cli/layout.rs index fac826f5..f76e33c5 100644 --- a/src/garage/cli/layout.rs +++ b/src/garage/cli/layout.rs @@ -354,35 +354,44 @@ pub async fn cmd_layout_history( )); } format_table(table); - - println!(); - println!("==== UPDATE TRACKERS ===="); - println!("This is the internal data that Garage stores to know which nodes have what data."); println!(); - let mut table = vec!["Node\tAck\tSync\tSync_ack".to_string()]; - let all_nodes = layout.get_all_nodes(); - for node in all_nodes.iter() { - table.push(format!( - "{:?}\t#{}\t#{}\t#{}", - node, - layout.update_trackers.ack_map.get(node, min_stored), - layout.update_trackers.sync_map.get(node, min_stored), - layout.update_trackers.sync_ack_map.get(node, min_stored), - )); - } - table[1..].sort(); - format_table(table); if layout.versions.len() > 1 { + println!("==== UPDATE TRACKERS ===="); + println!("Several layout versions are currently live in the version, and data is being migrated."); + println!( + "This is the internal data that Garage stores to know which nodes have what data." + ); + println!(); + let mut table = vec!["Node\tAck\tSync\tSync_ack".to_string()]; + let all_nodes = layout.get_all_nodes(); + for node in all_nodes.iter() { + table.push(format!( + "{:?}\t#{}\t#{}\t#{}", + node, + layout.update_trackers.ack_map.get(node, min_stored), + layout.update_trackers.sync_map.get(node, min_stored), + layout.update_trackers.sync_ack_map.get(node, min_stored), + )); + } + table[1..].sort(); + format_table(table); + println!(); println!( - "If some nodes are not catching up to the latest layout version in the update tracker," + "If some nodes are not catching up to the latest layout version in the update trackers," ); println!("it might be because they are offline or unable to complete a sync successfully."); println!( - "You may force progress using `garage layout assume-sync --version {}`", + "You may force progress using `garage layout skip-dead-nodes --version {}`", layout.current().version ); + } else { + println!("Your cluster is currently in a stable state with a single live layout version."); + println!("No metadata migration is in progress. Note that the migration of data blocks is not tracked,"); + println!( + "so you might want to keep old nodes online until their data directories become empty." + ); } Ok(()) @@ -415,6 +424,7 @@ pub async fn cmd_layout_skip_dead_nodes( } let all_nodes = layout.get_all_nodes(); + let mut did_something = false; for node in all_nodes.iter() { if status.iter().any(|x| x.id == *node && x.is_up) { continue; @@ -422,19 +432,28 @@ pub async fn cmd_layout_skip_dead_nodes( if layout.update_trackers.ack_map.set_max(*node, opt.version) { println!("Increased the ACK tracker for node {:?}", node); + did_something = true; } if opt.allow_missing_data { if layout.update_trackers.sync_map.set_max(*node, opt.version) { println!("Increased the SYNC tracker for node {:?}", node); + did_something = true; } } } - send_layout(rpc_cli, rpc_host, layout).await?; - println!("Success."); - - Ok(()) + if did_something { + send_layout(rpc_cli, rpc_host, layout).await?; + println!("Success."); + Ok(()) + } else if !opt.allow_missing_data { + Err(Error::Message("Nothing was done, try passing the `--allow-missing-data` flag to force progress even when not enough nodes can complete a metadata sync.".into())) + } else { + Err(Error::Message( + "Sorry, there is nothing I can do for you. Please wait patiently. If you ask for help, please send the output of the `garage layout history` command.".into(), + )) + } } // --- utility --- @@ -448,7 +467,7 @@ pub async fn fetch_layout( .await?? { SystemRpc::AdvertiseClusterLayout(t) => Ok(t), - resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), + resp => Err(Error::unexpected_rpc_message(resp)), } } diff --git a/src/garage/cli/util.rs b/src/garage/cli/util.rs index 2232d395..0511e2b1 100644 --- a/src/garage/cli/util.rs +++ b/src/garage/cli/util.rs @@ -450,6 +450,8 @@ pub fn print_block_info( if refcount != nondeleted_count { println!(); - println!("Warning: refcount does not match number of non-deleted versions"); + println!( + "Warning: refcount does not match number of non-deleted versions (see issue #644)." + ); } } -- cgit v1.2.3 From db48dd3d6c1f9e86a62e9b8edfce2c1620bcd5f3 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 11 Jan 2024 12:05:51 +0100 Subject: bump crate versions to 0.10.0 --- src/garage/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/garage') diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index 7c3a79cb..dce7ea73 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage" -version = "0.9.0" +version = "0.10.0" authors = ["Alex Auvolat "] edition = "2018" license = "AGPL-3.0" -- cgit v1.2.3 From c1769bbe69f723fb3980cf4fdac7615cfb782720 Mon Sep 17 00:00:00 2001 From: Yureka Date: Mon, 4 Mar 2024 19:58:32 +0100 Subject: ReplicationMode -> ConsistencyMode+ReplicationFactor --- src/garage/secrets.rs | 6 +++--- src/garage/tests/common/garage.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/garage') diff --git a/src/garage/secrets.rs b/src/garage/secrets.rs index c3d704aa..8d2ff475 100644 --- a/src/garage/secrets.rs +++ b/src/garage/secrets.rs @@ -163,7 +163,7 @@ mod tests { r#" metadata_dir = "/tmp/garage/meta" data_dir = "/tmp/garage/data" - replication_mode = "3" + replication_factor = 3 rpc_bind_addr = "[::]:3901" rpc_secret_file = "{}" @@ -185,7 +185,7 @@ mod tests { r#" metadata_dir = "/tmp/garage/meta" data_dir = "/tmp/garage/data" - replication_mode = "3" + replication_factor = 3 rpc_bind_addr = "[::]:3901" rpc_secret_file = "{}" allow_world_readable_secrets = true @@ -296,7 +296,7 @@ mod tests { r#" metadata_dir = "/tmp/garage/meta" data_dir = "/tmp/garage/data" - replication_mode = "3" + replication_factor = 3 rpc_bind_addr = "[::]:3901" rpc_secret= "dummy" rpc_secret_file = "dummy" diff --git a/src/garage/tests/common/garage.rs b/src/garage/tests/common/garage.rs index ebc82f37..f1c1efc8 100644 --- a/src/garage/tests/common/garage.rs +++ b/src/garage/tests/common/garage.rs @@ -54,7 +54,7 @@ metadata_dir = "{path}/meta" data_dir = "{path}/data" db_engine = "lmdb" -replication_mode = "1" +replication_factor = 1 rpc_bind_addr = "127.0.0.1:{rpc_port}" rpc_public_addr = "127.0.0.1:{rpc_port}" -- cgit v1.2.3 From fa4878bad6434f33ab9e0f663d8529e0db66d7e6 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 28 Feb 2024 14:09:41 +0100 Subject: [sse-c] Testing for SSE-C encryption --- src/garage/tests/s3/mod.rs | 1 + src/garage/tests/s3/ssec.rs | 455 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 456 insertions(+) create mode 100644 src/garage/tests/s3/ssec.rs (limited to 'src/garage') diff --git a/src/garage/tests/s3/mod.rs b/src/garage/tests/s3/mod.rs index 4ebc4914..e75b1397 100644 --- a/src/garage/tests/s3/mod.rs +++ b/src/garage/tests/s3/mod.rs @@ -3,5 +3,6 @@ mod multipart; mod objects; mod presigned; mod simple; +mod ssec; mod streaming_signature; mod website; diff --git a/src/garage/tests/s3/ssec.rs b/src/garage/tests/s3/ssec.rs new file mode 100644 index 00000000..d8f11950 --- /dev/null +++ b/src/garage/tests/s3/ssec.rs @@ -0,0 +1,455 @@ +use crate::common::{self, Context}; +use aws_sdk_s3::primitives::ByteStream; +use aws_sdk_s3::types::{CompletedMultipartUpload, CompletedPart}; + +const SSEC_KEY: &str = "u8zCfnEyt5Imo/krN+sxA1DQXxLWtPJavU6T6gOVj1Y="; +const SSEC_KEY_MD5: &str = "jMGbs3GyZkYjJUP6q5jA7g=="; +const SSEC_KEY2: &str = "XkYVk4Z3vVDO2yJaUqCAEZX6lL10voMxtV06d8my/eU="; +const SSEC_KEY2_MD5: &str = "kedo2ab8J1MCjHwJuLTJHw=="; + +const SZ_2MB: usize = 2 * 1024 * 1024; + +#[tokio::test] +async fn test_ssec_object() { + let ctx = common::context(); + let bucket = ctx.create_bucket("sse-c"); + + let bytes1 = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".to_vec(); + let bytes2 = (0..400000) + .map(|x| ((x * 3792) % 256) as u8) + .collect::>(); + + for data in vec![bytes1, bytes2] { + let stream = ByteStream::new(data.clone().into()); + + // Write encrypted object + let r = ctx + .client + .put_object() + .bucket(&bucket) + .key("testobj") + .sse_customer_algorithm("AES256") + .sse_customer_key(SSEC_KEY) + .sse_customer_key_md5(SSEC_KEY_MD5) + .body(stream) + .send() + .await + .unwrap(); + assert_eq!(r.sse_customer_algorithm, Some("AES256".into())); + assert_eq!(r.sse_customer_key_md5, Some(SSEC_KEY_MD5.into())); + + test_read_encrypted( + &ctx, + &bucket, + "testobj", + &data, + SSEC_KEY, + SSEC_KEY_MD5, + SSEC_KEY2, + SSEC_KEY2_MD5, + ) + .await; + + // Test copy from encrypted to non-encrypted + let r = ctx + .client + .copy_object() + .bucket(&bucket) + .key("test-copy-enc-dec") + .copy_source(format!("{}/{}", bucket, "testobj")) + .copy_source_sse_customer_algorithm("AES256") + .copy_source_sse_customer_key(SSEC_KEY) + .copy_source_sse_customer_key_md5(SSEC_KEY_MD5) + .send() + .await + .unwrap(); + assert_eq!(r.sse_customer_algorithm, None); + assert_eq!(r.sse_customer_key_md5, None); + + // Test read decrypted file + let r = ctx + .client + .get_object() + .bucket(&bucket) + .key("test-copy-enc-dec") + .send() + .await + .unwrap(); + assert_bytes_eq!(r.body, &data); + assert_eq!(r.sse_customer_algorithm, None); + assert_eq!(r.sse_customer_key_md5, None); + + // Test copy from non-encrypted to encrypted + let r = ctx + .client + .copy_object() + .bucket(&bucket) + .key("test-copy-enc-dec-enc") + .copy_source(format!("{}/test-copy-enc-dec", bucket)) + .sse_customer_algorithm("AES256") + .sse_customer_key(SSEC_KEY2) + .sse_customer_key_md5(SSEC_KEY2_MD5) + .send() + .await + .unwrap(); + assert_eq!(r.sse_customer_algorithm, Some("AES256".into())); + assert_eq!(r.sse_customer_key_md5, Some(SSEC_KEY2_MD5.into())); + + test_read_encrypted( + &ctx, + &bucket, + "test-copy-enc-dec-enc", + &data, + SSEC_KEY2, + SSEC_KEY2_MD5, + SSEC_KEY, + SSEC_KEY_MD5, + ) + .await; + + // Test copy from encrypted to encrypted with different keys + let r = ctx + .client + .copy_object() + .bucket(&bucket) + .key("test-copy-enc-enc") + .copy_source(format!("{}/{}", bucket, "testobj")) + .copy_source_sse_customer_algorithm("AES256") + .copy_source_sse_customer_key(SSEC_KEY) + .copy_source_sse_customer_key_md5(SSEC_KEY_MD5) + .sse_customer_algorithm("AES256") + .sse_customer_key(SSEC_KEY2) + .sse_customer_key_md5(SSEC_KEY2_MD5) + .send() + .await + .unwrap(); + assert_eq!(r.sse_customer_algorithm, Some("AES256".into())); + assert_eq!(r.sse_customer_key_md5, Some(SSEC_KEY2_MD5.into())); + test_read_encrypted( + &ctx, + &bucket, + "test-copy-enc-enc", + &data, + SSEC_KEY2, + SSEC_KEY2_MD5, + SSEC_KEY, + SSEC_KEY_MD5, + ) + .await; + + // Test copy from encrypted to encrypted with the same key + let r = ctx + .client + .copy_object() + .bucket(&bucket) + .key("test-copy-enc-enc-same") + .copy_source(format!("{}/{}", bucket, "testobj")) + .copy_source_sse_customer_algorithm("AES256") + .copy_source_sse_customer_key(SSEC_KEY) + .copy_source_sse_customer_key_md5(SSEC_KEY_MD5) + .sse_customer_algorithm("AES256") + .sse_customer_key(SSEC_KEY) + .sse_customer_key_md5(SSEC_KEY_MD5) + .send() + .await + .unwrap(); + assert_eq!(r.sse_customer_algorithm, Some("AES256".into())); + assert_eq!(r.sse_customer_key_md5, Some(SSEC_KEY_MD5.into())); + test_read_encrypted( + &ctx, + &bucket, + "test-copy-enc-enc-same", + &data, + SSEC_KEY, + SSEC_KEY_MD5, + SSEC_KEY2, + SSEC_KEY2_MD5, + ) + .await; + } +} + +#[tokio::test] +async fn test_multipart_upload() { + let ctx = common::context(); + let bucket = ctx.create_bucket("test-ssec-mpu"); + + let u1 = vec![0x11; SZ_2MB]; + let u2 = vec![0x22; SZ_2MB]; + let u3 = vec![0x33; SZ_2MB]; + let all = [&u1[..], &u2[..], &u3[..]].concat(); + + // Test simple encrypted mpu + { + let up = ctx + .client + .create_multipart_upload() + .bucket(&bucket) + .key("a") + .sse_customer_algorithm("AES256") + .sse_customer_key(SSEC_KEY) + .sse_customer_key_md5(SSEC_KEY_MD5) + .send() + .await + .unwrap(); + assert!(up.upload_id.is_some()); + assert_eq!(up.sse_customer_algorithm, Some("AES256".into())); + assert_eq!(up.sse_customer_key_md5, Some(SSEC_KEY_MD5.into())); + + let uid = up.upload_id.as_ref().unwrap(); + + let mut etags = vec![]; + for (i, part) in vec![&u1, &u2, &u3].into_iter().enumerate() { + let pu = ctx + .client + .upload_part() + .bucket(&bucket) + .key("a") + .upload_id(uid) + .part_number((i + 1) as i32) + .sse_customer_algorithm("AES256") + .sse_customer_key(SSEC_KEY) + .sse_customer_key_md5(SSEC_KEY_MD5) + .body(ByteStream::from(part.to_vec())) + .send() + .await + .unwrap(); + etags.push(pu.e_tag.unwrap()); + } + + let mut cmp = CompletedMultipartUpload::builder(); + for (i, etag) in etags.into_iter().enumerate() { + cmp = cmp.parts( + CompletedPart::builder() + .part_number((i + 1) as i32) + .e_tag(etag) + .build(), + ); + } + + ctx.client + .complete_multipart_upload() + .bucket(&bucket) + .key("a") + .upload_id(uid) + .multipart_upload(cmp.build()) + .send() + .await + .unwrap(); + + test_read_encrypted( + &ctx, + &bucket, + "a", + &all, + SSEC_KEY, + SSEC_KEY_MD5, + SSEC_KEY2, + SSEC_KEY2_MD5, + ) + .await; + } + + // Test upload part copy from first object + { + // (setup) Upload a single part object + ctx.client + .put_object() + .bucket(&bucket) + .key("b") + .body(ByteStream::from(u1.clone())) + .sse_customer_algorithm("AES256") + .sse_customer_key(SSEC_KEY2) + .sse_customer_key_md5(SSEC_KEY2_MD5) + .send() + .await + .unwrap(); + + let up = ctx + .client + .create_multipart_upload() + .bucket(&bucket) + .key("target") + .sse_customer_algorithm("AES256") + .sse_customer_key(SSEC_KEY2) + .sse_customer_key_md5(SSEC_KEY2_MD5) + .send() + .await + .unwrap(); + let uid = up.upload_id.as_ref().unwrap(); + + let p1 = ctx + .client + .upload_part() + .bucket(&bucket) + .key("target") + .upload_id(uid) + .part_number(1) + .sse_customer_algorithm("AES256") + .sse_customer_key(SSEC_KEY2) + .sse_customer_key_md5(SSEC_KEY2_MD5) + .body(ByteStream::from(u3.clone())) + .send() + .await + .unwrap(); + + let p2 = ctx + .client + .upload_part_copy() + .bucket(&bucket) + .key("target") + .upload_id(uid) + .part_number(2) + .copy_source(format!("{}/a", bucket)) + .copy_source_range("bytes=500-550000") + .copy_source_sse_customer_algorithm("AES256") + .copy_source_sse_customer_key(SSEC_KEY) + .copy_source_sse_customer_key_md5(SSEC_KEY_MD5) + .sse_customer_algorithm("AES256") + .sse_customer_key(SSEC_KEY2) + .sse_customer_key_md5(SSEC_KEY2_MD5) + .send() + .await + .unwrap(); + + let p3 = ctx + .client + .upload_part() + .bucket(&bucket) + .key("target") + .upload_id(uid) + .part_number(3) + .sse_customer_algorithm("AES256") + .sse_customer_key(SSEC_KEY2) + .sse_customer_key_md5(SSEC_KEY2_MD5) + .body(ByteStream::from(u2.clone())) + .send() + .await + .unwrap(); + + let p4 = ctx + .client + .upload_part_copy() + .bucket(&bucket) + .key("target") + .upload_id(uid) + .part_number(4) + .copy_source(format!("{}/b", bucket)) + .copy_source_range("bytes=1500-20500") + .copy_source_sse_customer_algorithm("AES256") + .copy_source_sse_customer_key(SSEC_KEY2) + .copy_source_sse_customer_key_md5(SSEC_KEY2_MD5) + .sse_customer_algorithm("AES256") + .sse_customer_key(SSEC_KEY2) + .sse_customer_key_md5(SSEC_KEY2_MD5) + .send() + .await + .unwrap(); + + let cmp = CompletedMultipartUpload::builder() + .parts( + CompletedPart::builder() + .part_number(1) + .e_tag(p1.e_tag.unwrap()) + .build(), + ) + .parts( + CompletedPart::builder() + .part_number(2) + .e_tag(p2.copy_part_result.unwrap().e_tag.unwrap()) + .build(), + ) + .parts( + CompletedPart::builder() + .part_number(3) + .e_tag(p3.e_tag.unwrap()) + .build(), + ) + .parts( + CompletedPart::builder() + .part_number(4) + .e_tag(p4.copy_part_result.unwrap().e_tag.unwrap()) + .build(), + ) + .build(); + + ctx.client + .complete_multipart_upload() + .bucket(&bucket) + .key("target") + .upload_id(uid) + .multipart_upload(cmp) + .send() + .await + .unwrap(); + + // (check) Get object + let expected = [&u3[..], &all[500..550001], &u2[..], &u1[1500..20501]].concat(); + test_read_encrypted( + &ctx, + &bucket, + "target", + &expected, + SSEC_KEY2, + SSEC_KEY2_MD5, + SSEC_KEY, + SSEC_KEY_MD5, + ) + .await; + } +} + +async fn test_read_encrypted( + ctx: &Context, + bucket: &str, + obj_key: &str, + expected_data: &[u8], + enc_key: &str, + enc_key_md5: &str, + wrong_enc_key: &str, + wrong_enc_key_md5: &str, +) { + // Test read encrypted without key + let o = ctx + .client + .get_object() + .bucket(bucket) + .key(obj_key) + .send() + .await; + assert!( + o.is_err(), + "encrypted file could be read without encryption key" + ); + + // Test read encrypted with wrong key + let o = ctx + .client + .get_object() + .bucket(bucket) + .key(obj_key) + .sse_customer_key(wrong_enc_key) + .sse_customer_key_md5(wrong_enc_key_md5) + .send() + .await; + assert!( + o.is_err(), + "encrypted file could be read with incorrect encryption key" + ); + + // Test read encrypted with correct key + let o = ctx + .client + .get_object() + .bucket(bucket) + .key(obj_key) + .sse_customer_algorithm("AES256") + .sse_customer_key(enc_key) + .sse_customer_key_md5(enc_key_md5) + .send() + .await + .unwrap(); + assert_bytes_eq!(o.body, expected_data); + assert_eq!(o.sse_customer_algorithm, Some("AES256".into())); + assert_eq!(o.sse_customer_key_md5, Some(enc_key_md5.to_string())); +} -- cgit v1.2.3 From f537f76681760e9b2b3cc095a6031ebb59ca4733 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 8 Mar 2024 13:24:47 +0100 Subject: [rm-migration] Remove migration path from Garage v0.5 --- src/garage/admin/mod.rs | 21 --------------------- src/garage/cli/cmd.rs | 3 --- src/garage/cli/structs.rs | 22 ---------------------- 3 files changed, 46 deletions(-) (limited to 'src/garage') diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index de7851e1..073693ed 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -27,7 +27,6 @@ use garage_model::bucket_table::*; use garage_model::garage::Garage; use garage_model::helper::error::{Error, OkOrBadRequest}; use garage_model::key_table::*; -use garage_model::migrate::Migrate; use garage_model::s3::mpu_table::MultipartUpload; use garage_model::s3::version_table::Version; @@ -42,7 +41,6 @@ pub enum AdminRpc { BucketOperation(BucketOperation), KeyOperation(KeyOperation), LaunchRepair(RepairOpt), - Migrate(MigrateOpt), Stats(StatsOpt), Worker(WorkerOperation), BlockOperation(BlockOperation), @@ -95,24 +93,6 @@ impl AdminRpcHandler { admin } - // ================ MIGRATION COMMANDS ==================== - - async fn handle_migrate(self: &Arc, opt: MigrateOpt) -> Result { - if !opt.yes { - return Err(Error::BadRequest( - "Please provide the --yes flag to initiate migration operation.".to_string(), - )); - } - - let m = Migrate { - garage: self.garage.clone(), - }; - match opt.what { - MigrateWhat::Buckets050 => m.migrate_buckets050().await, - }?; - Ok(AdminRpc::Ok("Migration successfull.".into())) - } - // ================ REPAIR COMMANDS ==================== async fn handle_launch_repair(self: &Arc, opt: RepairOpt) -> Result { @@ -530,7 +510,6 @@ impl EndpointHandler for AdminRpcHandler { match message { AdminRpc::BucketOperation(bo) => self.handle_bucket_cmd(bo).await, AdminRpc::KeyOperation(ko) => self.handle_key_cmd(ko).await, - AdminRpc::Migrate(opt) => self.handle_migrate(opt.clone()).await, AdminRpc::LaunchRepair(opt) => self.handle_launch_repair(opt.clone()).await, AdminRpc::Stats(opt) => self.handle_stats(opt.clone()).await, AdminRpc::Worker(wo) => self.handle_worker_cmd(wo).await, diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs index fb6dface..7440457f 100644 --- a/src/garage/cli/cmd.rs +++ b/src/garage/cli/cmd.rs @@ -33,9 +33,6 @@ pub async fn cli_command_dispatch( Command::Key(ko) => { cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::KeyOperation(ko)).await } - Command::Migrate(mo) => { - cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::Migrate(mo)).await - } Command::Repair(ro) => { cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::LaunchRepair(ro)).await } diff --git a/src/garage/cli/structs.rs b/src/garage/cli/structs.rs index 40e47ee1..63014dbc 100644 --- a/src/garage/cli/structs.rs +++ b/src/garage/cli/structs.rs @@ -31,11 +31,6 @@ pub enum Command { #[structopt(name = "key", version = garage_version())] Key(KeyOperation), - /// Run migrations from previous Garage version - /// (DO NOT USE WITHOUT READING FULL DOCUMENTATION) - #[structopt(name = "migrate", version = garage_version())] - Migrate(MigrateOpt), - /// Start repair of node data on remote node #[structopt(name = "repair", version = garage_version())] Repair(RepairOpt), @@ -445,23 +440,6 @@ pub struct KeyImportOpt { pub yes: bool, } -#[derive(Serialize, Deserialize, StructOpt, Debug, Clone)] -pub struct MigrateOpt { - /// Confirm the launch of the migrate operation - #[structopt(long = "yes")] - pub yes: bool, - - #[structopt(subcommand)] - pub what: MigrateWhat, -} - -#[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone)] -pub enum MigrateWhat { - /// Migrate buckets and permissions from v0.5.0 - #[structopt(name = "buckets050", version = garage_version())] - Buckets050, -} - #[derive(Serialize, Deserialize, StructOpt, Debug, Clone)] pub struct RepairOpt { /// Launch repair operation on all nodes -- cgit v1.2.3 From 44454aac012cbef9158110f2352301ffcfaf31c7 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 8 Mar 2024 14:11:02 +0100 Subject: [rm-sled] Remove the Sled database engine --- src/garage/Cargo.toml | 5 ++--- src/garage/cli/convert_db.rs | 2 +- src/garage/main.rs | 6 ++---- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'src/garage') diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index 00ecb35e..53449a1c 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -80,12 +80,11 @@ k2v-client.workspace = true [features] -default = [ "bundled-libs", "metrics", "sled", "lmdb", "sqlite", "k2v" ] +default = [ "bundled-libs", "metrics", "lmdb", "sqlite", "k2v" ] k2v = [ "garage_util/k2v", "garage_api/k2v" ] -# Database engines, Sled is still our default even though we don't like it -sled = [ "garage_model/sled" ] +# Database engines lmdb = [ "garage_model/lmdb" ] sqlite = [ "garage_model/sqlite" ] diff --git a/src/garage/cli/convert_db.rs b/src/garage/cli/convert_db.rs index 2aadb1d6..5346d55a 100644 --- a/src/garage/cli/convert_db.rs +++ b/src/garage/cli/convert_db.rs @@ -11,7 +11,7 @@ pub struct ConvertDbOpt { /// https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#db-engine-since-v0-8-0) #[structopt(short = "i")] input_path: PathBuf, - /// Input database engine (sled, lmdb or sqlite; limited by db engines + /// Input database engine (lmdb or sqlite; limited by db engines /// enabled in this build) #[structopt(short = "a")] input_engine: Engine, diff --git a/src/garage/main.rs b/src/garage/main.rs index e489fff0..5e9c061f 100644 --- a/src/garage/main.rs +++ b/src/garage/main.rs @@ -18,8 +18,8 @@ compile_error!("Either bundled-libs or system-libs Cargo feature must be enabled #[cfg(all(feature = "bundled-libs", feature = "system-libs"))] compile_error!("Only one of bundled-libs and system-libs Cargo features must be enabled"); -#[cfg(not(any(feature = "lmdb", feature = "sled", feature = "sqlite")))] -compile_error!("Must activate the Cargo feature for at least one DB engine: lmdb, sled or sqlite."); +#[cfg(not(any(feature = "lmdb", feature = "sqlite")))] +compile_error!("Must activate the Cargo feature for at least one DB engine: lmdb or sqlite."); use std::net::SocketAddr; use std::path::PathBuf; @@ -72,8 +72,6 @@ async fn main() { let features = &[ #[cfg(feature = "k2v")] "k2v", - #[cfg(feature = "sled")] - "sled", #[cfg(feature = "lmdb")] "lmdb", #[cfg(feature = "sqlite")] -- cgit v1.2.3 From 05c92204ecab87540806073ac4deedfd58519240 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 8 Mar 2024 14:59:56 +0100 Subject: [rm-sled] Remove counted_tree_hack --- src/garage/admin/mod.rs | 51 +++++++++-------------------------------------- src/garage/cli/structs.rs | 4 ---- 2 files changed, 9 insertions(+), 46 deletions(-) (limited to 'src/garage') diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index de7851e1..896751cc 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -217,11 +217,11 @@ impl AdminRpcHandler { // Gather table statistics let mut table = vec![" Table\tItems\tMklItems\tMklTodo\tGcTodo".into()]; - table.push(self.gather_table_stats(&self.garage.bucket_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.key_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.object_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.version_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.block_ref_table, opt.detailed)?); + table.push(self.gather_table_stats(&self.garage.bucket_table)?); + table.push(self.gather_table_stats(&self.garage.key_table)?); + table.push(self.gather_table_stats(&self.garage.object_table)?); + table.push(self.gather_table_stats(&self.garage.version_table)?); + table.push(self.gather_table_stats(&self.garage.block_ref_table)?); write!( &mut ret, "\nTable stats:\n{}", @@ -231,15 +231,7 @@ impl AdminRpcHandler { // Gather block manager statistics writeln!(&mut ret, "\nBlock manager stats:").unwrap(); - let rc_len = if opt.detailed { - self.garage.block_manager.rc_len()?.to_string() - } else { - self.garage - .block_manager - .rc_fast_len()? - .map(|x| x.to_string()) - .unwrap_or_else(|| "NC".into()) - }; + let rc_len = self.garage.block_manager.rc_len()?.to_string(); writeln!( &mut ret, @@ -260,10 +252,6 @@ impl AdminRpcHandler { ) .unwrap(); - if !opt.detailed { - writeln!(&mut ret, "\nIf values are missing above (marked as NC), consider adding the --detailed flag (this will be slow).").unwrap(); - } - if !opt.skip_global { write!(&mut ret, "\n{}", self.gather_cluster_stats()).unwrap(); } @@ -365,34 +353,13 @@ impl AdminRpcHandler { ret } - fn gather_table_stats( - &self, - t: &Arc>, - detailed: bool, - ) -> Result + fn gather_table_stats(&self, t: &Arc>) -> Result where F: TableSchema + 'static, R: TableReplication + 'static, { - let (data_len, mkl_len) = if detailed { - ( - t.data.store.len().map_err(GarageError::from)?.to_string(), - t.merkle_updater.merkle_tree_len()?.to_string(), - ) - } else { - ( - t.data - .store - .fast_len() - .map_err(GarageError::from)? - .map(|x| x.to_string()) - .unwrap_or_else(|| "NC".into()), - t.merkle_updater - .merkle_tree_fast_len()? - .map(|x| x.to_string()) - .unwrap_or_else(|| "NC".into()), - ) - }; + let data_len = t.data.store.len().map_err(GarageError::from)?.to_string(); + let mkl_len = t.merkle_updater.merkle_tree_len()?.to_string(); Ok(format!( " {}\t{}\t{}\t{}\t{}", diff --git a/src/garage/cli/structs.rs b/src/garage/cli/structs.rs index 40e47ee1..7e7ab71b 100644 --- a/src/garage/cli/structs.rs +++ b/src/garage/cli/structs.rs @@ -553,10 +553,6 @@ pub struct StatsOpt { #[structopt(short = "a", long = "all-nodes")] pub all_nodes: bool, - /// Gather detailed statistics (this can be long) - #[structopt(short = "d", long = "detailed")] - pub detailed: bool, - /// Don't show global cluster stats (internal use in RPC) #[structopt(skip)] #[serde(default)] -- cgit v1.2.3 From dc0b78cdb88e9cbfd7dc1a2ee0b15333939be549 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 19 Mar 2024 11:04:20 +0100 Subject: [block-ref-repair] Block refcount recalculation and repair - We always recalculate the reference count of a block before deleting it locally, to make sure that it is indeed zero. - If we had to fetch a remote block but we were not able to get it, check that refcount is indeed > 0. - Repair procedure that checks everything --- src/garage/cli/structs.rs | 5 ++- src/garage/repair/online.rs | 106 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 1 deletion(-) (limited to 'src/garage') diff --git a/src/garage/cli/structs.rs b/src/garage/cli/structs.rs index 1f572a9a..8380b5e2 100644 --- a/src/garage/cli/structs.rs +++ b/src/garage/cli/structs.rs @@ -473,8 +473,11 @@ pub enum RepairWhat { #[structopt(name = "mpu", version = garage_version())] MultipartUploads, /// Repropagate version deletions to the block ref table - #[structopt(name = "block_refs", version = garage_version())] + #[structopt(name = "block-refs", version = garage_version())] BlockRefs, + /// Recalculate block reference counters + #[structopt(name = "block-rc", version = garage_version())] + BlockRc, /// Verify integrity of all blocks on disc #[structopt(name = "scrub", version = garage_version())] Scrub { diff --git a/src/garage/repair/online.rs b/src/garage/repair/online.rs index 9e4de873..ecccdf6d 100644 --- a/src/garage/repair/online.rs +++ b/src/garage/repair/online.rs @@ -4,6 +4,7 @@ use std::time::Duration; use async_trait::async_trait; use tokio::sync::watch; +use garage_block::manager::BlockManager; use garage_block::repair::ScrubWorkerCommand; use garage_model::garage::Garage; @@ -16,11 +17,14 @@ use garage_table::replication::*; use garage_table::*; use garage_util::background::*; +use garage_util::data::*; use garage_util::error::Error; use garage_util::migrate::Migrate; use crate::*; +const RC_REPAIR_ITER_COUNT: usize = 64; + pub async fn launch_online_repair( garage: &Arc, bg: &BackgroundRunner, @@ -47,6 +51,13 @@ pub async fn launch_online_repair( info!("Repairing the block refs table"); bg.spawn_worker(TableRepairWorker::new(garage.clone(), RepairBlockRefs)); } + RepairWhat::BlockRc => { + info!("Repairing the block reference counters"); + bg.spawn_worker(BlockRcRepair::new( + garage.block_manager.clone(), + garage.block_ref_table.clone(), + )); + } RepairWhat::Blocks => { info!("Repairing the stored blocks"); bg.spawn_worker(garage_block::repair::RepairWorker::new( @@ -282,3 +293,98 @@ impl TableRepair for RepairMpu { Ok(false) } } + +// ===== block reference counter repair ===== + +pub struct BlockRcRepair { + block_manager: Arc, + block_ref_table: Arc>, + cursor: Hash, + counter: u64, + repairs: u64, +} + +impl BlockRcRepair { + fn new( + block_manager: Arc, + block_ref_table: Arc>, + ) -> Self { + Self { + block_manager, + block_ref_table, + cursor: [0u8; 32].into(), + counter: 0, + repairs: 0, + } + } +} + +#[async_trait] +impl Worker for BlockRcRepair { + fn name(&self) -> String { + format!("Block refcount repair worker") + } + + fn status(&self) -> WorkerStatus { + WorkerStatus { + progress: Some(format!("{} ({})", self.counter, self.repairs)), + ..Default::default() + } + } + + async fn work(&mut self, _must_exit: &mut watch::Receiver) -> Result { + for _i in 0..RC_REPAIR_ITER_COUNT { + let next1 = self + .block_manager + .rc + .rc + .range(self.cursor.as_slice()..)? + .next() + .transpose()? + .map(|(k, _)| Hash::try_from(k.as_slice()).unwrap()); + let next2 = self + .block_ref_table + .data + .store + .range(self.cursor.as_slice()..)? + .next() + .transpose()? + .map(|(k, _)| Hash::try_from(&k[..32]).unwrap()); + let next = match (next1, next2) { + (Some(k1), Some(k2)) => std::cmp::min(k1, k2), + (Some(k), None) | (None, Some(k)) => k, + (None, None) => { + info!( + "{}: finished, done {}, fixed {}", + self.name(), + self.counter, + self.repairs + ); + return Ok(WorkerState::Done); + } + }; + + if self.block_manager.rc.recalculate_rc(&next)?.1 { + self.repairs += 1; + } + self.counter += 1; + if let Some(next_incr) = next.increment() { + self.cursor = next_incr; + } else { + info!( + "{}: finished, done {}, fixed {}", + self.name(), + self.counter, + self.repairs + ); + return Ok(WorkerState::Done); + } + } + + Ok(WorkerState::Busy) + } + + async fn wait_for_work(&mut self) -> WorkerState { + unreachable!() + } +} -- cgit v1.2.3 From 3165ab926c665b795eab7a227f65a67a0874641e Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 19 Mar 2024 16:09:47 +0100 Subject: [block-ref-repair] rename rc's rc field to rc_table --- src/garage/repair/online.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/garage') diff --git a/src/garage/repair/online.rs b/src/garage/repair/online.rs index ecccdf6d..2c5227d2 100644 --- a/src/garage/repair/online.rs +++ b/src/garage/repair/online.rs @@ -337,7 +337,7 @@ impl Worker for BlockRcRepair { let next1 = self .block_manager .rc - .rc + .rc_table .range(self.cursor.as_slice()..)? .next() .transpose()? -- cgit v1.2.3 From 3eab639c146f67fc67534633ae26c9aec116327d Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 19 Mar 2024 16:24:34 +0100 Subject: [block-ref-repair] mention `garage block repair-rc` in documentation --- src/garage/cli/util.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/garage') diff --git a/src/garage/cli/util.rs b/src/garage/cli/util.rs index 0511e2b1..21c14f42 100644 --- a/src/garage/cli/util.rs +++ b/src/garage/cli/util.rs @@ -451,7 +451,7 @@ pub fn print_block_info( if refcount != nondeleted_count { println!(); println!( - "Warning: refcount does not match number of non-deleted versions (see issue #644)." + "Warning: refcount does not match number of non-deleted versions, you should try `garage repair block-rc`." ); } } -- cgit v1.2.3 From 961b4f9af36a7fb5d3a661ac19e8f2c168bb48ae Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 21 Mar 2024 10:45:34 +0100 Subject: [net-fixes] fix issues with local peer address (fix #761) --- src/garage/cli/cmd.rs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'src/garage') diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs index a84061a7..44d3d96c 100644 --- a/src/garage/cli/cmd.rs +++ b/src/garage/cli/cmd.rs @@ -57,6 +57,10 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()]; for adv in status.iter().filter(|adv| adv.is_up) { let host = adv.status.hostname.as_deref().unwrap_or("?"); + let addr = match adv.addr { + Some(addr) => addr.to_string(), + None => "N/A".to_string(), + }; if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) { let data_avail = match &adv.status.data_disk_avail { _ if cfg.capacity.is_none() => "N/A".into(), @@ -71,7 +75,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}", id = adv.id, host = host, - addr = adv.addr, + addr = addr, tags = cfg.tags.join(","), zone = cfg.zone, capacity = cfg.capacity_string(), @@ -91,7 +95,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tdraining metadata...", id = adv.id, host = host, - addr = adv.addr, + addr = addr, tags = cfg.tags.join(","), zone = cfg.zone, )); @@ -104,7 +108,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> "{id:?}\t{h}\t{addr}\t\t\t{new_role}", id = adv.id, h = host, - addr = adv.addr, + addr = addr, new_role = new_role, )); } @@ -120,8 +124,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> let tf = timeago::Formatter::new(); let mut drain_msg = false; - let mut failed_nodes = - vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tLast seen".to_string()]; + let mut failed_nodes = vec!["ID\tHostname\tTags\tZone\tCapacity\tLast seen".to_string()]; let mut listed = HashSet::new(); for ver in layout.versions.iter().rev() { for (node, _, role) in ver.roles.items().iter() { @@ -142,15 +145,14 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> // Node is in a layout version, is not a gateway node, and is not up: // it is in a failed state, add proper line to the output - let (host, addr, last_seen) = match adv { + let (host, last_seen) = match adv { Some(adv) => ( adv.status.hostname.as_deref().unwrap_or("?"), - adv.addr.to_string(), adv.last_seen_secs_ago .map(|s| tf.convert(Duration::from_secs(s))) .unwrap_or_else(|| "never seen".into()), ), - None => ("??", "??".into(), "never seen".into()), + None => ("??", "never seen".into()), }; let capacity = if ver.version == layout.current().version { cfg.capacity_string() @@ -159,10 +161,9 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> "draining metadata...".to_string() }; failed_nodes.push(format!( - "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}", + "{id:?}\t{host}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}", id = node, host = host, - addr = addr, tags = cfg.tags.join(","), zone = cfg.zone, capacity = capacity, -- cgit v1.2.3 From 74949c69cbf1a8222b6d10a02fcf5fe139ccb560 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 21 Mar 2024 14:06:59 +0100 Subject: [s3-checksum] implement x-amz-checksum-* headers --- src/garage/Cargo.toml | 1 + src/garage/tests/s3/multipart.rs | 158 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 158 insertions(+), 1 deletion(-) (limited to 'src/garage') diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index 53449a1c..17da68f8 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -42,6 +42,7 @@ tracing.workspace = true tracing-subscriber.workspace = true rand.workspace = true async-trait.workspace = true +sha1.workspace = true sodiumoxide.workspace = true structopt.workspace = true git-version.workspace = true diff --git a/src/garage/tests/s3/multipart.rs b/src/garage/tests/s3/multipart.rs index 51c9df74..cc424f59 100644 --- a/src/garage/tests/s3/multipart.rs +++ b/src/garage/tests/s3/multipart.rs @@ -1,6 +1,7 @@ use crate::common; use aws_sdk_s3::primitives::ByteStream; -use aws_sdk_s3::types::{CompletedMultipartUpload, CompletedPart}; +use aws_sdk_s3::types::{ChecksumAlgorithm, CompletedMultipartUpload, CompletedPart}; +use base64::prelude::*; const SZ_5MB: usize = 5 * 1024 * 1024; const SZ_10MB: usize = 10 * 1024 * 1024; @@ -189,6 +190,153 @@ async fn test_multipart_upload() { } } +#[tokio::test] +async fn test_multipart_with_checksum() { + let ctx = common::context(); + let bucket = ctx.create_bucket("testmpu-cksum"); + + let u1 = vec![0x11; SZ_5MB]; + let u2 = vec![0x22; SZ_5MB]; + let u3 = vec![0x33; SZ_5MB]; + + let ck1 = calculate_sha1(&u1); + let ck2 = calculate_sha1(&u2); + let ck3 = calculate_sha1(&u3); + + let up = ctx + .client + .create_multipart_upload() + .bucket(&bucket) + .checksum_algorithm(ChecksumAlgorithm::Sha1) + .key("a") + .send() + .await + .unwrap(); + assert!(up.upload_id.is_some()); + + let uid = up.upload_id.as_ref().unwrap(); + + let p1 = ctx + .client + .upload_part() + .bucket(&bucket) + .key("a") + .upload_id(uid) + .part_number(1) + .checksum_sha1(&ck1) + .body(ByteStream::from(u1.clone())) + .send() + .await + .unwrap(); + + // wrong checksum value should return an error + let err1 = ctx + .client + .upload_part() + .bucket(&bucket) + .key("a") + .upload_id(uid) + .part_number(2) + .checksum_sha1(&ck1) + .body(ByteStream::from(u2.clone())) + .send() + .await; + assert!(err1.is_err()); + + let p2 = ctx + .client + .upload_part() + .bucket(&bucket) + .key("a") + .upload_id(uid) + .part_number(2) + .checksum_sha1(&ck2) + .body(ByteStream::from(u2)) + .send() + .await + .unwrap(); + + let p3 = ctx + .client + .upload_part() + .bucket(&bucket) + .key("a") + .upload_id(uid) + .part_number(3) + .checksum_sha1(&ck3) + .body(ByteStream::from(u3.clone())) + .send() + .await + .unwrap(); + + { + let r = ctx + .client + .list_parts() + .bucket(&bucket) + .key("a") + .upload_id(uid) + .send() + .await + .unwrap(); + let parts = r.parts.unwrap(); + assert_eq!(parts.len(), 3); + assert!(parts[0].checksum_crc32.is_none()); + assert!(parts[0].checksum_crc32_c.is_none()); + assert!(parts[0].checksum_sha256.is_none()); + assert_eq!(parts[0].checksum_sha1.as_deref().unwrap(), ck1); + assert_eq!(parts[1].checksum_sha1.as_deref().unwrap(), ck2); + assert_eq!(parts[2].checksum_sha1.as_deref().unwrap(), ck3); + } + + let cmp = CompletedMultipartUpload::builder() + .parts( + CompletedPart::builder() + .part_number(1) + .checksum_sha1(&ck1) + .e_tag(p1.e_tag.unwrap()) + .build(), + ) + .parts( + CompletedPart::builder() + .part_number(2) + .checksum_sha1(&ck2) + .e_tag(p2.e_tag.unwrap()) + .build(), + ) + .parts( + CompletedPart::builder() + .part_number(3) + .checksum_sha1(&ck3) + .e_tag(p3.e_tag.unwrap()) + .build(), + ) + .build(); + + let expected_checksum = calculate_sha1( + &vec![ + BASE64_STANDARD.decode(&ck1).unwrap(), + BASE64_STANDARD.decode(&ck2).unwrap(), + BASE64_STANDARD.decode(&ck3).unwrap(), + ] + .concat(), + ); + + let res = ctx + .client + .complete_multipart_upload() + .bucket(&bucket) + .key("a") + .upload_id(uid) + .checksum_sha1(expected_checksum.clone()) + .multipart_upload(cmp) + .send() + .await + .unwrap(); + + assert_eq!(res.checksum_sha1, Some(expected_checksum)); +} + #[tokio::test] async fn test_uploadlistpart() { let ctx = common::context(); @@ -624,3 +772,11 @@ async fn test_uploadpartcopy() { assert_eq!(real_obj.len(), exp_obj.len()); assert_eq!(real_obj, exp_obj); } + +fn calculate_sha1(bytes: &[u8]) -> String { + use sha1::{Digest, Sha1}; + + let mut hasher = Sha1::new(); + hasher.update(bytes); + BASE64_STANDARD.encode(&hasher.finalize()[..]) +} -- cgit v1.2.3 From afad62939e071621666ca7255f7164f92c4475bb Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 28 Mar 2024 15:19:44 +0100 Subject: [next-0.10] bump version number to 1.0 --- src/garage/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/garage') diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index a4acbb1f..9cc71abd 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "garage" -version = "0.10.0" +version = "1.0.0" authors = ["Alex Auvolat "] edition = "2018" license = "AGPL-3.0" -- cgit v1.2.3