about summary refs log tree commit diff
diff options
context:
space:
mode:
author Alex Auvolat <alex@adnab.me> 2023-12-08 11:24:23 +0100
committer Alex Auvolat <alex@adnab.me> 2023-12-08 11:24:23 +0100
commit 7f2541101f15614c79020b35d3d7dab767c32676 (patch)
tree de4cd319331a0f427ddebb5b0544a3861e8f315c
parent 91b874c4efa40e64663368369a712e0a5a389e53 (diff)
download garage-7f2541101f15614c79020b35d3d7dab767c32676.tar.gz
garage-7f2541101f15614c79020b35d3d7dab767c32676.zip
cli: improvements to the layout commands when multiple layouts are live
-rw-r--r-- src/garage/admin/mod.rs 3
-rw-r--r-- src/garage/cli/cmd.rs 4
-rw-r--r-- src/garage/cli/layout.rs 67
-rw-r--r-- src/garage/cli/util.rs 4
4 files changed, 49 insertions, 29 deletions
diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs
index da4226cf..de7851e1 100644
--- a/src/garage/admin/mod.rs
+++ b/src/garage/admin/mod.rs
@@ -274,8 +274,7 @@ impl AdminRpcHandler {
fn gather_cluster_stats(&self) -> String {
let mut ret = String::new();
- // Gather storage node and free space statistics
- // TODO: not only layout.current() ???
+ // Gather storage node and free space statistics for current nodes
let layout = &self.garage.system.cluster_layout();
let mut node_partition_count = HashMap::<Uuid, u64>::new();
for short_id in layout.current().ring_assignment_data.iter() {
diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs
index 196c0cb3..fb6dface 100644
--- a/src/garage/cli/cmd.rs
+++ b/src/garage/cli/cmd.rs
@@ -179,7 +179,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
println!("Your cluster is expecting to drain data from nodes that are currently unavailable.");
println!("If these nodes are definitely dead, please review the layout history with");
println!(
- "`garage layout history` and use `garage layout assume-sync` to force progress."
+ "`garage layout history` and use `garage layout skip-dead-nodes` to force progress."
);
}
}
@@ -274,6 +274,6 @@ pub async fn fetch_status(
.await??
{
SystemRpc::ReturnKnownNodes(nodes) => Ok(nodes),
- resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
+ resp => Err(Error::unexpected_rpc_message(resp)),
}
}
diff --git a/src/garage/cli/layout.rs b/src/garage/cli/layout.rs
index fac826f5..f76e33c5 100644
--- a/src/garage/cli/layout.rs
+++ b/src/garage/cli/layout.rs
@@ -354,35 +354,44 @@ pub async fn cmd_layout_history(
));
}
format_table(table);
-
- println!();
- println!("==== UPDATE TRACKERS ====");
- println!("This is the internal data that Garage stores to know which nodes have what data.");
println!();
- let mut table = vec!["Node\tAck\tSync\tSync_ack".to_string()];
- let all_nodes = layout.get_all_nodes();
- for node in all_nodes.iter() {
- table.push(format!(
- "{:?}\t#{}\t#{}\t#{}",
- node,
- layout.update_trackers.ack_map.get(node, min_stored),
- layout.update_trackers.sync_map.get(node, min_stored),
- layout.update_trackers.sync_ack_map.get(node, min_stored),
- ));
- }
- table[1..].sort();
- format_table(table);
if layout.versions.len() > 1 {
+ println!("==== UPDATE TRACKERS ====");
+ println!("Several layout versions are currently live in the cluster, and data is being migrated.");
+ println!(
+ "This is the internal data that Garage stores to know which nodes have what data."
+ );
+ println!();
+ let mut table = vec!["Node\tAck\tSync\tSync_ack".to_string()];
+ let all_nodes = layout.get_all_nodes();
+ for node in all_nodes.iter() {
+ table.push(format!(
+ "{:?}\t#{}\t#{}\t#{}",
+ node,
+ layout.update_trackers.ack_map.get(node, min_stored),
+ layout.update_trackers.sync_map.get(node, min_stored),
+ layout.update_trackers.sync_ack_map.get(node, min_stored),
+ ));
+ }
+ table[1..].sort();
+ format_table(table);
+
println!();
println!(
- "If some nodes are not catching up to the latest layout version in the update tracker,"
+ "If some nodes are not catching up to the latest layout version in the update trackers,"
);
println!("it might be because they are offline or unable to complete a sync successfully.");
println!(
- "You may force progress using `garage layout assume-sync --version {}`",
+ "You may force progress using `garage layout skip-dead-nodes --version {}`",
layout.current().version
);
+ } else {
+ println!("Your cluster is currently in a stable state with a single live layout version.");
+ println!("No metadata migration is in progress. Note that the migration of data blocks is not tracked,");
+ println!(
+ "so you might want to keep old nodes online until their data directories become empty."
+ );
}
Ok(())
@@ -415,6 +424,7 @@ pub async fn cmd_layout_skip_dead_nodes(
}
let all_nodes = layout.get_all_nodes();
+ let mut did_something = false;
for node in all_nodes.iter() {
if status.iter().any(|x| x.id == *node && x.is_up) {
continue;
@@ -422,19 +432,28 @@ pub async fn cmd_layout_skip_dead_nodes(
if layout.update_trackers.ack_map.set_max(*node, opt.version) {
println!("Increased the ACK tracker for node {:?}", node);
+ did_something = true;
}
if opt.allow_missing_data {
if layout.update_trackers.sync_map.set_max(*node, opt.version) {
println!("Increased the SYNC tracker for node {:?}", node);
+ did_something = true;
}
}
}
- send_layout(rpc_cli, rpc_host, layout).await?;
- println!("Success.");
-
- Ok(())
+ if did_something {
+ send_layout(rpc_cli, rpc_host, layout).await?;
+ println!("Success.");
+ Ok(())
+ } else if !opt.allow_missing_data {
+ Err(Error::Message("Nothing was done, try passing the `--allow-missing-data` flag to force progress even when not enough nodes can complete a metadata sync.".into()))
+ } else {
+ Err(Error::Message(
+ "Sorry, there is nothing I can do for you. Please wait patiently. If you ask for help, please send the output of the `garage layout history` command.".into(),
+ ))
+ }
}
// --- utility ---
@@ -448,7 +467,7 @@ pub async fn fetch_layout(
.await??
{
SystemRpc::AdvertiseClusterLayout(t) => Ok(t),
- resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
+ resp => Err(Error::unexpected_rpc_message(resp)),
}
}
diff --git a/src/garage/cli/util.rs b/src/garage/cli/util.rs
index 2232d395..0511e2b1 100644
--- a/src/garage/cli/util.rs
+++ b/src/garage/cli/util.rs
@@ -450,6 +450,8 @@ pub fn print_block_info(
if refcount != nondeleted_count {
println!();
- println!("Warning: refcount does not match number of non-deleted versions");
+ println!(
+ "Warning: refcount does not match number of non-deleted versions (see issue #644)."
+ );
}
}