author     Alex Auvolat <alex@adnab.me>    2023-11-28 14:25:04 +0100
committer  Alex Auvolat <alex@adnab.me>    2023-11-28 14:25:04 +0100
commit     c04dd8788a3764da2f307b1d10c2d56b7b0e4a61 (patch)
tree       2c02648080fde466dcef7c39987d55a03d4a4399 /src
parent     539af6eac434bd94acbcabcc5bb5c10450b71c5d (diff)
admin: more info in admin GetClusterStatus
Diffstat (limited to 'src')
-rw-r--r--  src/api/admin/cluster.rs  | 122
-rw-r--r--  src/garage/admin/mod.rs   |   2
-rw-r--r--  src/garage/cli/cmd.rs     |   9
-rw-r--r--  src/rpc/system.rs         |  12
4 files changed, 116 insertions, 29 deletions
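
For illustration, a GetClusterStatus response under the new schema would look roughly like the sketch below. This is not output from a real cluster: the identifiers, sizes and version strings are made up, the field names assume the camelCase serde convention used by these response structs, and dataPartition / metadataPartition only appear when the node reports disk statistics (they are skipped when None).

    {
      "node": "ec79480e0ce52ae26fd00c9da684e4fa...",
      "garageVersion": "v0.9.0",
      "garageFeatures": ["k2v", "lmdb", "sqlite"],
      "rustVersion": "1.71.0",
      "dbEngine": "LMDB (using Heed crate)",
      "layoutVersion": 12,
      "nodes": [
        {
          "id": "ec79480e0ce52ae26fd00c9da684e4fa...",
          "role": { "id": "ec79480e0ce52ae26fd00c9da684e4fa...", "zone": "dc1", "capacity": 100000000000, "tags": [] },
          "addr": "10.0.0.11:3901",
          "hostname": "node1",
          "isUp": true,
          "lastSeenSecsAgo": null,
          "draining": false,
          "dataPartition": { "available": 660270088192, "total": 873862266880 },
          "metadataPartition": { "available": 660270088192, "total": 873862266880 }
        }
      ]
    }
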
diff --git a/src/api/admin/cluster.rs b/src/api/admin/cluster.rs
index 593bd778..3ce1b254 100644
--- a/src/api/admin/cluster.rs
+++ b/src/api/admin/cluster.rs
@@ -1,3 +1,4 @@
+use std::collections::HashMap;
use std::net::SocketAddr;
use std::sync::Arc;
@@ -15,25 +16,95 @@ use crate::admin::error::*;
use crate::helpers::{json_ok_response, parse_json_body};
pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
+ let layout = garage.system.cluster_layout();
+ let mut nodes = garage
+ .system
+ .get_known_nodes()
+ .into_iter()
+ .map(|i| {
+ (
+ i.id,
+ NodeResp {
+ id: hex::encode(i.id),
+ addr: Some(i.addr),
+ hostname: i.status.hostname,
+ is_up: i.is_up,
+ last_seen_secs_ago: i.last_seen_secs_ago,
+ data_partition: i
+ .status
+ .data_disk_avail
+ .map(|(avail, total)| FreeSpaceResp {
+ available: avail,
+ total,
+ }),
+ metadata_partition: i.status.meta_disk_avail.map(|(avail, total)| {
+ FreeSpaceResp {
+ available: avail,
+ total,
+ }
+ }),
+ ..Default::default()
+ },
+ )
+ })
+ .collect::<HashMap<_, _>>();
+
+ for (id, _, role) in layout.current().roles.items().iter() {
+ if let layout::NodeRoleV(Some(r)) = role {
+ let role = NodeRoleResp {
+ id: hex::encode(id),
+ zone: r.zone.to_string(),
+ capacity: r.capacity,
+ tags: r.tags.clone(),
+ };
+ match nodes.get_mut(id) {
+ None => {
+ nodes.insert(
+ *id,
+ NodeResp {
+ id: hex::encode(id),
+ role: Some(role),
+ ..Default::default()
+ },
+ );
+ }
+ Some(n) => {
+ if n.role.is_none() {
+ n.role = Some(role);
+ }
+ }
+ }
+ }
+ }
+
+ for ver in layout.versions.iter().rev().skip(1) {
+ for (id, _, role) in ver.roles.items().iter() {
+ if let layout::NodeRoleV(Some(r)) = role {
+ if !nodes.contains_key(id) && r.capacity.is_some() {
+ nodes.insert(
+ *id,
+ NodeResp {
+ id: hex::encode(id),
+ draining: true,
+ ..Default::default()
+ },
+ );
+ }
+ }
+ }
+ }
+
+ let mut nodes = nodes.into_iter().map(|(_, v)| v).collect::<Vec<_>>();
+ nodes.sort_by(|x, y| x.id.cmp(&y.id));
+
let res = GetClusterStatusResponse {
node: hex::encode(garage.system.id),
garage_version: garage_util::version::garage_version(),
garage_features: garage_util::version::garage_features(),
rust_version: garage_util::version::rust_version(),
db_engine: garage.db.engine(),
- known_nodes: garage
- .system
- .get_known_nodes()
- .into_iter()
- .map(|i| KnownNodeResp {
- id: hex::encode(i.id),
- addr: i.addr,
- is_up: i.is_up,
- last_seen_secs_ago: i.last_seen_secs_ago,
- hostname: i.status.hostname,
- })
- .collect(),
- layout: format_cluster_layout(&garage.system.cluster_layout()),
+ layout_version: layout.current().version,
+ nodes,
};
Ok(json_ok_response(&res)?)
@@ -157,8 +228,8 @@ struct GetClusterStatusResponse {
garage_features: Option<&'static [&'static str]>,
rust_version: &'static str,
db_engine: String,
- known_nodes: Vec<KnownNodeResp>,
- layout: GetClusterLayoutResponse,
+ layout_version: u64,
+ nodes: Vec<NodeResp>,
}
#[derive(Serialize)]
@@ -192,14 +263,27 @@ struct NodeRoleResp {
tags: Vec<String>,
}
-#[derive(Serialize)]
+#[derive(Serialize, Default)]
+#[serde(rename_all = "camelCase")]
+struct FreeSpaceResp {
+ available: u64,
+ total: u64,
+}
+
+#[derive(Serialize, Default)]
#[serde(rename_all = "camelCase")]
-struct KnownNodeResp {
+struct NodeResp {
id: String,
- addr: SocketAddr,
+ role: Option<NodeRoleResp>,
+ addr: Option<SocketAddr>,
+ hostname: Option<String>,
is_up: bool,
last_seen_secs_ago: Option<u64>,
- hostname: String,
+ draining: bool,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ data_partition: Option<FreeSpaceResp>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ metadata_partition: Option<FreeSpaceResp>,
}
// ---- update functions ----
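
A related detail, again a sketch rather than real output: a node that is neither currently known nor assigned a role in the current layout, but still holds capacity in an older layout version, gets a Default-filled entry with draining set to true; because of the skip_serializing_if attributes, that entry carries no partition fields, e.g.:

    { "id": "5a2c...", "role": null, "addr": null, "hostname": null,
      "isUp": false, "lastSeenSecsAgo": null, "draining": true }
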
diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs
index 77918a0f..da4226cf 100644
--- a/src/garage/admin/mod.rs
+++ b/src/garage/admin/mod.rs
@@ -295,7 +295,7 @@ impl AdminRpcHandler {
let info = node_info.get(id);
let status = info.map(|x| &x.status);
let role = layout.current().roles.get(id).and_then(|x| x.0.as_ref());
- let hostname = status.map(|x| x.hostname.as_str()).unwrap_or("?");
+ let hostname = status.and_then(|x| x.hostname.as_deref()).unwrap_or("?");
let zone = role.map(|x| x.zone.as_str()).unwrap_or("?");
let capacity = role
.map(|x| x.capacity_string())
diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs
index 4d1306b6..c7f0ad2b 100644
--- a/src/garage/cli/cmd.rs
+++ b/src/garage/cli/cmd.rs
@@ -62,6 +62,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
let mut healthy_nodes =
vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()];
for adv in status.iter().filter(|adv| adv.is_up) {
+ let host = adv.status.hostname.as_deref().unwrap_or("?");
if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) {
let data_avail = match &adv.status.data_disk_avail {
_ if cfg.capacity.is_none() => "N/A".into(),
@@ -75,7 +76,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
healthy_nodes.push(format!(
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}",
id = adv.id,
- host = adv.status.hostname,
+ host = host,
addr = adv.addr,
tags = cfg.tags.join(","),
zone = cfg.zone,
@@ -95,7 +96,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
healthy_nodes.push(format!(
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tdraining metadata...",
id = adv.id,
- host = adv.status.hostname,
+ host = host,
addr = adv.addr,
tags = cfg.tags.join(","),
zone = cfg.zone,
@@ -108,7 +109,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
healthy_nodes.push(format!(
"{id:?}\t{h}\t{addr}\t\t\t{new_role}",
id = adv.id,
- h = adv.status.hostname,
+ h = host,
addr = adv.addr,
new_role = new_role,
));
@@ -149,7 +150,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
// it is in a failed state, add proper line to the output
let (host, addr, last_seen) = match adv {
Some(adv) => (
- adv.status.hostname.as_str(),
+ adv.status.hostname.as_deref().unwrap_or("?"),
adv.addr.to_string(),
adv.last_seen_secs_ago
.map(|s| tf.convert(Duration::from_secs(s)))
diff --git a/src/rpc/system.rs b/src/rpc/system.rs
index c7d41ee4..be4aefa2 100644
--- a/src/rpc/system.rs
+++ b/src/rpc/system.rs
@@ -126,7 +126,7 @@ pub struct System {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeStatus {
/// Hostname of the node
- pub hostname: String,
+ pub hostname: Option<String>,
/// Replication factor configured on the node
pub replication_factor: usize,
@@ -765,9 +765,11 @@ impl EndpointHandler<SystemRpc> for System {
impl NodeStatus {
fn initial(replication_factor: usize, layout_manager: &LayoutManager) -> Self {
NodeStatus {
- hostname: gethostname::gethostname()
- .into_string()
- .unwrap_or_else(|_| "<invalid utf-8>".to_string()),
+ hostname: Some(
+ gethostname::gethostname()
+ .into_string()
+ .unwrap_or_else(|_| "<invalid utf-8>".to_string()),
+ ),
replication_factor,
layout_digest: layout_manager.layout().digest(),
meta_disk_avail: None,
@@ -777,7 +779,7 @@ impl NodeStatus {
fn unknown() -> Self {
NodeStatus {
- hostname: "?".to_string(),
+ hostname: None,
replication_factor: 0,
layout_digest: Default::default(),
meta_disk_avail: None,