author    Alex Auvolat <alex@adnab.me>  2023-11-28 14:25:04 +0100
committer Alex Auvolat <alex@adnab.me>  2023-11-28 14:25:04 +0100
commit    c04dd8788a3764da2f307b1d10c2d56b7b0e4a61 (patch)
tree      2c02648080fde466dcef7c39987d55a03d4a4399
parent    539af6eac434bd94acbcabcc5bb5c10450b71c5d (diff)
admin: more info in admin GetClusterStatus
-rw-r--r--  doc/drafts/admin-api.md   | 139
-rw-r--r--  src/api/admin/cluster.rs  | 122
-rw-r--r--  src/garage/admin/mod.rs   |   2
-rw-r--r--  src/garage/cli/cmd.rs     |   9
-rw-r--r--  src/rpc/system.rs         |  12
5 files changed, 190 insertions(+), 94 deletions(-)
diff --git a/doc/drafts/admin-api.md b/doc/drafts/admin-api.md
index 411f6418..274bd5c4 100644
--- a/doc/drafts/admin-api.md
+++ b/doc/drafts/admin-api.md
@@ -69,8 +69,8 @@ Example response body:
```json
{
- "node": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f",
- "garageVersion": "git:v0.9.0-dev",
+ "node": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df",
+ "garageVersion": "v0.10.0",
"garageFeatures": [
"k2v",
"sled",
@@ -81,83 +81,92 @@ Example response body:
],
"rustVersion": "1.68.0",
"dbEngine": "LMDB (using Heed crate)",
- "knownNodes": [
+ "layoutVersion": 5,
+ "nodes": [
{
- "id": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f",
- "addr": "10.0.0.11:3901",
+ "id": "62b218d848e86a64f7fe1909735f29a4350547b54c4b204f91246a14eb0a1a8c",
+ "role": {
+ "id": "62b218d848e86a64f7fe1909735f29a4350547b54c4b204f91246a14eb0a1a8c",
+ "zone": "dc1",
+ "capacity": 100000000000,
+ "tags": []
+ },
+ "addr": "10.0.0.3:3901",
+ "hostname": "node3",
"isUp": true,
- "lastSeenSecsAgo": 9,
- "hostname": "node1"
+ "lastSeenSecsAgo": 12,
+ "draining": false,
+ "dataPartition": {
+ "available": 660270088192,
+ "total": 873862266880
+ },
+ "metadataPartition": {
+ "available": 660270088192,
+ "total": 873862266880
+ }
},
{
- "id": "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff",
- "addr": "10.0.0.12:3901",
+ "id": "a11c7cf18af297379eff8688360155fe68d9061654449ba0ce239252f5a7487f",
+ "role": null,
+ "addr": "10.0.0.2:3901",
+ "hostname": "node2",
"isUp": true,
- "lastSeenSecsAgo": 1,
- "hostname": "node2"
+ "lastSeenSecsAgo": 11,
+ "draining": true,
+ "dataPartition": {
+ "available": 660270088192,
+ "total": 873862266880
+ },
+ "metadataPartition": {
+ "available": 660270088192,
+ "total": 873862266880
+ }
},
{
- "id": "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27",
- "addr": "10.0.0.21:3901",
+ "id": "a235ac7695e0c54d7b403943025f57504d500fdcc5c3e42c71c5212faca040a2",
+ "role": {
+ "id": "a235ac7695e0c54d7b403943025f57504d500fdcc5c3e42c71c5212faca040a2",
+ "zone": "dc1",
+ "capacity": 100000000000,
+ "tags": []
+ },
+ "addr": "127.0.0.1:3904",
+ "hostname": "lindy",
"isUp": true,
- "lastSeenSecsAgo": 7,
- "hostname": "node3"
+ "lastSeenSecsAgo": 2,
+ "draining": false,
+ "dataPartition": {
+ "available": 660270088192,
+ "total": 873862266880
+ },
+ "metadataPartition": {
+ "available": 660270088192,
+ "total": 873862266880
+ }
},
{
- "id": "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b",
- "addr": "10.0.0.22:3901",
- "isUp": true,
- "lastSeenSecsAgo": 1,
- "hostname": "node4"
- }
- ],
- "layout": {
- "version": 12,
- "roles": [
- {
- "id": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f",
+ "id": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df",
+ "role": {
+ "id": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df",
"zone": "dc1",
- "capacity": 10737418240,
- "tags": [
- "node1"
- ]
+ "capacity": 100000000000,
+ "tags": []
},
- {
- "id": "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff",
- "zone": "dc1",
- "capacity": 10737418240,
- "tags": [
- "node2"
- ]
+ "addr": "10.0.0.1:3901",
+ "hostname": "node1",
+ "isUp": true,
+ "lastSeenSecsAgo": 3,
+ "draining": false,
+ "dataPartition": {
+ "available": 660270088192,
+ "total": 873862266880
},
- {
- "id": "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27",
- "zone": "dc2",
- "capacity": 10737418240,
- "tags": [
- "node3"
- ]
+ "metadataPartition": {
+ "available": 660270088192,
+ "total": 873862266880
}
- ],
- "stagedRoleChanges": [
- {
- "id": "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b",
- "remove": false,
- "zone": "dc2",
- "capacity": 10737418240,
- "tags": [
- "node4"
- ]
- }
- {
- "id": "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27",
- "remove": true,
- "zone": null,
- "capacity": null,
- "tags": null,
- }
- ]
- }
+ }
+ ]
}
```
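
For readers wiring a client against this endpoint, the new response shape can be mirrored with serde structs. A minimal deserialization sketch, assuming the serde and serde_json crates; the Rust type and function names below are hypothetical, only the JSON keys come from the example above:

```rust
use serde::Deserialize;

// Illustrative client-side mirror of the response above; only the JSON
// keys are taken from the API example, the Rust names are hypothetical.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct ClusterStatus {
    node: String,
    garage_version: String,
    layout_version: u64,
    nodes: Vec<Node>,
}

#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct Node {
    id: String,
    role: Option<Role>,      // null for nodes with no assigned role
    is_up: bool,
    draining: bool,
    data_partition: Option<FreeSpace>,
    metadata_partition: Option<FreeSpace>,
}

#[derive(Deserialize)]
struct Role {
    zone: String,
    capacity: Option<u64>,
    tags: Vec<String>,
}

#[derive(Deserialize)]
struct FreeSpace {
    available: u64,
    total: u64,
}

fn parse(body: &str) -> serde_json::Result<ClusterStatus> {
    serde_json::from_str(body)
}
```

Keys not modeled here (such as `garageFeatures`) are ignored by serde by default, and the `Option` fields tolerate both explicit `null` values (`role`) and keys omitted by `skip_serializing_if` (`dataPartition`, `metadataPartition`).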
diff --git a/src/api/admin/cluster.rs b/src/api/admin/cluster.rs
index 593bd778..3ce1b254 100644
--- a/src/api/admin/cluster.rs
+++ b/src/api/admin/cluster.rs
@@ -1,3 +1,4 @@
+use std::collections::HashMap;
use std::net::SocketAddr;
use std::sync::Arc;
@@ -15,25 +16,95 @@ use crate::admin::error::*;
use crate::helpers::{json_ok_response, parse_json_body};
pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
+ let layout = garage.system.cluster_layout();
+ let mut nodes = garage
+ .system
+ .get_known_nodes()
+ .into_iter()
+ .map(|i| {
+ (
+ i.id,
+ NodeResp {
+ id: hex::encode(i.id),
+ addr: Some(i.addr),
+ hostname: i.status.hostname,
+ is_up: i.is_up,
+ last_seen_secs_ago: i.last_seen_secs_ago,
+ data_partition: i
+ .status
+ .data_disk_avail
+ .map(|(avail, total)| FreeSpaceResp {
+ available: avail,
+ total,
+ }),
+ metadata_partition: i.status.meta_disk_avail.map(|(avail, total)| {
+ FreeSpaceResp {
+ available: avail,
+ total,
+ }
+ }),
+ ..Default::default()
+ },
+ )
+ })
+ .collect::<HashMap<_, _>>();
+
+ for (id, _, role) in layout.current().roles.items().iter() {
+ if let layout::NodeRoleV(Some(r)) = role {
+ let role = NodeRoleResp {
+ id: hex::encode(id),
+ zone: r.zone.to_string(),
+ capacity: r.capacity,
+ tags: r.tags.clone(),
+ };
+ match nodes.get_mut(id) {
+ None => {
+ nodes.insert(
+ *id,
+ NodeResp {
+ id: hex::encode(id),
+ role: Some(role),
+ ..Default::default()
+ },
+ );
+ }
+ Some(n) => {
+ if n.role.is_none() {
+ n.role = Some(role);
+ }
+ }
+ }
+ }
+ }
+
+ for ver in layout.versions.iter().rev().skip(1) {
+ for (id, _, role) in ver.roles.items().iter() {
+ if let layout::NodeRoleV(Some(r)) = role {
+ if !nodes.contains_key(id) && r.capacity.is_some() {
+ nodes.insert(
+ *id,
+ NodeResp {
+ id: hex::encode(id),
+ draining: true,
+ ..Default::default()
+ },
+ );
+ }
+ }
+ }
+ }
+
+ let mut nodes = nodes.into_iter().map(|(_, v)| v).collect::<Vec<_>>();
+ nodes.sort_by(|x, y| x.id.cmp(&y.id));
+
let res = GetClusterStatusResponse {
node: hex::encode(garage.system.id),
garage_version: garage_util::version::garage_version(),
garage_features: garage_util::version::garage_features(),
rust_version: garage_util::version::rust_version(),
db_engine: garage.db.engine(),
- known_nodes: garage
- .system
- .get_known_nodes()
- .into_iter()
- .map(|i| KnownNodeResp {
- id: hex::encode(i.id),
- addr: i.addr,
- is_up: i.is_up,
- last_seen_secs_ago: i.last_seen_secs_ago,
- hostname: i.status.hostname,
- })
- .collect(),
- layout: format_cluster_layout(&garage.system.cluster_layout()),
+ layout_version: layout.current().version,
+ nodes,
};
Ok(json_ok_response(&res)?)
@@ -157,8 +228,8 @@ struct GetClusterStatusResponse {
garage_features: Option<&'static [&'static str]>,
rust_version: &'static str,
db_engine: String,
- known_nodes: Vec<KnownNodeResp>,
- layout: GetClusterLayoutResponse,
+ layout_version: u64,
+ nodes: Vec<NodeResp>,
}
#[derive(Serialize)]
@@ -192,14 +263,27 @@ struct NodeRoleResp {
tags: Vec<String>,
}
-#[derive(Serialize)]
+#[derive(Serialize, Default)]
+#[serde(rename_all = "camelCase")]
+struct FreeSpaceResp {
+ available: u64,
+ total: u64,
+}
+
+#[derive(Serialize, Default)]
#[serde(rename_all = "camelCase")]
-struct KnownNodeResp {
+struct NodeResp {
id: String,
- addr: SocketAddr,
+ role: Option<NodeRoleResp>,
+ addr: Option<SocketAddr>,
+ hostname: Option<String>,
is_up: bool,
last_seen_secs_ago: Option<u64>,
- hostname: String,
+ draining: bool,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ data_partition: Option<FreeSpaceResp>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ metadata_partition: Option<FreeSpaceResp>,
}
// ---- update functions ----
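
The new handler body follows a three-pass merge: index the gossip view of known nodes in a `HashMap`, overlay the role recorded in the current layout version, then sweep older layout versions so nodes that only remain to drain their data still appear, flagged with `draining: true`. A stripped-down sketch of that pattern using the `HashMap` entry API, with deliberately simplified stand-in types (all names hypothetical):

```rust
use std::collections::HashMap;

// Simplified stand-ins for the real Garage types.
#[derive(Default)]
struct NodeEntry {
    is_up: bool,
    role: Option<String>,
    draining: bool,
}

fn merge_cluster_view(
    known: &[(u64, bool)],           // (node id, is_up) from gossip
    current_roles: &[(u64, String)], // role assignments in the current layout
    older_layout_ids: &[u64],        // ids still present in older layout versions
) -> Vec<(u64, NodeEntry)> {
    // 1. Index everything we know from gossip by node id.
    let mut nodes: HashMap<u64, NodeEntry> = known
        .iter()
        .map(|&(id, is_up)| (id, NodeEntry { is_up, ..Default::default() }))
        .collect();

    // 2. Overlay the role from the current layout version, creating an
    //    entry for nodes that have a role but were never seen on gossip.
    for (id, role) in current_roles {
        nodes.entry(*id).or_default().role = Some(role.clone());
    }

    // 3. A node absent so far but present in an older layout version is
    //    still holding data: surface it as draining.
    for id in older_layout_ids {
        nodes
            .entry(*id)
            .or_insert_with(|| NodeEntry { draining: true, ..Default::default() });
    }

    // 4. Deterministic output order, as the handler does by sorting on id.
    let mut out: Vec<_> = nodes.into_iter().collect();
    out.sort_by_key(|&(id, _)| id);
    out
}
```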
diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs
index 77918a0f..da4226cf 100644
--- a/src/garage/admin/mod.rs
+++ b/src/garage/admin/mod.rs
@@ -295,7 +295,7 @@ impl AdminRpcHandler {
let info = node_info.get(id);
let status = info.map(|x| &x.status);
let role = layout.current().roles.get(id).and_then(|x| x.0.as_ref());
- let hostname = status.map(|x| x.hostname.as_str()).unwrap_or("?");
+ let hostname = status.and_then(|x| x.hostname.as_deref()).unwrap_or("?");
let zone = role.map(|x| x.zone.as_str()).unwrap_or("?");
let capacity = role
.map(|x| x.capacity_string())
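
Since `hostname` is now an `Option<String>` (see the `src/rpc/system.rs` hunk below), the display path has to flatten two levels of optionality before falling back to a placeholder. A minimal illustration of the `and_then`/`as_deref` chain, with a stand-in `Status` type:

```rust
struct Status {
    hostname: Option<String>,
}

fn display_hostname(status: Option<&Status>) -> &str {
    // Option<&Status> -> Option<&str> via as_deref, then a fallback.
    status.and_then(|s| s.hostname.as_deref()).unwrap_or("?")
}
```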
diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs
index 4d1306b6..c7f0ad2b 100644
--- a/src/garage/cli/cmd.rs
+++ b/src/garage/cli/cmd.rs
@@ -62,6 +62,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
let mut healthy_nodes =
vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()];
for adv in status.iter().filter(|adv| adv.is_up) {
+ let host = adv.status.hostname.as_deref().unwrap_or("?");
if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) {
let data_avail = match &adv.status.data_disk_avail {
_ if cfg.capacity.is_none() => "N/A".into(),
@@ -75,7 +76,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
healthy_nodes.push(format!(
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}",
id = adv.id,
- host = adv.status.hostname,
+ host = host,
addr = adv.addr,
tags = cfg.tags.join(","),
zone = cfg.zone,
@@ -95,7 +96,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
healthy_nodes.push(format!(
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tdraining metadata...",
id = adv.id,
- host = adv.status.hostname,
+ host = host,
addr = adv.addr,
tags = cfg.tags.join(","),
zone = cfg.zone,
@@ -108,7 +109,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
healthy_nodes.push(format!(
"{id:?}\t{h}\t{addr}\t\t\t{new_role}",
id = adv.id,
- h = adv.status.hostname,
+ h = host,
addr = adv.addr,
new_role = new_role,
));
@@ -149,7 +150,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
// it is in a failed state, add proper line to the output
let (host, addr, last_seen) = match adv {
Some(adv) => (
- adv.status.hostname.as_str(),
+ adv.status.hostname.as_deref().unwrap_or("?"),
adv.addr.to_string(),
adv.last_seen_secs_ago
.map(|s| tf.convert(Duration::from_secs(s)))
diff --git a/src/rpc/system.rs b/src/rpc/system.rs
index c7d41ee4..be4aefa2 100644
--- a/src/rpc/system.rs
+++ b/src/rpc/system.rs
@@ -126,7 +126,7 @@ pub struct System {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeStatus {
/// Hostname of the node
- pub hostname: String,
+ pub hostname: Option<String>,
/// Replication factor configured on the node
pub replication_factor: usize,
@@ -765,9 +765,11 @@ impl EndpointHandler<SystemRpc> for System {
impl NodeStatus {
fn initial(replication_factor: usize, layout_manager: &LayoutManager) -> Self {
NodeStatus {
- hostname: gethostname::gethostname()
- .into_string()
- .unwrap_or_else(|_| "<invalid utf-8>".to_string()),
+ hostname: Some(
+ gethostname::gethostname()
+ .into_string()
+ .unwrap_or_else(|_| "<invalid utf-8>".to_string()),
+ ),
replication_factor,
layout_digest: layout_manager.layout().digest(),
meta_disk_avail: None,
@@ -777,7 +779,7 @@ impl NodeStatus {
fn unknown() -> Self {
NodeStatus {
- hostname: "?".to_string(),
+ hostname: None,
replication_factor: 0,
layout_digest: Default::default(),
meta_disk_avail: None,
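
Making `hostname` optional means `unknown()` no longer bakes a `"?"` placeholder into serialized node status; each consumer picks its own fallback, and the wire format carries an honest `null`. A quick check of that behavior on a one-field subset, assuming serde/serde_json (struct name hypothetical):

```rust
use serde::Serialize;

// Field subset only; the real NodeStatus has more fields.
#[derive(Serialize)]
struct HostnameOnly {
    hostname: Option<String>,
}

fn main() {
    let known = HostnameOnly { hostname: Some("node1".into()) };
    let unknown = HostnameOnly { hostname: None };
    // An unknown hostname is now an explicit null on the wire instead of "?".
    assert_eq!(serde_json::to_string(&known).unwrap(), r#"{"hostname":"node1"}"#);
    assert_eq!(serde_json::to_string(&unknown).unwrap(), r#"{"hostname":null}"#);
}
```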