aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/drafts/admin-api.md40
-rw-r--r--src/api/admin/cluster.rs28
-rw-r--r--src/api/admin/router.rs2
-rw-r--r--src/rpc/system.rs4
4 files changed, 51 insertions, 23 deletions
diff --git a/doc/drafts/admin-api.md b/doc/drafts/admin-api.md
index b1a8f402..c80147ef 100644
--- a/doc/drafts/admin-api.md
+++ b/doc/drafts/admin-api.md
@@ -52,7 +52,7 @@ Returns an HTTP status 200 if the node is ready to answer user's requests,
and an HTTP status 503 (Service Unavailable) if there are some partitions
for which a quorum of nodes is not available.
A simple textual message is also returned in a body with content-type `text/plain`.
-See `/v0/health` for an API that also returns JSON output.
+See `/v1/health` for an API that also returns JSON output.
### Cluster operations
@@ -161,21 +161,21 @@ Example response body:
}
```
-#### GetClusterHealth `GET /v0/health`
+#### GetClusterHealth `GET /v1/health`
Returns the cluster's current health in JSON format, with the following variables:
-- `status`: one of `Healthy`, `Degraded` or `Unavailable`:
- - Healthy: Garage node is connected to all storage nodes
- - Degraded: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions
- - Unavailable: a quorum of write nodes is not available for some partitions
-- `known_nodes`: the number of nodes this Garage node has had a TCP connection to since the daemon started
-- `connected_nodes`: the nubmer of nodes this Garage node currently has an open connection to
-- `storage_nodes`: the number of storage nodes currently registered in the cluster layout
-- `storage_nodes_ok`: the number of storage nodes to which a connection is currently open
+- `status`: one of `healthy`, `degraded` or `unavailable`:
+ - healthy: Garage node is connected to all storage nodes
+ - degraded: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions
+ - unavailable: a quorum of write nodes is not available for some partitions
+- `knownNodes`: the number of nodes this Garage node has had a TCP connection to since the daemon started
+- `connectedNodes`: the number of nodes this Garage node currently has an open connection to
+- `storageNodes`: the number of storage nodes currently registered in the cluster layout
+- `storageNodesOk`: the number of storage nodes to which a connection is currently open
- `partitions`: the total number of partitions of the data (currently always 256)
-- `partitions_quorum`: the number of partitions for which a quorum of write nodes is available
-- `partitions_all_ok`: the number of partitions for which we are connected to all storage nodes responsible of storing it
+- `partitionsQuorum`: the number of partitions for which a quorum of write nodes is available
+- `partitionsAllOk`: the number of partitions for which we are connected to all storage nodes responsible for storing it
Contrarily to `GET /health`, this endpoint always returns a 200 OK HTTP response code.
@@ -183,14 +183,14 @@ Example response body:
```json
{
- "status": "Degraded",
- "known_nodes": 3,
- "connected_nodes": 2,
- "storage_nodes": 3,
- "storage_nodes_ok": 2,
- "partitions": 256,
- "partitions_quorum": 256,
- "partitions_all_ok": 0
+ "status": "degraded",
+ "knownNodes": 3,
+ "connectedNodes": 3,
+ "storageNodes": 4,
+ "storageNodesOk": 3,
+ "partitions": 256,
+ "partitionsQuorum": 256,
+ "partitionsAllOk": 64
}
```
diff --git a/src/api/admin/cluster.rs b/src/api/admin/cluster.rs
index 8a208a2c..90203043 100644
--- a/src/api/admin/cluster.rs
+++ b/src/api/admin/cluster.rs
@@ -40,7 +40,22 @@ pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response<
}
pub async fn handle_get_cluster_health(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
+ use garage_rpc::system::ClusterHealthStatus;
let health = garage.system.health();
+ let health = ClusterHealth {
+ status: match health.status {
+ ClusterHealthStatus::Healthy => "healthy",
+ ClusterHealthStatus::Degraded => "degraded",
+ ClusterHealthStatus::Unavailable => "unavailable",
+ },
+ known_nodes: health.known_nodes,
+ connected_nodes: health.connected_nodes,
+ storage_nodes: health.storage_nodes,
+ storage_nodes_ok: health.storage_nodes_ok,
+ partitions: health.partitions,
+ partitions_quorum: health.partitions_quorum,
+ partitions_all_ok: health.partitions_all_ok,
+ };
Ok(json_ok_response(&health)?)
}
@@ -120,6 +135,19 @@ fn get_cluster_layout(garage: &Arc<Garage>) -> GetClusterLayoutResponse {
// ----
+#[derive(Debug, Clone, Copy, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ClusterHealth {
+ pub status: &'static str,
+ pub known_nodes: usize,
+ pub connected_nodes: usize,
+ pub storage_nodes: usize,
+ pub storage_nodes_ok: usize,
+ pub partitions: usize,
+ pub partitions_quorum: usize,
+ pub partitions_all_ok: usize,
+}
+
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct GetClusterStatusResponse {
diff --git a/src/api/admin/router.rs b/src/api/admin/router.rs
index 5af3ffb5..b98db284 100644
--- a/src/api/admin/router.rs
+++ b/src/api/admin/router.rs
@@ -96,7 +96,7 @@ impl Endpoint {
GET "/health" => Health,
GET "/metrics" => Metrics,
GET "/v1/status" => GetClusterStatus,
- GET "/v0/health" => GetClusterHealth,
+ GET "/v1/health" => GetClusterHealth,
POST "/v0/connect" => ConnectClusterNodes,
// Layout endpoints
GET "/v1/layout" => GetClusterLayout,
diff --git a/src/rpc/system.rs b/src/rpc/system.rs
index c549d8fc..1675e70e 100644
--- a/src/rpc/system.rs
+++ b/src/rpc/system.rs
@@ -151,7 +151,7 @@ pub struct KnownNodeInfo {
pub status: NodeStatus,
}
-#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy)]
pub struct ClusterHealth {
/// The current health status of the cluster (see below)
pub status: ClusterHealthStatus,
@@ -171,7 +171,7 @@ pub struct ClusterHealth {
pub partitions_all_ok: usize,
}
-#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy)]
pub enum ClusterHealthStatus {
/// All nodes are available
Healthy,