9 files changed, 833 insertions, 10 deletions
diff --git a/doc/drafts/admin-api.md b/doc/drafts/admin-api.md
new file mode 100644
index 00000000..9a697a59
--- /dev/null
+++ b/doc/drafts/admin-api.md
@@ -0,0 +1,686 @@
++++
+title = "Administration API"
+weight = 60
++++
+
+The Garage administration API is accessible through a dedicated server whose
+listen address is specified in the `[admin]` section of the configuration
+file (see [configuration file
+reference](@/documentation/reference-manual/configuration.md))
+
+**WARNING.** At this point, there is no comittement to stability of the APIs described in this document.
+We will bump the version numbers prefixed to each API endpoint at each time the syntax
+or semantics change, meaning that code that relies on these endpoint will break
+when changes are introduced.
+
+The Garage administration API was introduced in version 0.7.2, this document
+does not apply to older versions of Garage.
+
+
+## Access control
+
+The admin API uses two different tokens for acces control, that are specified in the config file's `[admin]` section:
+
+- `metrics_token`: the token for accessing the Metrics endpoint (if this token
+  is not set in the config file, the Metrics endpoint can be accessed without
+  access control);
+
+- `admin_token`: the token for accessing all of the other administration
+  endpoints (if this token is not set in the config file, access to these
+  endpoints is disabled entirely).
+
+These tokens are used as simple HTTP bearer tokens. In other words, to
+authenticate access to an admin API endpoint, add the following HTTP header
+to your request:
+
+```
+Authorization: Bearer <token>
+```
+
+## Administration API endpoints
+
+### Metrics-related endpoints
+
+#### Metrics `GET /metrics`
+
+Returns internal Garage metrics in Prometheus format.
+
+#### Health `GET /health`
+
+Used for simple health checks in a cluster setting with an orchestrator.
+Returns an HTTP status 200 if the node is ready to answer user's requests,
+and an HTTP status 503 (Service Unavailable) if there are some partitions
+for which a quorum of nodes is not available.
+A simple textual message is also returned in a body with content-type `text/plain`.
+See `/v0/health` for an API that also returns JSON output.
+
+### Cluster operations
+
+#### GetClusterStatus `GET /v0/status`
+
+Returns the cluster's current status in JSON, including:
+
+- ID of the node being queried and its version of the Garage daemon
+- Live nodes
+- Currently configured cluster layout
+- Staged changes to the cluster layout
+
+Example response body:
+
+```json
+{
+  "node": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f",
+  "garage_version": "git:v0.8.0",
+  "knownNodes": {
+    "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
+      "addr": "10.0.0.11:3901",
+      "is_up": true,
+      "last_seen_secs_ago": 9,
+      "hostname": "node1"
+    },
+    "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
+      "addr": "10.0.0.12:3901",
+      "is_up": true,
+      "last_seen_secs_ago": 1,
+      "hostname": "node2"
+    },
+    "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
+      "addr": "10.0.0.21:3901",
+      "is_up": true,
+      "last_seen_secs_ago": 7,
+      "hostname": "node3"
+    },
+    "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
+      "addr": "10.0.0.22:3901",
+      "is_up": true,
+      "last_seen_secs_ago": 1,
+      "hostname": "node4"
+    }
+  },
+  "layout": {
+    "version": 12,
+    "roles": {
+      "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
+        "zone": "dc1",
+        "capacity": 4,
+        "tags": [
+          "node1"
+        ]
+      },
+      "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
+        "zone": "dc1",
+        "capacity": 6,
+        "tags": [
+          "node2"
+        ]
+      },
+      "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
+        "zone": "dc2",
+        "capacity": 10,
+        "tags": [
+          "node3"
+        ]
+      }
+    },
+    "stagedRoleChanges": {
+      "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
+        "zone": "dc2",
+        "capacity": 5,
+        "tags": [
+          "node4"
+        ]
+      }
+    }
+  }
+}
+```
+
+#### GetClusterHealth `GET /v0/health`
+
+Returns the cluster's current health in JSON format, with the following variables:
+
+- `status`: one of `Healthy`, `Degraded` or `Unavailable`:
+  - Healthy: Garage node is connected to all storage nodes
+  - Degraded: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions
+  - Unavailable: a quorum of write nodes is not available for some partitions
+- `known_nodes`: the number of nodes this Garage node has had a TCP connection to since the daemon started
+- `connected_nodes`: the nubmer of nodes this Garage node currently has an open connection to
+- `storage_nodes`: the number of storage nodes currently registered in the cluster layout
+- `storage_nodes_ok`: the number of storage nodes to which a connection is currently open
+- `partitions`: the total number of partitions of the data (currently always 256)
+- `partitions_quorum`: the number of partitions for which a quorum of write nodes is available
+- `partitions_all_ok`: the number of partitions for which we are connected to all storage nodes responsible of storing it
+
+Contrarily to `GET /health`, this endpoint always returns a 200 OK HTTP response code.
+
+Example response body:
+
+```json
+{
+    "status": "Degraded",
+    "known_nodes": 3,
+    "connected_nodes": 2,
+    "storage_nodes": 3,
+    "storage_nodes_ok": 2,
+    "partitions": 256,
+    "partitions_quorum": 256,
+    "partitions_all_ok": 0
+}
+```
+
+#### ConnectClusterNodes `POST /v0/connect`
+
+Instructs this Garage node to connect to other Garage nodes at specified addresses.
+
+Example request body:
+
+```json
+[
+    "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f@10.0.0.11:3901",
+    "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff@10.0.0.12:3901"
+]
+```
+
+The format of the string for a node to connect to is: `<node ID>@<ip address>:<port>`, same as in the `garage node connect` CLI call.
+
+Example response:
+
+```json
+[
+    {
+        "success": true,
+        "error": null
+    },
+    {
+        "success": false,
+        "error": "Handshake error"
+    }
+]
+```
+
+#### GetClusterLayout `GET /v0/layout`
+
+Returns the cluster's current layout in JSON, including:
+
+- Currently configured cluster layout
+- Staged changes to the cluster layout
+
+(the info returned by this endpoint is a subset of the info returned by GetClusterStatus)
+
+Example response body:
+
+```json
+{
+  "version": 12,
+  "roles": {
+    "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
+      "zone": "dc1",
+      "capacity": 4,
+      "tags": [
+        "node1"
+      ]
+    },
+    "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
+      "zone": "dc1",
+      "capacity": 6,
+      "tags": [
+        "node2"
+      ]
+    },
+    "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
+      "zone": "dc2",
+      "capacity": 10,
+      "tags": [
+        "node3"
+      ]
+    }
+  },
+  "stagedRoleChanges": {
+    "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
+      "zone": "dc2",
+      "capacity": 5,
+      "tags": [
+        "node4"
+      ]
+    }
+  }
+}
+```
+
+#### UpdateClusterLayout `POST /v0/layout`
+
+Send modifications to the cluster layout. These modifications will
+be included in the staged role changes, visible in subsequent calls
+of `GetClusterLayout`. Once the set of staged changes is satisfactory,
+the user may call `ApplyClusterLayout` to apply the changed changes,
+or `Revert ClusterLayout` to clear all of the staged changes in
+the layout.
+
+Request body format:
+
+```json
+{
+  <node_id>: {
+    "capacity": <new_capacity>,
+    "zone": <new_zone>,
+    "tags": [
+      <new_tag>,
+      ...
+    ]
+  },
+  <node_id_to_remove>: null,
+  ...
+}
+```
+
+Contrary to the CLI that may update only a subset of the fields
+`capacity`, `zone` and `tags`, when calling this API all of these
+values must be specified.
+
+
+#### ApplyClusterLayout `POST /v0/layout/apply`
+
+Applies to the cluster the layout changes currently registered as
+staged layout changes.
+
+Request body format:
+
+```json
+{
+  "version": 13
+}
+```
+
+Similarly to the CLI, the body must include the version of the new layout
+that will be created, which MUST be 1 + the value of the currently
+existing layout in the cluster.
+
+#### RevertClusterLayout `POST /v0/layout/revert`
+
+Clears all of the staged layout changes.
+
+Request body format:
+
+```json
+{
+  "version": 13
+}
+```
+
+Reverting the staged changes is done by incrementing the version number
+and clearing the contents of the staged change list.
+Similarly to the CLI, the body must include the incremented
+version number, which MUST be 1 + the value of the currently
+existing layout in the cluster.
+
+
+### Access key operations
+
+#### ListKeys `GET /v0/key`
+
+Returns all API access keys in the cluster.
+
+Example response:
+
+```json
+[
+  {
+    "id": "GK31c2f218a2e44f485b94239e",
+    "name": "test"
+  },
+  {
+    "id": "GKe10061ac9c2921f09e4c5540",
+    "name": "test2"
+  }
+]
+```
+
+#### CreateKey `POST /v0/key`
+
+Creates a new API access key.
+
+Request body format:
+
+```json
+{
+    "name": "NameOfMyKey"
+}
+```
+
+#### ImportKey `POST /v0/key/import`
+
+Imports an existing API key.
+
+Request body format:
+
+```json
+{
+    "accessKeyId": "GK31c2f218a2e44f485b94239e",
+    "secretAccessKey": "b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835",
+    "name": "NameOfMyKey"
+}
+```
+
+#### GetKeyInfo `GET /v0/key?id=<acces key id>`
+#### GetKeyInfo `GET /v0/key?search=<pattern>`
+
+Returns information about the requested API access key.
+
+If `id` is set, the key is looked up using its exact identifier (faster).
+If `search` is set, the key is looked up using its name or prefix
+of identifier (slower, all keys are enumerated to do this).
+
+Example response:
+
+```json
+{
+  "name": "test",
+  "accessKeyId": "GK31c2f218a2e44f485b94239e",
+  "secretAccessKey": "b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835",
+  "permissions": {
+    "createBucket": false
+  },
+  "buckets": [
+    {
+      "id": "70dc3bed7fe83a75e46b66e7ddef7d56e65f3c02f9f80b6749fb97eccb5e1033",
+      "globalAliases": [
+        "test2"
+      ],
+      "localAliases": [],
+      "permissions": {
+        "read": true,
+        "write": true,
+        "owner": false
+      }
+    },
+    {
+      "id": "d7452a935e663fc1914f3a5515163a6d3724010ce8dfd9e4743ca8be5974f995",
+      "globalAliases": [
+        "test3"
+      ],
+      "localAliases": [],
+      "permissions": {
+        "read": true,
+        "write": true,
+        "owner": false
+      }
+    },
+    {
+      "id": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
+      "globalAliases": [],
+      "localAliases": [
+        "test"
+      ],
+      "permissions": {
+        "read": true,
+        "write": true,
+        "owner": true
+      }
+    },
+    {
+      "id": "96470e0df00ec28807138daf01915cfda2bee8eccc91dea9558c0b4855b5bf95",
+      "globalAliases": [
+        "alex"
+      ],
+      "localAliases": [],
+      "permissions": {
+        "read": true,
+        "write": true,
+        "owner": true
+      }
+    }
+  ]
+}
+```
+
+#### DeleteKey `DELETE /v0/key?id=<acces key id>`
+
+Deletes an API access key.
+
+#### UpdateKey `POST /v0/key?id=<acces key id>`
+
+Updates information about the specified API access key.
+
+Request body format:
+
+```json
+{
+    "name": "NameOfMyKey",
+    "allow": {
+        "createBucket": true,
+    },
+    "deny": {}
+}
+```
+
+All fields (`name`, `allow` and `deny`) are optionnal.
+If they are present, the corresponding modifications are applied to the key, otherwise nothing is changed.
+The possible flags in `allow` and `deny` are: `createBucket`.
+
+
+### Bucket operations
+
+#### ListBuckets `GET /v0/bucket`
+
+Returns all storage buckets in the cluster.
+
+Example response:
+
+```json
+[
+  {
+    "id": "70dc3bed7fe83a75e46b66e7ddef7d56e65f3c02f9f80b6749fb97eccb5e1033",
+    "globalAliases": [
+      "test2"
+    ],
+    "localAliases": []
+  },
+  {
+    "id": "96470e0df00ec28807138daf01915cfda2bee8eccc91dea9558c0b4855b5bf95",
+    "globalAliases": [
+      "alex"
+    ],
+    "localAliases": []
+  },
+  {
+    "id": "d7452a935e663fc1914f3a5515163a6d3724010ce8dfd9e4743ca8be5974f995",
+    "globalAliases": [
+      "test3"
+    ],
+    "localAliases": []
+  },
+  {
+    "id": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
+    "globalAliases": [],
+    "localAliases": [
+      {
+        "accessKeyId": "GK31c2f218a2e44f485b94239e",
+        "alias": "test"
+      }
+    ]
+  }
+]
+```
+
+#### GetBucketInfo `GET /v0/bucket?id=<bucket id>`
+#### GetBucketInfo `GET /v0/bucket?globalAlias=<alias>`
+
+Returns information about the requested storage bucket.
+
+If `id` is set, the bucket is looked up using its exact identifier.
+If `globalAlias` is set, the bucket is looked up using its global alias.
+(both are fast)
+
+Example response:
+
+```json
+{
+    "id": "afa8f0a22b40b1247ccd0affb869b0af5cff980924a20e4b5e0720a44deb8d39",
+        "globalAliases": [],
+        "websiteAccess": false,
+        "websiteConfig": null,
+        "keys": [
+        {
+            "accessKeyId": "GK31c2f218a2e44f485b94239e",
+            "name": "Imported key",
+            "permissions": {
+                "read": true,
+                "write": true,
+                "owner": true
+            },
+            "bucketLocalAliases": [
+                "debug"
+            ]
+        }
+        ],
+        "objects": 14827,
+        "bytes": 13189855625,
+        "unfinshedUploads": 0,
+        "quotas": {
+            "maxSize": null,
+            "maxObjects": null
+        }
+}
+```
+
+#### CreateBucket `POST /v0/bucket`
+
+Creates a new storage bucket.
+
+Request body format:
+
+```json
+{
+    "globalAlias": "NameOfMyBucket"
+}
+```
+
+OR
+
+```json
+{
+    "localAlias": {
+        "accessKeyId": "GK31c2f218a2e44f485b94239e",
+        "alias": "NameOfMyBucket",
+        "allow": {
+            "read": true,
+            "write": true,
+            "owner": false
+        }
+    }
+}
+```
+
+OR
+
+```json
+{}
+```
+
+Creates a new bucket, either with a global alias, a local one,
+or no alias at all.
+
+Technically, you can also specify both `globalAlias` and `localAlias` and that would create
+two aliases, but I don't see why you would want to do that.
+
+#### DeleteBucket `DELETE /v0/bucket?id=<bucket id>`
+
+Deletes a storage bucket. A bucket cannot be deleted if it is not empty.
+
+Warning: this will delete all aliases associated with the bucket!
+
+#### UpdateBucket `PUT /v0/bucket?id=<bucket id>`
+
+Updates configuration of the given bucket.
+
+Request body format:
+
+```json
+{
+    "websiteAccess": {
+        "enabled": true,
+        "indexDocument": "index.html",
+        "errorDocument": "404.html"
+    },
+    "quotas": {
+        "maxSize": 19029801,
+        "maxObjects": null,
+    }
+}
+```
+
+All fields (`websiteAccess` and `quotas`) are optionnal.
+If they are present, the corresponding modifications are applied to the bucket, otherwise nothing is changed.
+
+In `websiteAccess`: if `enabled` is `true`, `indexDocument` must be specified.
+The field `errorDocument` is optional, if no error document is set a generic
+error message is displayed when errors happen. Conversely, if `enabled` is
+`false`, neither `indexDocument` nor `errorDocument` must be specified.
+
+In `quotas`: new values of `maxSize` and `maxObjects` must both be specified, or set to `null`
+to remove the quotas. An absent value will be considered the same as a `null`. It is not possible
+to change only one of the two quotas.
+
+### Operations on permissions for keys on buckets
+
+#### BucketAllowKey `POST /v0/bucket/allow`
+
+Allows a key to do read/write/owner operations on a bucket.
+
+Request body format:
+
+```json
+{
+    "bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
+    "accessKeyId": "GK31c2f218a2e44f485b94239e",
+    "permissions": {
+        "read": true,
+        "write": true,
+        "owner": true
+    },
+}
+```
+
+Flags in `permissions` which have the value `true` will be activated.
+Other flags will remain unchanged.
+
+#### BucketDenyKey `POST /v0/bucket/deny`
+
+Denies a key from doing read/write/owner operations on a bucket.
+
+Request body format:
+
+```json
+{
+    "bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
+    "accessKeyId": "GK31c2f218a2e44f485b94239e",
+    "permissions": {
+        "read": false,
+        "write": false,
+        "owner": true
+    },
+}
+```
+
+Flags in `permissions` which have the value `true` will be deactivated.
+Other flags will remain unchanged.
+
+
+### Operations on bucket aliases
+
+#### GlobalAliasBucket `PUT /v0/bucket/alias/global?id=<bucket id>&alias=<global alias>`
+
+Empty body. Creates a global alias for a bucket.
+
+#### GlobalUnaliasBucket `DELETE /v0/bucket/alias/global?id=<bucket id>&alias=<global alias>`
+
+Removes a global alias for a bucket.
+
+#### LocalAliasBucket `PUT /v0/bucket/alias/local?id=<bucket id>&accessKeyId=<access key ID>&alias=<local alias>`
+
+Empty body. Creates a local alias for a bucket in the namespace of a specific access key.
+
+#### LocalUnaliasBucket `DELETE /v0/bucket/alias/local?id=<bucket id>&accessKeyId<access key ID>&alias=<local alias>`
+
+Removes a local alias for a bucket in the namespace of a specific access key.
+
diff --git a/src/api/admin/api_server.rs b/src/api/admin/api_server.rs
index 2896d058..2d325fb1 100644
--- a/src/api/admin/api_server.rs
+++ b/src/api/admin/api_server.rs
@@ -15,6 +15,7 @@ use opentelemetry_prometheus::PrometheusExporter;
 use prometheus::{Encoder, TextEncoder};
 
 use garage_model::garage::Garage;
+use garage_rpc::system::ClusterHealthStatus;
 use garage_util::error::Error as GarageError;
 
 use crate::generic_server::*;
@@ -76,6 +77,31 @@ impl AdminApiServer {
 			.body(Body::empty())?)
 	}
 
+	fn handle_health(&self) -> Result<Response<Body>, Error> {
+		let health = self.garage.system.health();
+
+		let (status, status_str) = match health.status {
+			ClusterHealthStatus::Healthy => (StatusCode::OK, "Garage is fully operational"),
+			ClusterHealthStatus::Degraded => (
+				StatusCode::OK,
+				"Garage is operational but some storage nodes are unavailable",
+			),
+			ClusterHealthStatus::Unavailable => (
+				StatusCode::SERVICE_UNAVAILABLE,
+				"Quorum is not available for some/all partitions, reads and writes will fail",
+			),
+		};
+		let status_str = format!(
+			"{}\nConsult the full health check API endpoint at /v0/health for more details\n",
+			status_str
+		);
+
+		Ok(Response::builder()
+			.status(status)
+			.header(http::header::CONTENT_TYPE, "text/plain")
+			.body(Body::from(status_str))?)
+	}
+
 	fn handle_metrics(&self) -> Result<Response<Body>, Error> {
 		#[cfg(feature = "metrics")]
 		{
@@ -124,6 +150,7 @@ impl ApiHandler for AdminApiServer {
 	) -> Result<Response<Body>, Error> {
 		let expected_auth_header =
 			match endpoint.authorization_type() {
+				Authorization::None => None,
 				Authorization::MetricsToken => self.metrics_token.as_ref(),
 				Authorization::AdminToken => match &self.admin_token {
 					None => return Err(Error::forbidden(
@@ -147,8 +174,10 @@ impl ApiHandler for AdminApiServer {
 
 		match endpoint {
 			Endpoint::Options => self.handle_options(&req),
+			Endpoint::Health => self.handle_health(),
 			Endpoint::Metrics => self.handle_metrics(),
 			Endpoint::GetClusterStatus => handle_get_cluster_status(&self.garage).await,
+			Endpoint::GetClusterHealth => handle_get_cluster_health(&self.garage).await,
 			Endpoint::ConnectClusterNodes => handle_connect_cluster_nodes(&self.garage, req).await,
 			// Layout
 			Endpoint::GetClusterLayout => handle_get_cluster_layout(&self.garage).await,
diff --git a/src/api/admin/cluster.rs b/src/api/admin/cluster.rs
index 706db727..182a4f6f 100644
--- a/src/api/admin/cluster.rs
+++ b/src/api/admin/cluster.rs
@@ -43,6 +43,11 @@ pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response<
 	Ok(json_ok_response(&res)?)
 }
 
+pub async fn handle_get_cluster_health(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
+	let health = garage.system.health();
+	Ok(json_ok_response(&health)?)
+}
+
 pub async fn handle_connect_cluster_nodes(
 	garage: &Arc<Garage>,
 	req: Request<Body>,
diff --git a/src/api/admin/router.rs b/src/api/admin/router.rs
index 3eee8b67..3fa07b3c 100644
--- a/src/api/admin/router.rs
+++ b/src/api/admin/router.rs
@@ -6,6 +6,7 @@ use crate::admin::error::*;
 use crate::router_macros::*;
 
 pub enum Authorization {
+	None,
 	MetricsToken,
 	AdminToken,
 }
@@ -16,8 +17,10 @@ router_match! {@func
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum Endpoint {
 	Options,
+	Health,
 	Metrics,
 	GetClusterStatus,
+	GetClusterHealth,
 	ConnectClusterNodes,
 	// Layout
 	GetClusterLayout,
@@ -88,8 +91,10 @@ impl Endpoint {
 
 		let res = router_match!(@gen_path_parser (req.method(), path, query) [
 			OPTIONS _ => Options,
+			GET "/health" => Health,
 			GET "/metrics" => Metrics,
 			GET "/v0/status" => GetClusterStatus,
+			GET "/v0/health" => GetClusterHealth,
 			POST "/v0/connect" => ConnectClusterNodes,
 			// Layout endpoints
 			GET "/v0/layout" => GetClusterLayout,
@@ -130,6 +135,7 @@ impl Endpoint {
 	/// Get the kind of authorization which is required to perform the operation.
 	pub fn authorization_type(&self) -> Authorization {
 		match self {
+			Self::Health => Authorization::None,
 			Self::Metrics => Authorization::MetricsToken,
 			_ => Authorization::AdminToken,
 		}
@@ -137,6 +143,7 @@ impl Endpoint {
 }
 
 generateQueryParameters! {
+	"format" => format,
 	"id" => id,
 	"search" => search,
 	"globalAlias" => global_alias,
diff --git a/src/model/garage.rs b/src/model/garage.rs
index 75012952..e34d034f 100644
--- a/src/model/garage.rs
+++ b/src/model/garage.rs
@@ -8,10 +8,10 @@ use garage_util::background::*;
 use garage_util::config::*;
 use garage_util::error::*;
 
+use garage_rpc::replication_mode::ReplicationMode;
 use garage_rpc::system::System;
 
 use garage_block::manager::*;
-use garage_table::replication::ReplicationMode;
 use garage_table::replication::TableFullReplication;
 use garage_table::replication::TableShardedReplication;
 use garage_table::*;
@@ -34,6 +34,9 @@ pub struct Garage {
 	/// The parsed configuration Garage is running
 	pub config: Config,
 
+	/// The replication mode of this cluster
+	pub replication_mode: ReplicationMode,
+
 	/// The local database
 	pub db: db::Db,
 	/// A background job runner
@@ -164,12 +167,7 @@ impl Garage {
 			.expect("Invalid replication_mode in config file.");
 
 		info!("Initialize membership management system...");
-		let system = System::new(
-			network_key,
-			background.clone(),
-			replication_mode.replication_factor(),
-			&config,
-		)?;
+		let system = System::new(network_key, background.clone(), replication_mode, &config)?;
 
 		let data_rep_param = TableShardedReplication {
 			system: system.clone(),
@@ -258,6 +256,7 @@ impl Garage {
 		// -- done --
 		Ok(Arc::new(Self {
 			config,
+			replication_mode,
 			db,
 			background,
 			system,
diff --git a/src/rpc/lib.rs b/src/rpc/lib.rs
index 92caf75d..86f63568 100644
--- a/src/rpc/lib.rs
+++ b/src/rpc/lib.rs
@@ -9,6 +9,7 @@ mod consul;
 mod kubernetes;
 
 pub mod layout;
+pub mod replication_mode;
 pub mod ring;
 pub mod system;
 
diff --git a/src/table/replication/mode.rs b/src/rpc/replication_mode.rs
index c6f84c45..e244e063 100644
--- a/src/table/replication/mode.rs
+++ b/src/rpc/replication_mode.rs
@@ -1,3 +1,4 @@
+#[derive(Clone, Copy)]
 pub enum ReplicationMode {
 	None,
 	TwoWay,
diff --git a/src/rpc/system.rs b/src/rpc/system.rs
index d6576f20..2c6f14fd 100644
--- a/src/rpc/system.rs
+++ b/src/rpc/system.rs
@@ -35,6 +35,7 @@ use crate::consul::ConsulDiscovery;
 #[cfg(feature = "kubernetes-discovery")]
 use crate::kubernetes::*;
 use crate::layout::*;
+use crate::replication_mode::*;
 use crate::ring::*;
 use crate::rpc_helper::*;
 
@@ -102,6 +103,7 @@ pub struct System {
 	#[cfg(feature = "kubernetes-discovery")]
 	kubernetes_discovery: Option<KubernetesDiscoveryConfig>,
 
+	replication_mode: ReplicationMode,
 	replication_factor: usize,
 
 	/// The ring
@@ -136,6 +138,37 @@ pub struct KnownNodeInfo {
 	pub status: NodeStatus,
 }
 
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+pub struct ClusterHealth {
+	/// The current health status of the cluster (see below)
+	pub status: ClusterHealthStatus,
+	/// Number of nodes already seen once in the cluster
+	pub known_nodes: usize,
+	/// Number of nodes currently connected
+	pub connected_nodes: usize,
+	/// Number of storage nodes declared in the current layout
+	pub storage_nodes: usize,
+	/// Number of storage nodes currently connected
+	pub storage_nodes_ok: usize,
+	/// Number of partitions in the layout
+	pub partitions: usize,
+	/// Number of partitions for which we have a quorum of connected nodes
+	pub partitions_quorum: usize,
+	/// Number of partitions for which all storage nodes are connected
+	pub partitions_all_ok: usize,
+}
+
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+pub enum ClusterHealthStatus {
+	/// All nodes are available
+	Healthy,
+	/// Some storage nodes are unavailable, but quorum is stil
+	/// achieved for all partitions
+	Degraded,
+	/// Quorum is not available for some partitions
+	Unavailable,
+}
+
 pub fn read_node_id(metadata_dir: &Path) -> Result<NodeID, Error> {
 	let mut pubkey_file = metadata_dir.to_path_buf();
 	pubkey_file.push("node_key.pub");
@@ -200,9 +233,11 @@ impl System {
 	pub fn new(
 		network_key: NetworkKey,
 		background: Arc<BackgroundRunner>,
-		replication_factor: usize,
+		replication_mode: ReplicationMode,
 		config: &Config,
 	) -> Result<Arc<Self>, Error> {
+		let replication_factor = replication_mode.replication_factor();
+
 		let node_key =
 			gen_node_key(&config.metadata_dir).expect("Unable to read or generate node ID");
 		info!(
@@ -324,6 +359,7 @@ impl System {
 				config.rpc_timeout_msec.map(Duration::from_millis),
 			),
 			system_endpoint,
+			replication_mode,
 			replication_factor,
 			rpc_listen_addr: config.rpc_bind_addr,
 			#[cfg(any(feature = "consul-discovery", feature = "kubernetes-discovery"))]
@@ -429,6 +465,67 @@ impl System {
 		}
 	}
 
+	pub fn health(&self) -> ClusterHealth {
+		let ring: Arc<_> = self.ring.borrow().clone();
+		let quorum = self.replication_mode.write_quorum();
+		let replication_factor = self.replication_factor;
+
+		let nodes = self
+			.get_known_nodes()
+			.into_iter()
+			.map(|n| (n.id, n))
+			.collect::<HashMap<Uuid, _>>();
+		let connected_nodes = nodes.iter().filter(|(_, n)| n.is_up).count();
+
+		let storage_nodes = ring
+			.layout
+			.roles
+			.items()
+			.iter()
+			.filter(|(_, _, v)| matches!(v, NodeRoleV(Some(r)) if r.capacity.is_some()))
+			.collect::<Vec<_>>();
+		let storage_nodes_ok = storage_nodes
+			.iter()
+			.filter(|(x, _, _)| nodes.get(x).map(|n| n.is_up).unwrap_or(false))
+			.count();
+
+		let partitions = ring.partitions();
+		let partitions_n_up = partitions
+			.iter()
+			.map(|(_, h)| {
+				let pn = ring.get_nodes(h, ring.replication_factor);
+				pn.iter()
+					.filter(|x| nodes.get(x).map(|n| n.is_up).unwrap_or(false))
+					.count()
+			})
+			.collect::<Vec<usize>>();
+		let partitions_all_ok = partitions_n_up
+			.iter()
+			.filter(|c| **c == replication_factor)
+			.count();
+		let partitions_quorum = partitions_n_up.iter().filter(|c| **c >= quorum).count();
+
+		let status =
+			if partitions_quorum == partitions.len() && storage_nodes_ok == storage_nodes.len() {
+				ClusterHealthStatus::Healthy
+			} else if partitions_quorum == partitions.len() {
+				ClusterHealthStatus::Degraded
+			} else {
+				ClusterHealthStatus::Unavailable
+			};
+
+		ClusterHealth {
+			status,
+			known_nodes: nodes.len(),
+			connected_nodes,
+			storage_nodes: storage_nodes.len(),
+			storage_nodes_ok,
+			partitions: partitions.len(),
+			partitions_quorum,
+			partitions_all_ok,
+		}
+	}
+
 	// ---- INTERNALS ----
 
 	#[cfg(feature = "consul-discovery")]
diff --git a/src/table/replication/mod.rs b/src/table/replication/mod.rs
index 19e6772f..dfcb026a 100644
--- a/src/table/replication/mod.rs
+++ b/src/table/replication/mod.rs
@@ -1,10 +1,8 @@
 mod parameters;
 
 mod fullcopy;
-mod mode;
 mod sharded;
 
 pub use fullcopy::TableFullReplication;
-pub use mode::ReplicationMode;
 pub use parameters::*;
 pub use sharded::TableShardedReplication;