From e54effec45642942c961a96222dbbfa8d901a8f3 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 19 Feb 2024 11:36:22 +0100 Subject: [doc-fixes] fixes to index of configuration options --- doc/book/reference-manual/configuration.md | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'doc') diff --git a/doc/book/reference-manual/configuration.md b/doc/book/reference-manual/configuration.md index 5e12a7da..02064444 100644 --- a/doc/book/reference-manual/configuration.md +++ b/doc/book/reference-manual/configuration.md @@ -83,7 +83,7 @@ Top-level configuration options: [`block_size`](#block_size), [`bootstrap_peers`](#bootstrap_peers), [`compression_level`](#compression_level), -[`data_dir`](#metadata_dir), +[`data_dir`](#data_dir), [`data_fsync`](#data_fsync), [`db_engine`](#db_engine), [`lmdb_map_size`](#lmdb_map_size), @@ -92,20 +92,19 @@ Top-level configuration options: [`replication_mode`](#replication_mode), [`rpc_bind_addr`](#rpc_bind_addr), [`rpc_public_addr`](#rpc_public_addr), -[`rpc_secret`](#rpc_secret), -[`rpc_secret_file`](#rpc_secret), +[`rpc_secret`/`rpc_secret_file`](#rpc_secret), [`sled_cache_capacity`](#sled_cache_capacity), [`sled_flush_every_ms`](#sled_flush_every_ms). The `[consul_discovery]` section: [`api`](#consul_api), [`ca_cert`](#consul_ca_cert), -[`client_cert`](#consul_client_cert), -[`client_key`](#consul_client_cert), +[`client_cert`](#consul_client_cert_and_key), +[`client_key`](#consul_client_cert_and_key), [`consul_http_addr`](#consul_http_addr), -[`meta`](#consul_tags), +[`meta`](#consul_tags_and_meta), [`service_name`](#consul_service_name), -[`tags`](#consul_tags), +[`tags`](#consul_tags_and_meta), [`tls_skip_verify`](#consul_tls_skip_verify), [`token`](#consul_token). @@ -125,10 +124,8 @@ The `[s3_web]` section: The `[admin]` section: [`api_bind_addr`](#admin_api_bind_addr), -[`metrics_token`](#admin_metrics_token), -[`metrics_token_file`](#admin_metrics_token), -[`admin_token`](#admin_token), -[`admin_token_file`](#admin_token), +[`metrics_token`/`metrics_token_file`](#admin_metrics_token), +[`admin_token`/`admin_token_file`](#admin_token), [`trace_sink`](#admin_trace_sink), @@ -474,7 +471,7 @@ the `/v1/catalog` endpoints, enabling mTLS if `client_cert` and `client_key` are `service_name` should be set to the service name under which Garage's RPC ports are announced. -#### `client_cert`, `client_key` {#consul_client_cert} +#### `client_cert`, `client_key` {#consul_client_cert_and_key} TLS client certificate and client key to use when communicating with Consul over TLS. Both are mandatory when doing so. Only available when `api = "catalog"`. @@ -508,7 +505,7 @@ node_prefix "" { } ``` -#### `tags` and `meta` {#consul_tags} +#### `tags` and `meta` {#consul_tags_and_meta} Additional list of tags and map of service meta to add during service registration. -- cgit v1.2.3 From e73cb79e1edf9324c4880c1c92e4f9c869b8b130 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 19 Feb 2024 11:42:06 +0100 Subject: [doc-fixes] configuration reference: fix typo and set block size in M --- doc/book/reference-manual/configuration.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'doc') diff --git a/doc/book/reference-manual/configuration.md b/doc/book/reference-manual/configuration.md index 02064444..27d9fd54 100644 --- a/doc/book/reference-manual/configuration.md +++ b/doc/book/reference-manual/configuration.md @@ -17,7 +17,7 @@ data_fsync = false db_engine = "lmdb" -block_size = 1048576 +block_size = "1M" sled_cache_capacity = "128MiB" sled_flush_every_ms = 2000 @@ -31,7 +31,7 @@ rpc_public_addr = "[fc00:1::1]:3901" bootstrap_peers = [ "563e1ac825ee3323aa441e72c26d1030d6d4414aeb3dd25287c531e7fc2bc95d@[fc00:1::1]:3901", - "86f0f26ae4afbd59aaf9cfb059eefac844951efd5b8caeec0d53f4ed6c85f332[fc00:1::2]:3901", + "86f0f26ae4afbd59aaf9cfb059eefac844951efd5b8caeec0d53f4ed6c85f332@[fc00:1::2]:3901", "681456ab91350f92242e80a531a3ec9392cb7c974f72640112f90a600d7921a4@[fc00:B::1]:3901", "212fd62eeaca72c122b45a7f4fa0f55e012aa5e24ac384a72a3016413fa724ff@[fc00:F::1]:3901", ] -- cgit v1.2.3 From eab54b37981073651712a15334b60b2c409f2576 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 19 Feb 2024 11:31:09 +0100 Subject: [networking-fixes] add doc for rpc_bind_outgoing --- doc/book/reference-manual/configuration.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'doc') diff --git a/doc/book/reference-manual/configuration.md b/doc/book/reference-manual/configuration.md index 27d9fd54..f1474613 100644 --- a/doc/book/reference-manual/configuration.md +++ b/doc/book/reference-manual/configuration.md @@ -27,6 +27,7 @@ compression_level = 1 rpc_secret = "4425f5c26c5e11581d3223904324dcb5b5d5dfb14e5e7f35e38c595424f5f1e6" rpc_bind_addr = "[::]:3901" +rpc_bind_outgoing = false rpc_public_addr = "[fc00:1::1]:3901" bootstrap_peers = [ @@ -91,6 +92,7 @@ Top-level configuration options: [`metadata_fsync`](#metadata_fsync), [`replication_mode`](#replication_mode), [`rpc_bind_addr`](#rpc_bind_addr), +[`rpc_bind_outgoing`](#rpc_bind_outgoing), [`rpc_public_addr`](#rpc_public_addr), [`rpc_secret`/`rpc_secret_file`](#rpc_secret), [`sled_cache_capacity`](#sled_cache_capacity), @@ -415,6 +417,17 @@ the node, even in the case of a NAT: the NAT should be configured to forward the port number to the same internal port nubmer. This means that if you have several nodes running behind a NAT, they should each use a different RPC port number. +#### `rpc_bind_outgoing` {#rpc_bind_outgoing} (since v0.9.2) + +If enabled, pre-bind all sockets for outgoing connections to the same IP address +used for listening (the IP address specified in `rpc_bind_addr`) before +trying to connect to remote nodes. +This can be necessary if a node has multiple IP addresses, +but only one is allowed or able to reach the other nodes, +for instance due to firewall rules or specific routing configuration. + +Disabled by default. + #### `rpc_public_addr` {#rpc_public_addr} The address and port that other nodes need to use to contact this node for -- cgit v1.2.3 From bcd571ef57662a1dc86ef5bb0c055d3f8bd9e3bc Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 20 Feb 2024 14:59:04 +0100 Subject: [peer-metrics] add documentation for new cluster status metrics --- doc/book/reference-manual/monitoring.md | 106 ++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) (limited to 'doc') diff --git a/doc/book/reference-manual/monitoring.md b/doc/book/reference-manual/monitoring.md index 97c533d3..f392c133 100644 --- a/doc/book/reference-manual/monitoring.md +++ b/doc/book/reference-manual/monitoring.md @@ -27,6 +27,112 @@ Exposes the Garage replication factor configured on the node garage_replication_factor 3 ``` +#### `garage_local_disk_avail` and `garage_local_disk_total` (gauge) + +Reports the available and total disk space on each node, for data and metadata separately. + +``` +garage_local_disk_avail{volume="data"} 540341960704 +garage_local_disk_avail{volume="metadata"} 540341960704 +garage_local_disk_total{volume="data"} 763063566336 +garage_local_disk_total{volume="metadata"} 763063566336 +``` + +### Cluster health status metrics + +#### `cluster_healthy` (gauge) + +Whether all storage nodes are connected (0 or 1) + +``` +cluster_healthy 0 +``` + +#### `cluster_available` (gauge) + +Whether all requests can be served, even if some storage nodes are disconnected + +``` +cluster_available 1 +``` + +#### `cluster_connected_nodes` (gauge) + +Number of nodes currently connected + +``` +cluster_connected_nodes 3 +``` + +#### `cluster_known_nodes` (gauge) + +Number of nodes already seen once in the cluster + +``` +cluster_known_nodes 3 +``` + +#### `cluster_layout_node_connected` (gauge) + +Connection status for individual nodes of the cluster layout + +``` +cluster_layout_node_connected{id="62b218d848e86a64",role_capacity="1000000000",role_gateway="0",role_zone="dc1"} 1 +cluster_layout_node_connected{id="a11c7cf18af29737",role_capacity="1000000000",role_gateway="0",role_zone="dc1"} 0 +cluster_layout_node_connected{id="a235ac7695e0c54d",role_capacity="1000000000",role_gateway="0",role_zone="dc1"} 1 +cluster_layout_node_connected{id="b10c110e4e854e5a",role_capacity="1000000000",role_gateway="0",role_zone="dc1"} 1 +``` + +#### `cluster_layout_node_disconnected_time` (gauge) + +Time (in seconds) since last connection to individual nodes of the cluster layout + +``` +cluster_layout_node_disconnected_time{id="62b218d848e86a64",role_capacity="1000000000",role_gateway="0",role_zone="dc1"} 0 +cluster_layout_node_disconnected_time{id="a235ac7695e0c54d",role_capacity="1000000000",role_gateway="0",role_zone="dc1"} 0 +cluster_layout_node_disconnected_time{id="b10c110e4e854e5a",role_capacity="1000000000",role_gateway="0",role_zone="dc1"} 0 +``` + +#### `cluster_storage_nodes` (gauge) + +Number of storage nodes declared in the current layout + +``` +cluster_storage_nodes 4 +``` + +#### `cluster_storage_nodes_ok` (gauge) + +Number of storage nodes currently connected + +``` +cluster_storage_nodes_ok 3 +``` + +#### `cluster_partitions` (gauge) + +Number of partitions in the layout (this is always 256) + +``` +cluster_partitions 256 +``` + +#### `cluster_partitions_all_ok` (gauge) + +Number of partitions for which all storage nodes are connected + +``` +cluster_partitions_all_ok 64 +``` + +#### `cluster_partitions_quorum` (gauge) + +Number of partitions for which we have a quorum of connected nodes and all requests can be served + +``` +cluster_partitions_quorum 256 +``` + ### Metrics of the API endpoints #### `api_admin_request_counter` (counter) -- cgit v1.2.3