aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Cargo.lock81
-rw-r--r--Cargo.toml3
-rwxr-xr-xscript/dev-cluster.sh3
-rw-r--r--src/admin/Cargo.toml28
-rw-r--r--src/admin/lib.rs6
-rw-r--r--src/admin/metrics.rs141
-rw-r--r--src/garage/Cargo.toml1
-rw-r--r--src/garage/server.rs13
-rw-r--r--src/util/config.rs10
9 files changed, 284 insertions, 2 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 09097857..de1035bd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -404,6 +404,16 @@ dependencies = [
]
[[package]]
+name = "crossbeam-channel"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e54ea8bc3fb1ee042f5aace6e3c6e025d3874866da222930f70ce62aceba0bfa"
+dependencies = [
+ "cfg-if",
+ "crossbeam-utils",
+]
+
+[[package]]
name = "crossbeam-epoch"
version = "0.9.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -766,6 +776,7 @@ dependencies = [
"bytes 1.1.0",
"futures",
"futures-util",
+ "garage_admin",
"garage_api",
"garage_model 0.6.0",
"garage_rpc 0.6.0",
@@ -792,6 +803,23 @@ dependencies = [
]
[[package]]
+name = "garage_admin"
+version = "0.6.0"
+dependencies = [
+ "futures",
+ "futures-util",
+ "garage_model 0.6.0",
+ "garage_util 0.6.0",
+ "http",
+ "hyper",
+ "lazy_static",
+ "log",
+ "opentelemetry",
+ "opentelemetry-prometheus",
+ "prometheus",
+]
+
+[[package]]
name = "garage_api"
version = "0.6.0"
dependencies = [
@@ -1825,6 +1853,38 @@ dependencies = [
]
[[package]]
+name = "opentelemetry"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6105e89802af13fdf48c49d7646d3b533a70e536d818aae7e78ba0433d01acb8"
+dependencies = [
+ "async-trait",
+ "crossbeam-channel",
+ "dashmap",
+ "fnv",
+ "futures-channel",
+ "futures-executor",
+ "futures-util",
+ "js-sys",
+ "lazy_static",
+ "percent-encoding",
+ "pin-project 1.0.10",
+ "rand",
+ "thiserror",
+]
+
+[[package]]
+name = "opentelemetry-prometheus"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9328977e479cebe12ce0d3fcecdaea4721d234895a9440c5b5dfd113f0594ac6"
+dependencies = [
+ "opentelemetry",
+ "prometheus",
+ "protobuf",
+]
+
+[[package]]
name = "ordered-float"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2077,6 +2137,27 @@ dependencies = [
]
[[package]]
+name = "prometheus"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7f64969ffd5dd8f39bd57a68ac53c163a095ed9d0fb707146da1b27025a3504"
+dependencies = [
+ "cfg-if",
+ "fnv",
+ "lazy_static",
+ "memchr",
+ "parking_lot",
+ "protobuf",
+ "thiserror",
+]
+
+[[package]]
+name = "protobuf"
+version = "2.27.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf7e6d18738ecd0902d30d1ad232c9125985a3422929b16c65517b38adc14f96"
+
+[[package]]
name = "quick-error"
version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index 739e698e..88c8ad7d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,9 +4,10 @@ members = [
"src/rpc",
"src/table",
"src/model",
+ "src/admin",
"src/api",
"src/web",
- "src/garage",
+ "src/garage"
]
[profile.dev]
diff --git a/script/dev-cluster.sh b/script/dev-cluster.sh
index c1ffb355..5cc003ef 100755
--- a/script/dev-cluster.sh
+++ b/script/dev-cluster.sh
@@ -44,6 +44,9 @@ root_domain = ".s3.garage.localhost"
bind_addr = "0.0.0.0:$((3920+$count))"
root_domain = ".web.garage.localhost"
index = "index.html"
+
+[admin_api]
+bind_addr = "0.0.0.0:$((9900+$count))"
EOF
echo -en "$LABEL configuration written to $CONF_PATH\n"
diff --git a/src/admin/Cargo.toml b/src/admin/Cargo.toml
new file mode 100644
index 00000000..9775b667
--- /dev/null
+++ b/src/admin/Cargo.toml
@@ -0,0 +1,28 @@
+[package]
+name = "garage_admin"
+version = "0.6.0"
+authors = ["Maximilien Richer <code@mricher.fr>"]
+edition = "2018"
+license = "AGPL-3.0"
+description = "Administration and metrics REST HTTP server for Garage"
+repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
+
+[lib]
+path = "lib.rs"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+garage_model = { version = "0.6.0", path = "../model" }
+garage_util = { version = "0.6.0", path = "../util" }
+
+futures = "0.3"
+futures-util = "0.3"
+http = "0.2"
+hyper = "0.14"
+log = "0.4"
+
+opentelemetry = "0.17"
+opentelemetry-prometheus = "0.10"
+prometheus = "0.13"
+lazy_static = "1.4"
diff --git a/src/admin/lib.rs b/src/admin/lib.rs
new file mode 100644
index 00000000..443361be
--- /dev/null
+++ b/src/admin/lib.rs
@@ -0,0 +1,6 @@
+//! Crate for handling the admin and metric HTTP APIs
+#[macro_use]
+extern crate log;
+extern crate lazy_static;
+
+pub mod metrics;
diff --git a/src/admin/metrics.rs b/src/admin/metrics.rs
new file mode 100644
index 00000000..547ee4c8
--- /dev/null
+++ b/src/admin/metrics.rs
@@ -0,0 +1,141 @@
+use hyper::{
+ header::CONTENT_TYPE,
+ service::{make_service_fn, service_fn},
+ Body, Method, Request, Response, Server,
+};
+use lazy_static::lazy_static;
+use opentelemetry::{
+ global,
+ metrics::{BoundCounter, BoundValueRecorder},
+ KeyValue,
+};
+use opentelemetry_prometheus::PrometheusExporter;
+use prometheus::{Encoder, TextEncoder};
+use std::convert::Infallible;
+use std::sync::Arc;
+use std::time::SystemTime;
+
+use futures::future::*;
+use garage_model::garage::Garage;
+use garage_util::error::Error as GarageError;
+
+lazy_static! {
+ // This defines the differennt tags that will be referenced by the object
+ static ref HANDLER_ALL: [KeyValue; 1] = [KeyValue::new("handler", "all")];
+}
+
+// serve_req on metric endpoint
+async fn serve_req(
+ req: Request<Body>,
+ admin_server: Arc<AdminServer>,
+) -> Result<Response<Body>, hyper::Error> {
+ println!("Receiving request at path {}", req.uri());
+ let request_start = SystemTime::now();
+
+ admin_server.metrics.http_counter.add(1);
+
+ let response = match (req.method(), req.uri().path()) {
+ (&Method::GET, "/metrics") => {
+ let mut buffer = vec![];
+ let encoder = TextEncoder::new();
+ let metric_families = admin_server.exporter.registry().gather();
+ encoder.encode(&metric_families, &mut buffer).unwrap();
+ admin_server
+ .metrics
+ .http_body_gauge
+ .record(buffer.len() as u64);
+
+ Response::builder()
+ .status(200)
+ .header(CONTENT_TYPE, encoder.format_type())
+ .body(Body::from(buffer))
+ .unwrap()
+ }
+ _ => Response::builder()
+ .status(404)
+ .body(Body::from("Not implemented"))
+ .unwrap(),
+ };
+
+ admin_server
+ .metrics
+ .http_req_histogram
+ .record(request_start.elapsed().map_or(0.0, |d| d.as_secs_f64()));
+ Ok(response)
+}
+
+// AdminServer hold the admin server internal admin_server and the metric exporter
+pub struct AdminServer {
+ exporter: PrometheusExporter,
+ metrics: AdminServerMetrics,
+}
+
+// GarageMetricadmin_server holds the metrics counter definition for Garage
+// FIXME: we would rather have that split up among the different libraries?
+struct AdminServerMetrics {
+ http_counter: BoundCounter<u64>,
+ http_body_gauge: BoundValueRecorder<u64>,
+ http_req_histogram: BoundValueRecorder<f64>,
+ bucket_v2_merkle_updater_todo_queue_length: BoundValueRecorder<f64>,
+}
+
+impl AdminServer {
+ /// init initilialize the AdminServer and background metric server
+ pub fn init() -> AdminServer {
+ let exporter = opentelemetry_prometheus::exporter().init();
+ let meter = global::meter("garage/admin_server");
+ AdminServer {
+ exporter,
+ metrics: AdminServerMetrics {
+ http_counter: meter
+ .u64_counter("router.http_requests_total")
+ .with_description("Total number of HTTP requests made.")
+ .init()
+ .bind(HANDLER_ALL.as_ref()),
+ http_body_gauge: meter
+ .u64_value_recorder("example.http_response_size_bytes")
+ .with_description("The metrics HTTP response sizes in bytes.")
+ .init()
+ .bind(HANDLER_ALL.as_ref()),
+ http_req_histogram: meter
+ .f64_value_recorder("example.http_request_duration_seconds")
+ .with_description("The HTTP request latencies in seconds.")
+ .init()
+ .bind(HANDLER_ALL.as_ref()),
+ bucket_v2_merkle_updater_todo_queue_length: meter
+ .f64_value_recorder("bucket_v2.merkle_updater.todo_queue_length")
+ .with_description("Bucket merkle updater TODO queue length.")
+ .init()
+ .bind(HANDLER_ALL.as_ref()),
+ },
+ }
+ }
+ /// run execute the admin server on the designated HTTP port and listen for requests
+ pub async fn run(
+ self,
+ garage: Arc<Garage>,
+ shutdown_signal: impl Future<Output = ()>,
+ ) -> Result<(), GarageError> {
+ let admin_server = Arc::new(self);
+ // For every connection, we must make a `Service` to handle all
+ // incoming HTTP requests on said connection.
+ let make_svc = make_service_fn(move |_conn| {
+ let admin_server = admin_server.clone();
+ // This is the `Service` that will handle the connection.
+ // `service_fn` is a helper to convert a function that
+ // returns a Response into a `Service`.
+ async move {
+ Ok::<_, Infallible>(service_fn(move |req| serve_req(req, admin_server.clone())))
+ }
+ });
+
+ let addr = &garage.config.admin_api.bind_addr;
+
+ let server = Server::bind(&addr).serve(make_svc);
+ let graceful = server.with_graceful_shutdown(shutdown_signal);
+ info!("Admin server listening on http://{}", addr);
+
+ graceful.await?;
+ Ok(())
+ }
+}
diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml
index 463f83e7..22e0f0f0 100644
--- a/src/garage/Cargo.toml
+++ b/src/garage/Cargo.toml
@@ -27,6 +27,7 @@ garage_rpc = { version = "0.6.0", path = "../rpc" }
garage_table = { version = "0.6.0", path = "../table" }
garage_util = { version = "0.6.0", path = "../util" }
garage_web = { version = "0.6.0", path = "../web" }
+garage_admin = { version = "0.6.0", path = "../admin" }
bytes = "1.0"
git-version = "0.3.4"
diff --git a/src/garage/server.rs b/src/garage/server.rs
index f4d62e91..923df1cd 100644
--- a/src/garage/server.rs
+++ b/src/garage/server.rs
@@ -6,6 +6,7 @@ use garage_util::background::*;
use garage_util::config::*;
use garage_util::error::Error;
+use garage_admin::metrics::*;
use garage_api::run_api_server;
use garage_model::garage::Garage;
use garage_web::run_web_server;
@@ -34,6 +35,9 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
.open()
.expect("Unable to open sled DB");
+ info!("Configure and run admin web server...");
+ let admin_server_init = AdminServer::init();
+
info!("Initializing background runner...");
let watch_cancel = netapp::util::watch_ctrl_c();
let (background, await_background_done) = BackgroundRunner::new(16, watch_cancel.clone());
@@ -43,7 +47,7 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
let run_system = tokio::spawn(garage.system.clone().run(watch_cancel.clone()));
- info!("Crate admin RPC handler...");
+ info!("Create admin RPC handler...");
AdminRpcHandler::new(garage.clone());
info!("Initializing API server...");
@@ -58,6 +62,10 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
wait_from(watch_cancel.clone()),
));
+ info!("Configure and run admin web server...");
+ let admin_server =
+ tokio::spawn(admin_server_init.run(garage.clone(), wait_from(watch_cancel.clone())));
+
// Stuff runs
// When a cancel signal is sent, stuff stops
@@ -67,6 +75,9 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
if let Err(e) = web_server.await? {
warn!("Web server exited with error: {}", e);
}
+ if let Err(e) = admin_server.await? {
+ warn!("Admin web server exited with error: {}", e);
+ }
// Remove RPC handlers for system to break reference cycles
garage.system.netapp.drop_all_handlers();
diff --git a/src/util/config.rs b/src/util/config.rs
index 19c75478..2d15748f 100644
--- a/src/util/config.rs
+++ b/src/util/config.rs
@@ -73,6 +73,9 @@ pub struct Config {
/// Configuration for serving files as normal web server
pub s3_web: WebConfig,
+
+ /// Configuration for the admin API endpoint
+ pub admin_api: AdminConfig,
}
/// Configuration for S3 api
@@ -96,6 +99,13 @@ pub struct WebConfig {
pub root_domain: String,
}
+/// Configuration for the admin and monitoring HTTP API
+#[derive(Deserialize, Debug, Clone)]
+pub struct AdminConfig {
+ /// Address and port to bind for admin API serving
+ pub bind_addr: SocketAddr,
+}
+
fn default_sled_cache_capacity() -> u64 {
128 * 1024 * 1024
}