diff options
author | mricher <maximilien.richer@gmail.com> | 2021-09-28 08:57:20 +0200 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2022-03-14 10:51:12 +0100 |
commit | e349af13a7268d567c1bacc819af5b89c2d4231f (patch) | |
tree | b50fc78efac150bbf3c005c33d39a874c5e37b41 /src | |
parent | 9d44127245990cc55dbdff5a4bd0a1524348f110 (diff) | |
download | garage-e349af13a7268d567c1bacc819af5b89c2d4231f.tar.gz garage-e349af13a7268d567c1bacc819af5b89c2d4231f.zip |
Update dependencies and add admin module with metrics
- Global dependencies updated in Cargo.lock
- New module created in src/admin to host:
- the (future) admin REST API
- the metric collection
- add configuration block
No metrics implemented yet
Diffstat (limited to 'src')
-rw-r--r-- | src/admin/Cargo.toml | 28 | ||||
-rw-r--r-- | src/admin/lib.rs | 6 | ||||
-rw-r--r-- | src/admin/metrics.rs | 141 | ||||
-rw-r--r-- | src/garage/Cargo.toml | 1 | ||||
-rw-r--r-- | src/garage/server.rs | 13 | ||||
-rw-r--r-- | src/util/config.rs | 10 |
6 files changed, 198 insertions, 1 deletions
diff --git a/src/admin/Cargo.toml b/src/admin/Cargo.toml new file mode 100644 index 00000000..9775b667 --- /dev/null +++ b/src/admin/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "garage_admin" +version = "0.6.0" +authors = ["Maximilien Richer <code@mricher.fr>"] +edition = "2018" +license = "AGPL-3.0" +description = "Administration and metrics REST HTTP server for Garage" +repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage" + +[lib] +path = "lib.rs" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +garage_model = { version = "0.6.0", path = "../model" } +garage_util = { version = "0.6.0", path = "../util" } + +futures = "0.3" +futures-util = "0.3" +http = "0.2" +hyper = "0.14" +log = "0.4" + +opentelemetry = "0.17" +opentelemetry-prometheus = "0.10" +prometheus = "0.13" +lazy_static = "1.4" diff --git a/src/admin/lib.rs b/src/admin/lib.rs new file mode 100644 index 00000000..443361be --- /dev/null +++ b/src/admin/lib.rs @@ -0,0 +1,6 @@ +//! Crate for handling the admin and metric HTTP APIs +#[macro_use] +extern crate log; +extern crate lazy_static; + +pub mod metrics; diff --git a/src/admin/metrics.rs b/src/admin/metrics.rs new file mode 100644 index 00000000..547ee4c8 --- /dev/null +++ b/src/admin/metrics.rs @@ -0,0 +1,141 @@ +use hyper::{ + header::CONTENT_TYPE, + service::{make_service_fn, service_fn}, + Body, Method, Request, Response, Server, +}; +use lazy_static::lazy_static; +use opentelemetry::{ + global, + metrics::{BoundCounter, BoundValueRecorder}, + KeyValue, +}; +use opentelemetry_prometheus::PrometheusExporter; +use prometheus::{Encoder, TextEncoder}; +use std::convert::Infallible; +use std::sync::Arc; +use std::time::SystemTime; + +use futures::future::*; +use garage_model::garage::Garage; +use garage_util::error::Error as GarageError; + +lazy_static! { + // This defines the differennt tags that will be referenced by the object + static ref HANDLER_ALL: [KeyValue; 1] = [KeyValue::new("handler", "all")]; +} + +// serve_req on metric endpoint +async fn serve_req( + req: Request<Body>, + admin_server: Arc<AdminServer>, +) -> Result<Response<Body>, hyper::Error> { + println!("Receiving request at path {}", req.uri()); + let request_start = SystemTime::now(); + + admin_server.metrics.http_counter.add(1); + + let response = match (req.method(), req.uri().path()) { + (&Method::GET, "/metrics") => { + let mut buffer = vec![]; + let encoder = TextEncoder::new(); + let metric_families = admin_server.exporter.registry().gather(); + encoder.encode(&metric_families, &mut buffer).unwrap(); + admin_server + .metrics + .http_body_gauge + .record(buffer.len() as u64); + + Response::builder() + .status(200) + .header(CONTENT_TYPE, encoder.format_type()) + .body(Body::from(buffer)) + .unwrap() + } + _ => Response::builder() + .status(404) + .body(Body::from("Not implemented")) + .unwrap(), + }; + + admin_server + .metrics + .http_req_histogram + .record(request_start.elapsed().map_or(0.0, |d| d.as_secs_f64())); + Ok(response) +} + +// AdminServer hold the admin server internal admin_server and the metric exporter +pub struct AdminServer { + exporter: PrometheusExporter, + metrics: AdminServerMetrics, +} + +// GarageMetricadmin_server holds the metrics counter definition for Garage +// FIXME: we would rather have that split up among the different libraries? +struct AdminServerMetrics { + http_counter: BoundCounter<u64>, + http_body_gauge: BoundValueRecorder<u64>, + http_req_histogram: BoundValueRecorder<f64>, + bucket_v2_merkle_updater_todo_queue_length: BoundValueRecorder<f64>, +} + +impl AdminServer { + /// init initilialize the AdminServer and background metric server + pub fn init() -> AdminServer { + let exporter = opentelemetry_prometheus::exporter().init(); + let meter = global::meter("garage/admin_server"); + AdminServer { + exporter, + metrics: AdminServerMetrics { + http_counter: meter + .u64_counter("router.http_requests_total") + .with_description("Total number of HTTP requests made.") + .init() + .bind(HANDLER_ALL.as_ref()), + http_body_gauge: meter + .u64_value_recorder("example.http_response_size_bytes") + .with_description("The metrics HTTP response sizes in bytes.") + .init() + .bind(HANDLER_ALL.as_ref()), + http_req_histogram: meter + .f64_value_recorder("example.http_request_duration_seconds") + .with_description("The HTTP request latencies in seconds.") + .init() + .bind(HANDLER_ALL.as_ref()), + bucket_v2_merkle_updater_todo_queue_length: meter + .f64_value_recorder("bucket_v2.merkle_updater.todo_queue_length") + .with_description("Bucket merkle updater TODO queue length.") + .init() + .bind(HANDLER_ALL.as_ref()), + }, + } + } + /// run execute the admin server on the designated HTTP port and listen for requests + pub async fn run( + self, + garage: Arc<Garage>, + shutdown_signal: impl Future<Output = ()>, + ) -> Result<(), GarageError> { + let admin_server = Arc::new(self); + // For every connection, we must make a `Service` to handle all + // incoming HTTP requests on said connection. + let make_svc = make_service_fn(move |_conn| { + let admin_server = admin_server.clone(); + // This is the `Service` that will handle the connection. + // `service_fn` is a helper to convert a function that + // returns a Response into a `Service`. + async move { + Ok::<_, Infallible>(service_fn(move |req| serve_req(req, admin_server.clone()))) + } + }); + + let addr = &garage.config.admin_api.bind_addr; + + let server = Server::bind(&addr).serve(make_svc); + let graceful = server.with_graceful_shutdown(shutdown_signal); + info!("Admin server listening on http://{}", addr); + + graceful.await?; + Ok(()) + } +} diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index 463f83e7..22e0f0f0 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -27,6 +27,7 @@ garage_rpc = { version = "0.6.0", path = "../rpc" } garage_table = { version = "0.6.0", path = "../table" } garage_util = { version = "0.6.0", path = "../util" } garage_web = { version = "0.6.0", path = "../web" } +garage_admin = { version = "0.6.0", path = "../admin" } bytes = "1.0" git-version = "0.3.4" diff --git a/src/garage/server.rs b/src/garage/server.rs index f4d62e91..923df1cd 100644 --- a/src/garage/server.rs +++ b/src/garage/server.rs @@ -6,6 +6,7 @@ use garage_util::background::*; use garage_util::config::*; use garage_util::error::Error; +use garage_admin::metrics::*; use garage_api::run_api_server; use garage_model::garage::Garage; use garage_web::run_web_server; @@ -34,6 +35,9 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { .open() .expect("Unable to open sled DB"); + info!("Configure and run admin web server..."); + let admin_server_init = AdminServer::init(); + info!("Initializing background runner..."); let watch_cancel = netapp::util::watch_ctrl_c(); let (background, await_background_done) = BackgroundRunner::new(16, watch_cancel.clone()); @@ -43,7 +47,7 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { let run_system = tokio::spawn(garage.system.clone().run(watch_cancel.clone())); - info!("Crate admin RPC handler..."); + info!("Create admin RPC handler..."); AdminRpcHandler::new(garage.clone()); info!("Initializing API server..."); @@ -58,6 +62,10 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { wait_from(watch_cancel.clone()), )); + info!("Configure and run admin web server..."); + let admin_server = + tokio::spawn(admin_server_init.run(garage.clone(), wait_from(watch_cancel.clone()))); + // Stuff runs // When a cancel signal is sent, stuff stops @@ -67,6 +75,9 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { if let Err(e) = web_server.await? { warn!("Web server exited with error: {}", e); } + if let Err(e) = admin_server.await? { + warn!("Admin web server exited with error: {}", e); + } // Remove RPC handlers for system to break reference cycles garage.system.netapp.drop_all_handlers(); diff --git a/src/util/config.rs b/src/util/config.rs index 19c75478..2d15748f 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -73,6 +73,9 @@ pub struct Config { /// Configuration for serving files as normal web server pub s3_web: WebConfig, + + /// Configuration for the admin API endpoint + pub admin_api: AdminConfig, } /// Configuration for S3 api @@ -96,6 +99,13 @@ pub struct WebConfig { pub root_domain: String, } +/// Configuration for the admin and monitoring HTTP API +#[derive(Deserialize, Debug, Clone)] +pub struct AdminConfig { + /// Address and port to bind for admin API serving + pub bind_addr: SocketAddr, +} + fn default_sled_cache_capacity() -> u64 { 128 * 1024 * 1024 } |