aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2022-02-22 15:21:06 +0100
committerAlex Auvolat <alex@adnab.me>2022-03-14 10:53:50 +0100
commitd9a35359bf8903f37f5f7aa77421ed35b626e0af (patch)
tree1b781e5e6de4d850e4938d6192f15c628e2eed27 /src
parent2a5609b292de019085f93a79b7b73f7a8341bf51 (diff)
downloadgarage-d9a35359bf8903f37f5f7aa77421ed35b626e0af.tar.gz
garage-d9a35359bf8903f37f5f7aa77421ed35b626e0af.zip
Add metrics to web endpoint
Diffstat (limited to 'src')
-rw-r--r--src/admin/tracing_setup.rs3
-rw-r--r--src/api/api_server.rs21
-rw-r--r--src/util/metrics.rs9
-rw-r--r--src/web/Cargo.toml2
-rw-r--r--src/web/web_server.rs80
5 files changed, 98 insertions, 17 deletions
diff --git a/src/admin/tracing_setup.rs b/src/admin/tracing_setup.rs
index 83fa5622..c561d568 100644
--- a/src/admin/tracing_setup.rs
+++ b/src/admin/tracing_setup.rs
@@ -1,7 +1,7 @@
use std::time::Duration;
use opentelemetry::sdk::{
- trace::{self, IdGenerator, Sampler},
+ trace::{self, IdGenerator},
Resource,
};
use opentelemetry::KeyValue;
@@ -24,7 +24,6 @@ pub fn init_tracing(export_to: &str, node_id: Uuid) -> Result<(), Error> {
.with_trace_config(
trace::config()
.with_id_generator(IdGenerator::default())
- .with_sampler(Sampler::TraceIdRatioBased(0.01f64))
.with_resource(Resource::new(vec![
KeyValue::new("service.name", "garage"),
KeyValue::new("service.instance.id", node_id),
diff --git a/src/api/api_server.rs b/src/api/api_server.rs
index 8502d9d8..a6bf5a44 100644
--- a/src/api/api_server.rs
+++ b/src/api/api_server.rs
@@ -16,7 +16,7 @@ use opentelemetry::{
use garage_util::data::*;
use garage_util::error::Error as GarageError;
-use garage_util::metrics::RecordDuration;
+use garage_util::metrics::{gen_trace_id, RecordDuration};
use garage_model::garage::Garage;
use garage_model::key_table::Key;
@@ -110,16 +110,12 @@ async fn handler(
debug!("{:?}", req);
let tracer = opentelemetry::global::tracer("garage");
- let trace_id = gen_uuid();
let span = tracer
.span_builder("S3 API call (unknown)")
- .with_trace_id(
- opentelemetry::trace::TraceId::from_hex(&hex::encode(&trace_id.as_slice()[..16]))
- .unwrap(),
- )
+ .with_trace_id(gen_trace_id())
.with_attributes(vec![
KeyValue::new("method", format!("{}", req.method())),
- KeyValue::new("uri", req.uri().path().to_string()),
+ KeyValue::new("uri", req.uri().to_string()),
])
.start(&tracer);
@@ -177,9 +173,14 @@ async fn handler_stage2(
let (endpoint, bucket_name) = Endpoint::from_request(&req, bucket_name.map(ToOwned::to_owned))?;
debug!("Endpoint: {:?}", endpoint);
- Context::current()
- .span()
- .update_name::<String>(format!("S3 API {}", endpoint.name()));
+ let current_context = Context::current();
+ let current_span = current_context.span();
+ current_span.update_name::<String>(format!("S3 API {}", endpoint.name()));
+ current_span.set_attribute(KeyValue::new("endpoint", endpoint.name()));
+ current_span.set_attribute(KeyValue::new(
+ "bucket",
+ bucket_name.clone().unwrap_or_default(),
+ ));
let metrics_tags = &[KeyValue::new("api_endpoint", endpoint.name())];
diff --git a/src/util/metrics.rs b/src/util/metrics.rs
index cd5aa182..1b05eabe 100644
--- a/src/util/metrics.rs
+++ b/src/util/metrics.rs
@@ -1,8 +1,9 @@
use std::time::SystemTime;
use futures::{future::BoxFuture, Future, FutureExt};
+use rand::Rng;
-use opentelemetry::{metrics::*, KeyValue};
+use opentelemetry::{metrics::*, trace::TraceId, KeyValue};
pub trait RecordDuration<'a>: 'a {
type Output;
@@ -48,3 +49,9 @@ where
.boxed()
}
}
+
+// ----
+
+pub fn gen_trace_id() -> TraceId {
+ rand::thread_rng().gen::<[u8; 16]>().into()
+}
diff --git a/src/web/Cargo.toml b/src/web/Cargo.toml
index 269e29a5..81a8c995 100644
--- a/src/web/Cargo.toml
+++ b/src/web/Cargo.toml
@@ -27,3 +27,5 @@ futures = "0.3"
http = "0.2"
hyper = { version = "0.14", features = ["server", "http1", "runtime", "tcp", "stream"] }
+
+opentelemetry = "0.17"
diff --git a/src/web/web_server.rs b/src/web/web_server.rs
index 80d2feb9..c51347a3 100644
--- a/src/web/web_server.rs
+++ b/src/web/web_server.rs
@@ -9,6 +9,13 @@ use hyper::{
Body, Method, Request, Response, Server,
};
+use opentelemetry::{
+ global,
+ metrics::{Counter, ValueRecorder},
+ trace::{FutureExt, TraceContextExt, Tracer},
+ Context, KeyValue,
+};
+
use crate::error::*;
use garage_api::error::{Error as ApiError, OkOrBadRequest, OkOrInternalError};
@@ -20,6 +27,33 @@ use garage_model::garage::Garage;
use garage_table::*;
use garage_util::error::Error as GarageError;
+use garage_util::metrics::{gen_trace_id, RecordDuration};
+
+struct WebMetrics {
+ request_counter: Counter<u64>,
+ error_counter: Counter<u64>,
+ request_duration: ValueRecorder<f64>,
+}
+
+impl WebMetrics {
+ fn new() -> Self {
+ let meter = global::meter("garage/web");
+ Self {
+ request_counter: meter
+ .u64_counter("web.request_counter")
+ .with_description("Number of requests to the web endpoint")
+ .init(),
+ error_counter: meter
+ .u64_counter("web.error_counter")
+ .with_description("Number of requests to the web endpoint resulting in errors")
+ .init(),
+ request_duration: meter
+ .f64_value_recorder("web.request_duration")
+ .with_description("Duration of requests to the web endpoint")
+ .init(),
+ }
+ }
+}
/// Run a web server
pub async fn run_web_server(
@@ -28,13 +62,19 @@ pub async fn run_web_server(
) -> Result<(), GarageError> {
let addr = &garage.config.s3_web.bind_addr;
+ let metrics = Arc::new(WebMetrics::new());
+
let service = make_service_fn(|conn: &AddrStream| {
let garage = garage.clone();
+ let metrics = metrics.clone();
+
let client_addr = conn.remote_addr();
async move {
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
let garage = garage.clone();
- handle_request(garage, req, client_addr)
+ let metrics = metrics.clone();
+
+ handle_request(garage, metrics, req, client_addr)
}))
}
});
@@ -49,23 +89,55 @@ pub async fn run_web_server(
async fn handle_request(
garage: Arc<Garage>,
+ metrics: Arc<WebMetrics>,
req: Request<Body>,
addr: SocketAddr,
) -> Result<Response<Body>, Infallible> {
info!("{} {} {}", addr, req.method(), req.uri());
- match serve_file(garage, &req).await {
+
+ // Lots of instrumentation
+ let tracer = opentelemetry::global::tracer("garage");
+ let span = tracer
+ .span_builder(format!("Web {} request", req.method()))
+ .with_trace_id(gen_trace_id())
+ .with_attributes(vec![
+ KeyValue::new("method", format!("{}", req.method())),
+ KeyValue::new("uri", req.uri().to_string()),
+ ])
+ .start(&tracer);
+
+ let metrics_tags = &[KeyValue::new("method", req.method().to_string())];
+
+ // The actual handler
+ let res = serve_file(garage, &req)
+ .with_context(Context::current_with_span(span))
+ .record_duration(&metrics.request_duration, &metrics_tags[..])
+ .await;
+
+ // More instrumentation
+ metrics.request_counter.add(1, &metrics_tags[..]);
+
+ // Returning the result
+ match res {
Ok(res) => {
- debug!("{} {} {}", req.method(), req.uri(), res.status());
+ debug!("{} {} {}", req.method(), res.status(), req.uri());
Ok(res)
}
Err(error) => {
info!(
"{} {} {} {}",
req.method(),
- req.uri(),
error.http_status_code(),
+ req.uri(),
error
);
+ metrics.error_counter.add(
+ 1,
+ &[
+ metrics_tags[0].clone(),
+ KeyValue::new("status_code", error.http_status_code().to_string()),
+ ],
+ );
Ok(error_to_res(error))
}
}