From 72d033dcd40a65ccf7f41f51af356ffc20144c30 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 25 Aug 2022 13:59:40 +0200 Subject: Remove garage files at bad location, add basic telemetry --- .../prod/app/telemetry/deploy/telemetry-system.hcl | 49 ++++++ cluster/prod/app/telemetry/deploy/telemetry.hcl | 189 +++++++++++++++++++++ 2 files changed, 238 insertions(+) create mode 100644 cluster/prod/app/telemetry/deploy/telemetry-system.hcl create mode 100644 cluster/prod/app/telemetry/deploy/telemetry.hcl (limited to 'cluster/prod/app/telemetry/deploy') diff --git a/cluster/prod/app/telemetry/deploy/telemetry-system.hcl b/cluster/prod/app/telemetry/deploy/telemetry-system.hcl new file mode 100644 index 0000000..e4bde1a --- /dev/null +++ b/cluster/prod/app/telemetry/deploy/telemetry-system.hcl @@ -0,0 +1,49 @@ +job "telemetry-system" { + datacenters = ["neptune", "orion"] + type = "system" + priority = "100" + + group "collector" { + network { + port "node_exporter" { static = 9100 } + } + + task "node_exporter" { + driver = "docker" + + config { + image = "quay.io/prometheus/node-exporter:v1.1.2" + network_mode = "host" + volumes = [ + "/:/host:ro,rslave" + ] + args = [ "--path.rootfs=/host" ] + } + + resources { + cpu = 50 + memory = 40 + } + + service { + tags = [ "telemetry" ] + port = 9100 + address_mode = "driver" + name = "node-exporter" + check { + type = "http" + path = "/" + port = 9100 + address_mode = "driver" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + } + } + } +} diff --git a/cluster/prod/app/telemetry/deploy/telemetry.hcl b/cluster/prod/app/telemetry/deploy/telemetry.hcl new file mode 100644 index 0000000..e1f1000 --- /dev/null +++ b/cluster/prod/app/telemetry/deploy/telemetry.hcl @@ -0,0 +1,189 @@ +job "telemetry" { + datacenters = ["neptune"] + type = "service" + + group "prometheus" { + count = 1 + + network { + port "prometheus" { + static = 9090 + } + } + + task "prometheus" { + driver = "docker" + config { + image = "prom/prometheus:v2.38.0" + network_mode = "host" + ports = [ "prometheus" ] + volumes = [ + "secrets:/etc/prometheus" + ] + } + + template { + data = file("../config/prometheus.yml") + destination = "secrets/prometheus.yml" + } + + template { + data = "{{ key \"secrets/consul/consul.crt\" }}" + destination = "secrets/consul.crt" + } + + template { + data = "{{ key \"secrets/consul/consul-client.crt\" }}" + destination = "secrets/consul-client.crt" + } + + template { + data = "{{ key \"secrets/consul/consul-client.key\" }}" + destination = "secrets/consul-client.key" + } + + resources { + memory = 500 + cpu = 500 + } + + service { + port = 9090 + address_mode = "driver" + name = "prometheus" + check { + type = "http" + path = "/" + port = 9090 + address_mode = "driver" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + } + } + } + + group "grafana" { + count = 1 + + network { + port "grafana" { + static = 3719 + } + } + + task "restore-db" { + lifecycle { + hook = "prestart" + sidecar = false + } + + driver = "docker" + config { + image = "litestream/litestream:0.3.7" + args = [ + "restore", "-config", "/etc/litestream.yml", "/ephemeral/grafana.db" + ] + volumes = [ + "../alloc/data:/ephemeral", + "secrets/litestream.yml:/etc/litestream.yml" + ] + } + user = "472" + + template { + data = file("../config/grafana-litestream.yml") + destination = "secrets/litestream.yml" + } + + resources { + memory = 200 + cpu = 1000 + } + } + + task "grafana" { + driver = "docker" + config { + image = "grafana/grafana:8.4.3" + network_mode = "host" + ports = [ "grafana" ] + volumes = [ + "../alloc/data:/var/lib/grafana", + "secrets/prometheus.yaml:/etc/grafana/provisioning/datasources/prometheus.yaml" + ] + } + + template { + data = file("../config/grafana-datasource-prometheus.yaml") + destination = "secrets/prometheus.yaml" + } + + template { + data = <