diff options
author | Alex Auvolat <alex@adnab.me> | 2022-08-25 13:59:40 +0200 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2022-08-25 13:59:40 +0200 |
commit | 72d033dcd40a65ccf7f41f51af356ffc20144c30 (patch) | |
tree | 2b1247bcae11a1f993590bc9db8bf08a53564350 /cluster/prod/app/telemetry/deploy | |
parent | fd3ed44dad783c15f2793788f9384d48760666a3 (diff) | |
download | nixcfg-72d033dcd40a65ccf7f41f51af356ffc20144c30.tar.gz nixcfg-72d033dcd40a65ccf7f41f51af356ffc20144c30.zip |
Remove garage files at bad location, add basic telemetry
Diffstat (limited to 'cluster/prod/app/telemetry/deploy')
-rw-r--r-- | cluster/prod/app/telemetry/deploy/telemetry-system.hcl | 49 | ||||
-rw-r--r-- | cluster/prod/app/telemetry/deploy/telemetry.hcl | 189 |
2 files changed, 238 insertions, 0 deletions
diff --git a/cluster/prod/app/telemetry/deploy/telemetry-system.hcl b/cluster/prod/app/telemetry/deploy/telemetry-system.hcl new file mode 100644 index 0000000..e4bde1a --- /dev/null +++ b/cluster/prod/app/telemetry/deploy/telemetry-system.hcl @@ -0,0 +1,49 @@ +job "telemetry-system" { + datacenters = ["neptune", "orion"] + type = "system" + priority = "100" + + group "collector" { + network { + port "node_exporter" { static = 9100 } + } + + task "node_exporter" { + driver = "docker" + + config { + image = "quay.io/prometheus/node-exporter:v1.1.2" + network_mode = "host" + volumes = [ + "/:/host:ro,rslave" + ] + args = [ "--path.rootfs=/host" ] + } + + resources { + cpu = 50 + memory = 40 + } + + service { + tags = [ "telemetry" ] + port = 9100 + address_mode = "driver" + name = "node-exporter" + check { + type = "http" + path = "/" + port = 9100 + address_mode = "driver" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + } + } + } +} diff --git a/cluster/prod/app/telemetry/deploy/telemetry.hcl b/cluster/prod/app/telemetry/deploy/telemetry.hcl new file mode 100644 index 0000000..e1f1000 --- /dev/null +++ b/cluster/prod/app/telemetry/deploy/telemetry.hcl @@ -0,0 +1,189 @@ +job "telemetry" { + datacenters = ["neptune"] + type = "service" + + group "prometheus" { + count = 1 + + network { + port "prometheus" { + static = 9090 + } + } + + task "prometheus" { + driver = "docker" + config { + image = "prom/prometheus:v2.38.0" + network_mode = "host" + ports = [ "prometheus" ] + volumes = [ + "secrets:/etc/prometheus" + ] + } + + template { + data = file("../config/prometheus.yml") + destination = "secrets/prometheus.yml" + } + + template { + data = "{{ key \"secrets/consul/consul.crt\" }}" + destination = "secrets/consul.crt" + } + + template { + data = "{{ key \"secrets/consul/consul-client.crt\" }}" + destination = "secrets/consul-client.crt" + } + + template { + data = "{{ key \"secrets/consul/consul-client.key\" }}" + destination = "secrets/consul-client.key" + } + + resources { + memory = 500 + cpu = 500 + } + + service { + port = 9090 + address_mode = "driver" + name = "prometheus" + check { + type = "http" + path = "/" + port = 9090 + address_mode = "driver" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + } + } + } + + group "grafana" { + count = 1 + + network { + port "grafana" { + static = 3719 + } + } + + task "restore-db" { + lifecycle { + hook = "prestart" + sidecar = false + } + + driver = "docker" + config { + image = "litestream/litestream:0.3.7" + args = [ + "restore", "-config", "/etc/litestream.yml", "/ephemeral/grafana.db" + ] + volumes = [ + "../alloc/data:/ephemeral", + "secrets/litestream.yml:/etc/litestream.yml" + ] + } + user = "472" + + template { + data = file("../config/grafana-litestream.yml") + destination = "secrets/litestream.yml" + } + + resources { + memory = 200 + cpu = 1000 + } + } + + task "grafana" { + driver = "docker" + config { + image = "grafana/grafana:8.4.3" + network_mode = "host" + ports = [ "grafana" ] + volumes = [ + "../alloc/data:/var/lib/grafana", + "secrets/prometheus.yaml:/etc/grafana/provisioning/datasources/prometheus.yaml" + ] + } + + template { + data = file("../config/grafana-datasource-prometheus.yaml") + destination = "secrets/prometheus.yaml" + } + + template { + data = <<EOH +GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel,grafana-worldmap-panel,grafana-polystat-panel +GF_SERVER_HTTP_PORT=3719 +EOH + destination = "secrets/env" + env = true + } + + resources { + memory = 500 + cpu = 100 + } + + service { + tags = [ + "grafana", + "tricot grafana-new.deuxfleurs.fr", + ] + port = 3719 + address_mode = "driver" + name = "grafana" + check { + type = "tcp" + port = 3719 + address_mode = "driver" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + } + } + + task "replicate-db" { + driver = "docker" + config { + image = "litestream/litestream:0.3.7" + args = [ + "replicate", "-config", "/etc/litestream.yml" + ] + volumes = [ + "../alloc/data:/ephemeral", + "secrets/litestream.yml:/etc/litestream.yml" + ] + } + user = "472" + + template { + data = file("../config/grafana-litestream.yml") + destination = "secrets/litestream.yml" + } + + resources { + memory = 200 + cpu = 100 + } + } + } +} |