aboutsummaryrefslogtreecommitdiff
path: root/cluster/prod/app/telemetry/deploy
diff options
context:
space:
mode:
author: Alex Auvolat <alex@adnab.me> 2022-08-25 13:59:40 +0200
committer: Alex Auvolat <alex@adnab.me> 2022-08-25 13:59:40 +0200
commit: 72d033dcd40a65ccf7f41f51af356ffc20144c30 (patch)
tree: 2b1247bcae11a1f993590bc9db8bf08a53564350 /cluster/prod/app/telemetry/deploy
parent: fd3ed44dad783c15f2793788f9384d48760666a3 (diff)
downloadnixcfg-72d033dcd40a65ccf7f41f51af356ffc20144c30.tar.gz
nixcfg-72d033dcd40a65ccf7f41f51af356ffc20144c30.zip
Remove garage files at bad location, add basic telemetry
Diffstat (limited to 'cluster/prod/app/telemetry/deploy')
-rw-r--r-- cluster/prod/app/telemetry/deploy/telemetry-system.hcl | 49
-rw-r--r-- cluster/prod/app/telemetry/deploy/telemetry.hcl | 189
2 files changed, 238 insertions, 0 deletions
diff --git a/cluster/prod/app/telemetry/deploy/telemetry-system.hcl b/cluster/prod/app/telemetry/deploy/telemetry-system.hcl
new file mode 100644
index 0000000..e4bde1a
--- /dev/null
+++ b/cluster/prod/app/telemetry/deploy/telemetry-system.hcl
@@ -0,0 +1,49 @@
+job "telemetry-system" { # Nomad job that runs the Prometheus node_exporter as a Docker task
+ datacenters = ["neptune", "orion"]
+ type = "system" # system scheduler: placed on every eligible client node
+ priority = 100 # integer, matching the type the Nomad job spec declares for priority
+
+ group "collector" {
+ network {
+ port "node_exporter" { static = 9100 } # fixed port; same number the service and check below use
+ }
+
+ task "node_exporter" {
+ driver = "docker"
+
+ config {
+ image = "quay.io/prometheus/node-exporter:v1.1.2"
+ network_mode = "host" # container shares the host's network namespace
+ volumes = [ # bind-mount the host root filesystem read-only at /host
+ "/:/host:ro,rslave"
+ ]
+ args = [ "--path.rootfs=/host" ] # point node_exporter at the /host mount declared above
+ }
+
+ resources {
+ cpu = 50 # MHz
+ memory = 40 # MB
+ }
+
+ service { # registers the exporter under the name "node-exporter" with the "telemetry" tag
+ tags = [ "telemetry" ]
+ port = 9100
+ address_mode = "driver"
+ name = "node-exporter"
+ check { # HTTP GET on / every 60s, 5s timeout
+ type = "http"
+ path = "/"
+ port = 9100
+ address_mode = "driver"
+ interval = "60s"
+ timeout = "5s"
+ check_restart { # restart the task once the check has failed 3 times in a row
+ limit = 3
+ grace = "90s" # failures within 90s of a (re)start are not counted
+ ignore_warnings = false # a "warning" check status counts as unhealthy
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/cluster/prod/app/telemetry/deploy/telemetry.hcl b/cluster/prod/app/telemetry/deploy/telemetry.hcl
new file mode 100644
index 0000000..e1f1000
--- /dev/null
+++ b/cluster/prod/app/telemetry/deploy/telemetry.hcl
@@ -0,0 +1,189 @@
+job "telemetry" {
+ datacenters = ["neptune"]
+ type = "service"
+
+ group "prometheus" {
+ count = 1
+
+ network {
+ port "prometheus" {
+ static = 9090
+ }
+ }
+
+ task "prometheus" { # Prometheus server container, configured from files rendered into the task's secrets dir
+ driver = "docker"
+ config {
+ image = "prom/prometheus:v2.38.0"
+ network_mode = "host" # container shares the host's network namespace
+ ports = [ "prometheus" ] # label of the static 9090 port declared in this group's network block
+ volumes = [ # the secrets dir (where the templates below render) is mounted at /etc/prometheus
+ "secrets:/etc/prometheus"
+ ]
+ }
+
+ template { # main Prometheus config, body taken from ../config/prometheus.yml
+ data = file("../config/prometheus.yml")
+ destination = "secrets/prometheus.yml"
+ }
+
+ template { # rendered from the Consul KV key secrets/consul/consul.crt
+ data = "{{ key \"secrets/consul/consul.crt\" }}"
+ destination = "secrets/consul.crt"
+ }
+
+ template { # rendered from the Consul KV key secrets/consul/consul-client.crt
+ data = "{{ key \"secrets/consul/consul-client.crt\" }}"
+ destination = "secrets/consul-client.crt"
+ }
+
+ template { # rendered from the Consul KV key secrets/consul/consul-client.key
+ data = "{{ key \"secrets/consul/consul-client.key\" }}"
+ destination = "secrets/consul-client.key"
+ }
+
+ resources {
+ memory = 500 # MB
+ cpu = 500 # MHz
+ }
+
+ service { # registers the server under the name "prometheus"
+ port = 9090
+ address_mode = "driver"
+ name = "prometheus"
+ check { # HTTP GET on / every 60s, 5s timeout
+ type = "http"
+ path = "/"
+ port = 9090
+ address_mode = "driver"
+ interval = "60s"
+ timeout = "5s"
+ check_restart { # restart the task once the check has failed 3 times in a row
+ limit = 3
+ grace = "90s" # failures within 90s of a (re)start are not counted
+ ignore_warnings = false # a "warning" check status counts as unhealthy
+ }
+ }
+ }
+ }
+ }
+
+ group "grafana" {
+ count = 1
+
+ network {
+ port "grafana" {
+ static = 3719
+ }
+ }
+
+ task "restore-db" { # one-shot Litestream restore of grafana.db into the shared alloc data dir
+ lifecycle {
+ hook = "prestart" # runs before the group's main tasks
+ sidecar = false # expected to run to completion rather than keep running
+ }
+
+ driver = "docker"
+ config {
+ image = "litestream/litestream:0.3.7"
+ args = [ # litestream restore, writing the database to /ephemeral/grafana.db
+ "restore", "-config", "/etc/litestream.yml", "/ephemeral/grafana.db"
+ ]
+ volumes = [ # alloc data dir at /ephemeral; rendered litestream config at /etc/litestream.yml
+ "../alloc/data:/ephemeral",
+ "secrets/litestream.yml:/etc/litestream.yml"
+ ]
+ }
+ user = "472" # NOTE(review): presumably the uid the grafana image runs as, so the restored file is writable - confirm
+
+ template { # Litestream config, body taken from ../config/grafana-litestream.yml
+ data = file("../config/grafana-litestream.yml")
+ destination = "secrets/litestream.yml"
+ }
+
+ resources {
+ memory = 200 # MB
+ cpu = 1000 # MHz; NOTE(review): 10x the replicate-db task's 100 - confirm this is intentional
+ }
+ }
+
+ task "grafana" { # Grafana server; its data directory is the shared alloc data dir
+ driver = "docker"
+ config {
+ image = "grafana/grafana:8.4.3"
+ network_mode = "host" # container shares the host's network namespace
+ ports = [ "grafana" ] # label of the static 3719 port declared in this group's network block
+ volumes = [ # alloc data dir as Grafana's data dir; rendered datasource file into provisioning
+ "../alloc/data:/var/lib/grafana",
+ "secrets/prometheus.yaml:/etc/grafana/provisioning/datasources/prometheus.yaml"
+ ]
+ }
+
+ template { # datasource provisioning file, body taken from ../config/grafana-datasource-prometheus.yaml
+ data = file("../config/grafana-datasource-prometheus.yaml")
+ destination = "secrets/prometheus.yaml"
+ }
+
+ template {
+ data = <<EOH
+GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel,grafana-worldmap-panel,grafana-polystat-panel
+GF_SERVER_HTTP_PORT=3719
+EOH
+ destination = "secrets/env"
+ env = true # the rendered KEY=VALUE lines are exported as task environment variables
+ }
+
+ resources {
+ memory = 500 # MB
+ cpu = 100 # MHz
+ }
+
+ service { # registers Grafana under the name "grafana"
+ tags = [ # NOTE(review): the "tricot ..." tag presumably drives reverse-proxy routing for that hostname - confirm
+ "grafana",
+ "tricot grafana-new.deuxfleurs.fr",
+ ]
+ port = 3719 # same value as GF_SERVER_HTTP_PORT in the env template above
+ address_mode = "driver"
+ name = "grafana"
+ check { # TCP connect check every 60s, 5s timeout
+ type = "tcp"
+ port = 3719
+ address_mode = "driver"
+ interval = "60s"
+ timeout = "5s"
+ check_restart { # restart the task once the check has failed 3 times in a row
+ limit = 3
+ grace = "90s" # failures within 90s of a (re)start are not counted
+ ignore_warnings = false # a "warning" check status counts as unhealthy
+ }
+ }
+ }
+ }
+
+ task "replicate-db" { # Litestream replication task; no lifecycle block, so it runs as a main task alongside grafana
+ driver = "docker"
+ config {
+ image = "litestream/litestream:0.3.7"
+ args = [ # litestream replicate, driven entirely by /etc/litestream.yml
+ "replicate", "-config", "/etc/litestream.yml"
+ ]
+ volumes = [ # same mounts as the restore-db task: alloc data dir and rendered litestream config
+ "../alloc/data:/ephemeral",
+ "secrets/litestream.yml:/etc/litestream.yml"
+ ]
+ }
+ user = "472" # NOTE(review): presumably the uid the grafana image runs as - confirm
+
+ template { # Litestream config, body taken from ../config/grafana-litestream.yml
+ data = file("../config/grafana-litestream.yml")
+ destination = "secrets/litestream.yml"
+ }
+
+ resources {
+ memory = 200 # MB
+ cpu = 100 # MHz
+ }
+ }
+ }
+}