From 7dcb4072dcced7a4d92ae2c356722ccb6bfd5e2b Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Sat, 26 Feb 2022 20:31:57 +0100 Subject: Scrape metrics from system process on each node --- app/telemetry/config/otel-config.yaml | 4 +- app/telemetry/deploy/telemetry-system.hcl | 59 ++++++++++++++++++++ app/telemetry/deploy/telemetry.hcl | 93 +++++++++++-------------------- doc/telemetry.md | 24 ++++++++ 4 files changed, 118 insertions(+), 62 deletions(-) diff --git a/app/telemetry/config/otel-config.yaml b/app/telemetry/config/otel-config.yaml index e1d6d78..574d01c 100644 --- a/app/telemetry/config/otel-config.yaml +++ b/app/telemetry/config/otel-config.yaml @@ -14,9 +14,7 @@ receivers: scrape_interval: 5s static_configs: - targets: - - "10.42.2.21:3909" - - "10.42.2.22:3909" - - "10.42.2.23:3909" + - "localhost:3909" exporters: logging: diff --git a/app/telemetry/deploy/telemetry-system.hcl b/app/telemetry/deploy/telemetry-system.hcl index 9dd379d..af9acff 100644 --- a/app/telemetry/deploy/telemetry-system.hcl +++ b/app/telemetry/deploy/telemetry-system.hcl @@ -57,5 +57,64 @@ EOH } } } + + group "collector" { + network { + port "otel_grpc" { + static = 4317 + } + port "apm" { + static = 8200 + } + } + + task "otel" { + driver = "docker" + config { + image = "otel/opentelemetry-collector-contrib:0.44.0" + args = [ + "--config=/etc/otel-config.yaml", + ] + network_mode = "host" + ports= [ "otel_grpc" ] + volumes = [ + "secrets/otel-config.yaml:/etc/otel-config.yaml" + ] + } + + template { + data = file("../config/otel-config.yaml") + destination = "secrets/otel-config.yaml" + } + + resources { + memory = 200 + cpu = 100 + } + } + + task "apm" { + driver = "docker" + config { + image = "docker.elastic.co/apm/apm-server:7.17.0" + network_mode = "host" + ports = [ "apm" ] + args = [ "--strict.perms=false" ] + volumes = [ + "secrets/apm-config.yaml:/usr/share/apm-server/apm-server.yml:ro" + ] + } + + template { + data = file("../config/apm-config.yaml") + destination = 
"secrets/apm-config.yaml" + } + + resources { + memory = 200 + cpu = 100 + } + } + } } diff --git a/app/telemetry/deploy/telemetry.hcl b/app/telemetry/deploy/telemetry.hcl index 90cde54..c3edd4c 100644 --- a/app/telemetry/deploy/telemetry.hcl +++ b/app/telemetry/deploy/telemetry.hcl @@ -1,54 +1,14 @@ job "telemetry" { datacenters = ["neptune"] type = "service" - - group "grafana" { - count = 1 - constraint { - attribute = "${attr.unique.hostname}" - operator = "=" - value = "carcajou" - } + group "kibana" { + count = 1 network { - port "otel_grpc" { - static = 4317 - } port "kibana" { static = 5601 } - port "apm" { - static = 8200 - } - port "grafana" { - static = 3333 - } - } - - task "otel" { - driver = "docker" - config { - image = "otel/opentelemetry-collector-contrib:0.44.0" - args = [ - "--config=/etc/otel-config.yaml", - ] - network_mode = "host" - ports= [ "otel_grpc" ] - volumes = [ - "secrets/otel-config.yaml:/etc/otel-config.yaml" - ] - } - - template { - data = file("../config/otel-config.yaml") - destination = "secrets/otel-config.yaml" - } - - resources { - memory = 200 - cpu = 100 - } } task "kibana" { @@ -74,28 +34,43 @@ EOH memory = 1500 cpu = 500 } - } - task "apm" { - driver = "docker" - config { - image = "docker.elastic.co/apm/apm-server:7.17.0" - network_mode = "host" - ports = [ "apm" ] - args = [ "--strict.perms=false" ] - volumes = [ - "secrets/apm-config.yaml:/usr/share/apm-server/apm-server.yml:ro" + service { + tags = [ + "kibana", + "tricot kibana.home.adnab.me", ] + port = 5601 + address_mode = "driver" + name = "kibana" + check { + type = "tcp" + port = 5601 + address_mode = "driver" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } } + } + } + + group "grafana" { + count = 1 - template { - data = file("../config/apm-config.yaml") - destination = "secrets/apm-config.yaml" - } + constraint { + attribute = "${attr.unique.hostname}" + operator = "=" + value = "carcajou" + 
} - resources { - memory = 200 - cpu = 100 + network { + port "grafana" { + static = 3333 } } diff --git a/doc/telemetry.md b/doc/telemetry.md index ee8d0dd..1e405a5 100644 --- a/doc/telemetry.md +++ b/doc/telemetry.md @@ -1,3 +1,27 @@ +# create elasticsearch folders on all nodes + +```bash +mkdir -p /mnt/ssd/telemetry/es_data/nodes +chown 1000 /mnt/ssd/telemetry/es_data/nodes +``` + +# generate ca and tls certs for elasticsearch cluster + +start a `bash` in an elasticsearch image, such as `docker.elastic.co/elasticsearch/elasticsearch:7.17.0`: `docker run -ti docker.elastic.co/elasticsearch/elasticsearch:7.17.0 bash` + +generate a ca and node certs: + +```bash +./bin/elasticsearch-certutil ca +./bin/elasticsearch-certutil cert --ca elastic-stack-ca.p12 +``` + +copy `elastic-certificates.p12` to `/mnt/ssd/telemetry/es_data` on all nodes, and chown it: + +```bash +chown 1000 /mnt/ssd/telemetry/es_data/elastic-certificates.p12 +``` + # create elasticsearch passwords in elasticsearch container -- cgit v1.2.3