aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2022-02-26 20:31:57 +0100
committerAlex Auvolat <alex@adnab.me>2022-02-26 20:31:57 +0100
commit7dcb4072dcced7a4d92ae2c356722ccb6bfd5e2b (patch)
tree65c4b542097d1e1bea1ad6c9908c2c0f97b52fc6
parentbd2abf344915f502693a17a5f141518e1609910d (diff)
downloadnixcfg-7dcb4072dcced7a4d92ae2c356722ccb6bfd5e2b.tar.gz
nixcfg-7dcb4072dcced7a4d92ae2c356722ccb6bfd5e2b.zip
Scrape metrics from system process on each node
-rw-r--r--app/telemetry/config/otel-config.yaml4
-rw-r--r--app/telemetry/deploy/telemetry-system.hcl59
-rw-r--r--app/telemetry/deploy/telemetry.hcl93
-rw-r--r--doc/telemetry.md24
4 files changed, 118 insertions, 62 deletions
diff --git a/app/telemetry/config/otel-config.yaml b/app/telemetry/config/otel-config.yaml
index e1d6d78..574d01c 100644
--- a/app/telemetry/config/otel-config.yaml
+++ b/app/telemetry/config/otel-config.yaml
@@ -14,9 +14,7 @@ receivers:
scrape_interval: 5s
static_configs:
- targets:
- - "10.42.2.21:3909"
- - "10.42.2.22:3909"
- - "10.42.2.23:3909"
+ - "localhost:3909"
exporters:
logging:
diff --git a/app/telemetry/deploy/telemetry-system.hcl b/app/telemetry/deploy/telemetry-system.hcl
index 9dd379d..af9acff 100644
--- a/app/telemetry/deploy/telemetry-system.hcl
+++ b/app/telemetry/deploy/telemetry-system.hcl
@@ -57,5 +57,64 @@ EOH
}
}
}
+
+ group "collector" {
+ network {
+ port "otel_grpc" {
+ static = 4317
+ }
+ port "apm" {
+ static = 8200
+ }
+ }
+
+ task "otel" {
+ driver = "docker"
+ config {
+ image = "otel/opentelemetry-collector-contrib:0.44.0"
+ args = [
+ "--config=/etc/otel-config.yaml",
+ ]
+ network_mode = "host"
+ ports= [ "otel_grpc" ]
+ volumes = [
+ "secrets/otel-config.yaml:/etc/otel-config.yaml"
+ ]
+ }
+
+ template {
+ data = file("../config/otel-config.yaml")
+ destination = "secrets/otel-config.yaml"
+ }
+
+ resources {
+ memory = 200
+ cpu = 100
+ }
+ }
+
+ task "apm" {
+ driver = "docker"
+ config {
+ image = "docker.elastic.co/apm/apm-server:7.17.0"
+ network_mode = "host"
+ ports = [ "apm" ]
+ args = [ "--strict.perms=false" ]
+ volumes = [
+ "secrets/apm-config.yaml:/usr/share/apm-server/apm-server.yml:ro"
+ ]
+ }
+
+ template {
+ data = file("../config/apm-config.yaml")
+ destination = "secrets/apm-config.yaml"
+ }
+
+ resources {
+ memory = 200
+ cpu = 100
+ }
+ }
+ }
}
diff --git a/app/telemetry/deploy/telemetry.hcl b/app/telemetry/deploy/telemetry.hcl
index 90cde54..c3edd4c 100644
--- a/app/telemetry/deploy/telemetry.hcl
+++ b/app/telemetry/deploy/telemetry.hcl
@@ -1,54 +1,14 @@
job "telemetry" {
datacenters = ["neptune"]
type = "service"
-
- group "grafana" {
- count = 1
- constraint {
- attribute = "${attr.unique.hostname}"
- operator = "="
- value = "carcajou"
- }
+ group "kibana" {
+ count = 1
network {
- port "otel_grpc" {
- static = 4317
- }
port "kibana" {
static = 5601
}
- port "apm" {
- static = 8200
- }
- port "grafana" {
- static = 3333
- }
- }
-
- task "otel" {
- driver = "docker"
- config {
- image = "otel/opentelemetry-collector-contrib:0.44.0"
- args = [
- "--config=/etc/otel-config.yaml",
- ]
- network_mode = "host"
- ports= [ "otel_grpc" ]
- volumes = [
- "secrets/otel-config.yaml:/etc/otel-config.yaml"
- ]
- }
-
- template {
- data = file("../config/otel-config.yaml")
- destination = "secrets/otel-config.yaml"
- }
-
- resources {
- memory = 200
- cpu = 100
- }
}
task "kibana" {
@@ -74,28 +34,43 @@ EOH
memory = 1500
cpu = 500
}
- }
- task "apm" {
- driver = "docker"
- config {
- image = "docker.elastic.co/apm/apm-server:7.17.0"
- network_mode = "host"
- ports = [ "apm" ]
- args = [ "--strict.perms=false" ]
- volumes = [
- "secrets/apm-config.yaml:/usr/share/apm-server/apm-server.yml:ro"
+ service {
+ tags = [
+ "kibana",
+ "tricot kibana.home.adnab.me",
]
+ port = 5601
+ address_mode = "driver"
+ name = "kibana"
+ check {
+ type = "tcp"
+ port = 5601
+ address_mode = "driver"
+ interval = "60s"
+ timeout = "5s"
+ check_restart {
+ limit = 3
+ grace = "90s"
+ ignore_warnings = false
+ }
+ }
}
+ }
+ }
+
+ group "grafana" {
+ count = 1
- template {
- data = file("../config/apm-config.yaml")
- destination = "secrets/apm-config.yaml"
- }
+ constraint {
+ attribute = "${attr.unique.hostname}"
+ operator = "="
+ value = "carcajou"
+ }
- resources {
- memory = 200
- cpu = 100
+ network {
+ port "grafana" {
+ static = 3333
}
}
diff --git a/doc/telemetry.md b/doc/telemetry.md
index ee8d0dd..1e405a5 100644
--- a/doc/telemetry.md
+++ b/doc/telemetry.md
@@ -1,3 +1,27 @@
+# create elasticsearch folders on all nodes
+
+```bash
+mkdir -p /mnt/ssd/telemetry/es_data/nodes
+chown 1000 /mnt/ssd/telemetry/es_data/nodes
+```
+
+# generate ca and tls certs for elasticsearch cluster
+
+start a `bash` shell in a container based on an elasticsearch image, such as `docker.elastic.co/elasticsearch/elasticsearch:7.17.0`: `docker run -ti docker.elastic.co/elasticsearch/elasticsearch:7.17.0 bash`
+
+generate a ca and node certs:
+
+```bash
+./bin/elasticsearch-certutil ca
+./bin/elasticsearch-certutil cert --ca elastic-stack-ca.p12
+```
+
+copy `elastic-certificates.p12` to `/mnt/ssd/telemetry/es_data` on all nodes, and chown it:
+
+```bash
+chown 1000 /mnt/ssd/telemetry/es_data/elastic-certificates.p12
+```
+
# create elasticsearch passwords
in elasticsearch container