aboutsummaryrefslogtreecommitdiff
path: root/experimental/app/telemetry-elastic
diff options
context:
space:
mode:
authorAlex Auvolat <alex@adnab.me>2022-12-24 22:59:37 +0100
committerAlex Auvolat <alex@adnab.me>2022-12-24 22:59:37 +0100
commit8d17a07c9be5cd9d400644c34ea50177535d15f6 (patch)
treecac734f62d4c04c898d4e70d1e2ba65f933317ca /experimental/app/telemetry-elastic
parent4b527c4db8060679d21e5bb596bde91ce39df393 (diff)
downloadnixcfg-8d17a07c9be5cd9d400644c34ea50177535d15f6.tar.gz
nixcfg-8d17a07c9be5cd9d400644c34ea50177535d15f6.zip
reorganize some things
Diffstat (limited to 'experimental/app/telemetry-elastic')
-rw-r--r--experimental/app/telemetry-elastic/config/apm-config.yaml20
-rw-r--r--experimental/app/telemetry-elastic/config/filebeat.yml46
-rw-r--r--experimental/app/telemetry-elastic/config/grafana-litestream.yml10
-rw-r--r--experimental/app/telemetry-elastic/config/grafana/provisioning/datasources/elastic.yaml21
-rw-r--r--experimental/app/telemetry-elastic/config/otel-config.yaml56
-rw-r--r--experimental/app/telemetry-elastic/deploy/telemetry-system.hcl182
-rw-r--r--experimental/app/telemetry-elastic/deploy/telemetry.hcl181
7 files changed, 516 insertions, 0 deletions
diff --git a/experimental/app/telemetry-elastic/config/apm-config.yaml b/experimental/app/telemetry-elastic/config/apm-config.yaml
new file mode 100644
index 0000000..07a88bd
--- /dev/null
+++ b/experimental/app/telemetry-elastic/config/apm-config.yaml
@@ -0,0 +1,20 @@
+apm-server:
+ # Defines the host and port the server is listening on. Use "unix:/path/to.sock" to listen on a unix domain socket.
+ host: "0.0.0.0:8200"
+#-------------------------- Elasticsearch output --------------------------
+output.elasticsearch:
+ # Array of hosts to connect to.
+ # Scheme and port can be left out and will be set to the default (`http` and `9200`).
+ # In case you specify an additional path, the scheme is required: `http://localhost:9200/path`.
+ # IPv6 addresses should always be defined as: `https://[2001:db8::1]:9200`.
+ hosts: ["localhost:9200"]
+ username: "elastic"
+ password: "{{ key "secrets/telemetry/elastic_passwords/elastic" }}"
+
+instrumentation:
+ enabled: true
+ environment: staging
+
+logging:
+ level: warning
+ to_stderr: true
diff --git a/experimental/app/telemetry-elastic/config/filebeat.yml b/experimental/app/telemetry-elastic/config/filebeat.yml
new file mode 100644
index 0000000..310afd1
--- /dev/null
+++ b/experimental/app/telemetry-elastic/config/filebeat.yml
@@ -0,0 +1,46 @@
+# see https://github.com/elastic/beats/blob/master/filebeat/filebeat.reference.yml
+filebeat.modules:
+- module: system
+ syslog:
+ enabled: true
+ auth:
+ enabled: true
+
+#filebeat.inputs:
+#- type: container
+# enabled: true
+# paths:
+# - /var/lib/docker/containers/*/*.log
+# stream: all # can be all, stdout or stderr
+
+#========================== Filebeat autodiscover ==============================
+filebeat.autodiscover:
+ providers:
+ - type: docker
+ # https://www.elastic.co/guide/en/beats/filebeat/current/configuration-autodiscover-hints.html
+ # This URL also contains instructions on multi-line logs
+ hints.enabled: true
+
+#================================ Processors ===================================
+processors:
+# - add_cloud_metadata: ~
+- add_docker_metadata: ~
+- add_locale:
+ format: offset
+- add_host_metadata:
+ netinfo.enabled: true
+
+#========================== Elasticsearch output ===============================
+output.elasticsearch:
+ hosts: ["localhost:9200"]
+ username: elastic
+ password: '{{ key "secrets/telemetry/elastic_passwords/elastic" }}'  # quoted so the rendered secret always stays a YAML string
+
+#============================== Dashboards =====================================
+setup.dashboards:
+ enabled: false
+
+#============================== Xpack Monitoring ===============================
+xpack.monitoring:
+ enabled: true
+ elasticsearch:
diff --git a/experimental/app/telemetry-elastic/config/grafana-litestream.yml b/experimental/app/telemetry-elastic/config/grafana-litestream.yml
new file mode 100644
index 0000000..a537d9c
--- /dev/null
+++ b/experimental/app/telemetry-elastic/config/grafana-litestream.yml
@@ -0,0 +1,10 @@
+dbs:
+ - path: /ephemeral/grafana.db
+ replicas:
+ - url: s3://grafana-db/grafana.db
+ region: garage-staging
+ endpoint: http://{{ env "attr.unique.network.ip-address" }}:3990
+ access-key-id: '{{ key "secrets/telemetry/grafana/s3_access_key" | trimSpace }}'  # quoted: a digit-only or hex-looking key must not be parsed as a number
+ secret-access-key: '{{ key "secrets/telemetry/grafana/s3_secret_key" | trimSpace }}'  # quoted for the same reason
+ force-path-style: true
+ sync-interval: 60s
diff --git a/experimental/app/telemetry-elastic/config/grafana/provisioning/datasources/elastic.yaml b/experimental/app/telemetry-elastic/config/grafana/provisioning/datasources/elastic.yaml
new file mode 100644
index 0000000..7d2277c
--- /dev/null
+++ b/experimental/app/telemetry-elastic/config/grafana/provisioning/datasources/elastic.yaml
@@ -0,0 +1,21 @@
+apiVersion: 1
+
+datasources:
+ - name: DS_ELASTICSEARCH
+ type: elasticsearch
+ access: proxy
+ url: http://localhost:9200
+ password: '{{ key "secrets/telemetry/elastic_passwords/elastic" }}'
+ user: 'elastic'
+ database: metrics-*
+ basicAuth: false
+ isDefault: true
+ jsonData:
+ esVersion: "8.2.0"
+ includeFrozen: false
+ logLevelField: ''
+ logMessageField: ''
+ maxConcurrentShardRequests: 5
+ timeField: "@timestamp"
+ timeInterval: "5s"
+ readOnly: false
diff --git a/experimental/app/telemetry-elastic/config/otel-config.yaml b/experimental/app/telemetry-elastic/config/otel-config.yaml
new file mode 100644
index 0000000..bcf1baa
--- /dev/null
+++ b/experimental/app/telemetry-elastic/config/otel-config.yaml
@@ -0,0 +1,56 @@
+receivers:
+ # Data sources: metrics, traces
+ otlp:
+ protocols:
+ grpc:
+ endpoint: ":4317"
+ http:
+ endpoint: ":55681"
+ # Data sources: metrics
+ prometheus:
+ config:
+ scrape_configs:
+ - job_name: "garage"
+ scrape_interval: 5s
+ static_configs:
+ - targets:
+ - "{{ env "attr.unique.network.ip-address" }}:3909"
+ - job_name: "node_exporter"
+ scrape_interval: 5s
+ static_configs:
+ - targets:
+ - "{{ env "attr.unique.network.ip-address" }}:9100"
+
+exporters:
+ logging:
+ logLevel: info
+ # see https://www.elastic.co/guide/en/apm/get-started/current/open-telemetry-elastic.html#open-telemetry-collector
+ otlp/elastic:
+ endpoint: "localhost:8200"
+ tls:
+ insecure: true
+
+processors:
+ batch:
+ probabilistic_sampler:
+ hash_seed: 42
+ sampling_percentage: 10
+
+extensions:
+ health_check:
+ pprof:
+ endpoint: ":1888"  # quoted like the receiver endpoints earlier in this file
+ zpages:
+ endpoint: ":55679"  # quoted like the receiver endpoints earlier in this file
+
+service:
+ extensions: [pprof, zpages, health_check]
+ pipelines:
+ traces:
+ receivers: [otlp]
+ processors: [probabilistic_sampler, batch]
+ exporters: [logging, otlp/elastic]
+ metrics:
+ receivers: [otlp, prometheus]
+ processors: [batch]
+ exporters: [logging, otlp/elastic]
diff --git a/experimental/app/telemetry-elastic/deploy/telemetry-system.hcl b/experimental/app/telemetry-elastic/deploy/telemetry-system.hcl
new file mode 100644
index 0000000..3e26c2e
--- /dev/null
+++ b/experimental/app/telemetry-elastic/deploy/telemetry-system.hcl
@@ -0,0 +1,182 @@
+job "telemetry-system" {
+ datacenters = ["neptune"]
+ type = "system"
+
+ group "elasticsearch" {
+ network {
+ port "elastic" {
+ static = 9200
+ }
+ port "elastic_internal" {
+ static = 9300
+ }
+ }
+
+ task "elastic" {
+ driver = "docker"
+ config {
+ image = "docker.elastic.co/elasticsearch/elasticsearch:8.2.0"
+ network_mode = "host"
+ volumes = [
+ "/mnt/ssd/telemetry/es_data:/usr/share/elasticsearch/data",
+ "secrets/elastic-certificates.p12:/usr/share/elasticsearch/config/elastic-certificates.p12",
+ ]
+ ports = [ "elastic", "elastic_internal" ]
+ sysctl = {
+ #"vm.max_map_count" = "262144",
+ }
+ ulimit = {
+ memlock = "9223372036854775807:9223372036854775807",
+ }
+ }
+
+ user = "1000"
+
+ resources {
+ memory = 1500
+ cpu = 500
+ }
+
+ template {
+ data = "{{ key \"secrets/telemetry/elasticsearch/elastic-certificates.p12\" }}"
+ destination = "secrets/elastic-certificates.p12"
+ }
+
+ template {
+ data = <<EOH
+node.name={{ env "attr.unique.hostname" }}
+http.port=9200
+transport.port=9300
+cluster.name=es-deuxfleurs
+cluster.initial_master_nodes=carcajou,caribou,cariacou
+discovery.seed_hosts=carcajou,caribou,cariacou
+bootstrap.memory_lock=true
+xpack.security.enabled=true
+xpack.security.authc.api_key.enabled=true
+xpack.security.transport.ssl.enabled=true
+xpack.security.transport.ssl.verification_mode=certificate
+xpack.security.transport.ssl.client_authentication=required
+xpack.security.transport.ssl.keystore.path=/usr/share/elasticsearch/config/elastic-certificates.p12
+xpack.security.transport.ssl.truststore.path=/usr/share/elasticsearch/config/elastic-certificates.p12
+cluster.routing.allocation.disk.watermark.high=75%
+cluster.routing.allocation.disk.watermark.low=65%
+ES_JAVA_OPTS=-Xms512M -Xmx512M
+EOH
+ destination = "secrets/env"
+ env = true
+ }
+ }
+ }
+
+ group "collector" {
+ network {
+ port "otel_grpc" {
+ static = 4317
+ }
+ port "apm" {
+ static = 8200
+ }
+ port "node_exporter" {
+ static = 9100
+ }
+ }
+
+ task "otel" {
+ driver = "docker"
+ config {
+ image = "otel/opentelemetry-collector-contrib:0.46.0"
+ args = [
+ "--config=/etc/otel-config.yaml",
+ ]
+ network_mode = "host"
+ ports= [ "otel_grpc" ]
+ volumes = [
+ "secrets/otel-config.yaml:/etc/otel-config.yaml"
+ ]
+ }
+
+ template {
+ data = file("../config/otel-config.yaml")
+ destination = "secrets/otel-config.yaml"
+ }
+
+ resources {
+ memory = 100
+ cpu = 100
+ }
+ }
+
+ task "apm" {
+ driver = "docker"
+ config {
+ image = "docker.elastic.co/apm/apm-server:8.2.0"
+ network_mode = "host"
+ ports = [ "apm" ]
+ args = [ "--strict.perms=false" ]
+ volumes = [
+ "secrets/apm-config.yaml:/usr/share/apm-server/apm-server.yml:ro"
+ ]
+ }
+
+ template {
+ data = file("../config/apm-config.yaml")
+ destination = "secrets/apm-config.yaml"
+ }
+
+ resources {
+ memory = 100
+ cpu = 100
+ }
+ }
+
+/*
+ task "node_exporter" {
+ driver = "docker"
+ config {
+ image = "quay.io/prometheus/node-exporter:v1.1.2"
+ network_mode = "host"
+ ports = [ "node_exporter" ]
+ volumes = [
+ "/:/host:ro,rslave"
+ ]
+ args = [ "--path.rootfs=/host" ]
+ }
+
+ resources {
+ cpu = 50
+ memory = 40
+ }
+ }
+*/
+
+ task "filebeat" {
+ driver = "docker"
+ config {
+ image = "docker.elastic.co/beats/filebeat:8.2.0"
+ network_mode = "host"
+ volumes = [
+ "/mnt/ssd/telemetry/filebeat:/usr/share/filebeat/data",
+ "secrets/filebeat.yml:/usr/share/filebeat/filebeat.yml",
+ "/var/run/docker.sock:/var/run/docker.sock",
+ "/var/lib/docker/containers/:/var/lib/docker/containers/:ro",
+ "/var/log/:/var/log/:ro",
+ ]
+ args = [ "--strict.perms=false" ]
+ privileged = true
+ }
+ user = "root"
+
+
+ template {
+ data = file("../config/filebeat.yml")
+ destination = "secrets/filebeat.yml"
+ }
+
+ resources {
+ memory = 100
+ cpu = 100
+ }
+ }
+ }
+}
+
diff --git a/experimental/app/telemetry-elastic/deploy/telemetry.hcl b/experimental/app/telemetry-elastic/deploy/telemetry.hcl
new file mode 100644
index 0000000..21685a1
--- /dev/null
+++ b/experimental/app/telemetry-elastic/deploy/telemetry.hcl
@@ -0,0 +1,181 @@
+job "telemetry" {
+ datacenters = ["neptune"]
+ type = "service"
+
+ group "kibana" {
+ count = 1
+
+ network {
+ port "kibana" {
+ static = 5601
+ }
+ }
+
+ task "kibana" {
+ driver = "docker"
+ config {
+ image = "docker.elastic.co/kibana/kibana:8.2.0"
+ network_mode = "host"
+ ports = [ "kibana" ]
+ }
+
+ template {
+ data = <<EOH
+SERVER_NAME={{ env "attr.unique.hostname" }}
+ELASTICSEARCH_HOSTS=http://localhost:9200
+ELASTICSEARCH_USERNAME=kibana_system
+ELASTICSEARCH_PASSWORD={{ key "secrets/telemetry/elastic_passwords/kibana_system" }}
+SERVER_PUBLICBASEURL=https://kibana.home.adnab.me
+EOH
+ destination = "secrets/env"
+ env = true
+ }
+
+ resources {
+ memory = 1000
+ cpu = 500
+ }
+
+ service {
+ tags = [
+ "kibana",
+ "tricot kibana.staging.deuxfleurs.org",
+ ]
+ port = 5601
+ address_mode = "driver"
+ name = "kibana"
+ check {
+ type = "tcp"
+ port = 5601
+ address_mode = "driver"
+ interval = "60s"
+ timeout = "5s"
+ check_restart {
+ limit = 3
+ grace = "90s"
+ ignore_warnings = false
+ }
+ }
+ }
+ }
+ }
+
+ group "grafana" {
+ count = 1
+
+ network {
+ port "grafana" {
+ static = 3333
+ }
+ }
+
+ task "restore-db" {
+ lifecycle {
+ hook = "prestart"
+ sidecar = false
+ }
+
+ driver = "docker"
+ config {
+ image = "litestream/litestream:0.3.7"
+ args = [
+ "restore", "-config", "/etc/litestream.yml", "/ephemeral/grafana.db"
+ ]
+ volumes = [
+ "../alloc/data:/ephemeral",
+ "secrets/litestream.yml:/etc/litestream.yml"
+ ]
+ }
+ user = "472"
+
+ template {
+ data = file("../config/grafana-litestream.yml")
+ destination = "secrets/litestream.yml"
+ }
+
+ resources {
+ memory = 200
+ cpu = 1000
+ }
+ }
+
+ task "grafana" {
+ driver = "docker"
+ config {
+ image = "grafana/grafana:8.4.3"
+ network_mode = "host"
+ ports = [ "grafana" ]
+ volumes = [
+ "../alloc/data:/var/lib/grafana",
+ "secrets/elastic.yaml:/etc/grafana/provisioning/datasources/elastic.yaml"
+ ]
+ }
+
+ template {
+ data = file("../config/grafana/provisioning/datasources/elastic.yaml")
+ destination = "secrets/elastic.yaml"
+ }
+
+ template {
+ data = <<EOH
+GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel,grafana-worldmap-panel,grafana-polystat-panel
+GF_SERVER_HTTP_PORT=3333
+EOH
+ destination = "secrets/env"
+ env = true
+ }
+
+ resources {
+ memory = 500
+ cpu = 100
+ }
+
+ service {
+ tags = [
+ "grafana",
+ "tricot grafana.staging.deuxfleurs.org",
+ ]
+ port = 3333
+ address_mode = "driver"
+ name = "grafana"
+ check {
+ type = "tcp"
+ port = 3333
+ address_mode = "driver"
+ interval = "60s"
+ timeout = "5s"
+ check_restart {
+ limit = 3
+ grace = "90s"
+ ignore_warnings = false
+ }
+ }
+ }
+ }
+
+ task "replicate-db" {
+ driver = "docker"
+ config {
+ image = "litestream/litestream:0.3.7"
+ args = [
+ "replicate", "-config", "/etc/litestream.yml"
+ ]
+ volumes = [
+ "../alloc/data:/ephemeral",
+ "secrets/litestream.yml:/etc/litestream.yml"
+ ]
+ }
+ user = "472"
+
+ template {
+ data = file("../config/grafana-litestream.yml")
+ destination = "secrets/litestream.yml"
+ }
+
+ resources {
+ memory = 200
+ cpu = 100
+ }
+ }
+ }
+}