From 65ca536b3b3bf92115d0fd2618555ea056dc6b6f Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 17 Feb 2022 15:06:09 +0100 Subject: Add telemetry to garage staging --- app/garage-staging/config/garage.toml | 3 + app/garage-staging/deploy/garage.hcl | 12 +- app/telemetry/config/apm-config.yaml | 10 ++ .../grafana/provisioning/datasources/elastic.yaml | 21 +++ app/telemetry/config/otel-config.yaml | 50 +++++++ app/telemetry/deploy/docker-compose.yml | 69 +++++++++ app/telemetry/deploy/telemetry.hcl | 166 +++++++++++++++++++++ 7 files changed, 330 insertions(+), 1 deletion(-) create mode 100644 app/telemetry/config/apm-config.yaml create mode 100644 app/telemetry/config/grafana/provisioning/datasources/elastic.yaml create mode 100644 app/telemetry/config/otel-config.yaml create mode 100644 app/telemetry/deploy/docker-compose.yml create mode 100644 app/telemetry/deploy/telemetry.hcl diff --git a/app/garage-staging/config/garage.toml b/app/garage-staging/config/garage.toml index fffcf0c..ca28b79 100644 --- a/app/garage-staging/config/garage.toml +++ b/app/garage-staging/config/garage.toml @@ -21,3 +21,6 @@ api_bind_addr = "0.0.0.0:3990" bind_addr = "0.0.0.0:3992" root_domain = ".garage-staging-web.home.adnab.me" index = "index.html" + +[admin_api] +bind_addr = "0.0.0.0:3909" diff --git a/app/garage-staging/deploy/garage.hcl b/app/garage-staging/deploy/garage.hcl index 3412a4a..44aa0c9 100644 --- a/app/garage-staging/deploy/garage.hcl +++ b/app/garage-staging/deploy/garage.hcl @@ -15,6 +15,7 @@ job "garage-staging" { port "s3" { static = 3990 } port "rpc" { static = 3991 } port "web" { static = 3992 } + port "admin" { static = 3909 } } update { @@ -27,7 +28,7 @@ job "garage-staging" { driver = "docker" config { - image = "dxflrs/amd64_garage:v0.6.0-rc1" + image = "dxflrs/amd64_garage:37011a2f6ec680e4b0bbc96fa7fa86d3738d9de8" command = "/garage" args = [ "server" ] network_mode = "host" @@ -120,6 +121,15 @@ job "garage-staging" { } } + service { + tags = [ + "garage-staging-admin", + ] + port = 3909 + address_mode = "driver" + name = "garage-staging-admin" + } + restart { interval = "30m" attempts = 10 diff --git a/app/telemetry/config/apm-config.yaml b/app/telemetry/config/apm-config.yaml new file mode 100644 index 0000000..1c1e645 --- /dev/null +++ b/app/telemetry/config/apm-config.yaml @@ -0,0 +1,10 @@ +apm-server: + # Defines the host and port the server is listening on. Use "unix:/path/to.sock" to listen on a unix domain socket. + host: "0.0.0.0:8200" +#-------------------------- Elasticsearch output -------------------------- +output.elasticsearch: + # Array of hosts to connect to. + # Scheme and port can be left out and will be set to the default (`http` and `9200`). + # In case you specify and additional path, the scheme is required: `http://localhost:9200/path`. + # IPv6 addresses should always be defined as: `https://[2001:db8::1]:9200`. + hosts: ["localhost:9200"] diff --git a/app/telemetry/config/grafana/provisioning/datasources/elastic.yaml b/app/telemetry/config/grafana/provisioning/datasources/elastic.yaml new file mode 100644 index 0000000..8108a53 --- /dev/null +++ b/app/telemetry/config/grafana/provisioning/datasources/elastic.yaml @@ -0,0 +1,21 @@ +apiVersion: 1 + +datasources: + - name: DS_ELASTICSEARCH + type: elasticsearch + access: proxy + url: http://localhost:9200 + password: '' + user: '' + database: apm-* + basicAuth: false + isDefault: true + jsonData: + esVersion: "7.10.0" + includeFrozen: false + logLevelField: '' + logMessageField: '' + maxConcurrentShardRequests: 5 + timeField: "@timestamp" + timeInterval: "5s" + readOnly: false diff --git a/app/telemetry/config/otel-config.yaml b/app/telemetry/config/otel-config.yaml new file mode 100644 index 0000000..6749ee3 --- /dev/null +++ b/app/telemetry/config/otel-config.yaml @@ -0,0 +1,50 @@ +receivers: + # Data sources: metrics, traces + otlp: + protocols: + grpc: + endpoint: ":4317" + http: + endpoint: ":55681" + # Data sources: metrics + prometheus: + config: + scrape_configs: + - job_name: "garage" + scrape_interval: 5s + static_configs: + - targets: + - "10.42.2.21:3909" + - "10.42.2.22:3909" + - "10.42.2.23:3909" + +exporters: + logging: + logLevel: info + # see https://www.elastic.co/guide/en/apm/get-started/current/open-telemetry-elastic.html#open-telemetry-collector + otlp/elastic: + endpoint: "localhost:8200" + tls: + insecure: true + +processors: + batch: + +extensions: + health_check: + pprof: + endpoint: :1888 + zpages: + endpoint: :55679 + +service: + extensions: [pprof, zpages, health_check] + pipelines: + traces: + receivers: [otlp] + processors: [batch] + exporters: [logging, otlp/elastic] + metrics: + receivers: [otlp, prometheus] + processors: [batch] + exporters: [logging, otlp/elastic] diff --git a/app/telemetry/deploy/docker-compose.yml b/app/telemetry/deploy/docker-compose.yml new file mode 100644 index 0000000..576e0a9 --- /dev/null +++ b/app/telemetry/deploy/docker-compose.yml @@ -0,0 +1,69 @@ +version: "2" +services: + + otel: + image: otel/opentelemetry-collector-contrib:${OTEL_COLLECT_TAG} + command: [ "--config=/etc/otel-config.yaml" ] + volumes: + - ../config/otel-config.yaml:/etc/otel-config.yaml + network_mode: "host" + + elastic: + image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTIC_BUNDLE_TAG} + container_name: elastic + environment: + - "node.name=elastic" + - "http.port=9200" + - "cluster.name=es-docker-cluster" + - "discovery.type=single-node" + - "bootstrap.memory_lock=true" + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: 65536 + volumes: + - "es_data:/usr/share/elasticsearch/data" + network_mode: "host" + + # kibana instance and collectors + # see https://www.elastic.co/guide/en/elastic-stack-get-started/current/get-started-docker.html + kibana: + image: docker.elastic.co/kibana/kibana:${ELASTIC_BUNDLE_TAG} + container_name: kibana + environment: + SERVER_NAME: "kibana.local" + # ELASTICSEARCH_URL: "http://localhost:9700" + ELASTICSEARCH_HOSTS: "http://localhost:9200" + depends_on: [ 'elastic' ] + network_mode: "host" + + apm: + image: docker.elastic.co/apm/apm-server:${ELASTIC_BUNDLE_TAG} + container_name: apm + volumes: + - "../config/apm-config.yaml:/usr/share/apm-server/apm-server.yml:ro" + depends_on: [ 'elastic' ] + network_mode: "host" + + grafana: + # see https://grafana.com/docs/grafana/latest/installation/docker/ + image: "grafana/grafana:8.3.5" + container_name: grafana + # restart: unless-stopped + environment: + - "GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel,grafana-worldmap-panel,grafana-polystat-panel" + network_mode: "host" + volumes: + # chown 472:472 if needed + - grafana:/var/lib/grafana + - ../config/grafana/provisioning/:/etc/grafana/provisioning/ + +volumes: + es_data: + driver: local + grafana: + driver: local + metricbeat: + driver: local diff --git a/app/telemetry/deploy/telemetry.hcl b/app/telemetry/deploy/telemetry.hcl new file mode 100644 index 0000000..bcfe3f6 --- /dev/null +++ b/app/telemetry/deploy/telemetry.hcl @@ -0,0 +1,166 @@ +job "telemetry" { + datacenters = ["neptune"] + type = "service" + + group "grafana" { + count = 1 + + constraint { + attribute = "${attr.unique.hostname}" + operator = "=" + value = "cariacou" + } + + network { + port "otel_grpc" { + static = 4317 + } + port "elastic" { + static = 9200 + } + port "kibana" { + static = 5601 + } + port "apm" { + static = 8200 + } + port "grafana" { + static = 3333 + } + } + + task "otel" { + driver = "docker" + config { + image = "otel/opentelemetry-collector-contrib:0.44.0" + args = [ + "--config=/etc/otel-config.yaml", + ] + network_mode = "host" + ports= [ "otel_grpc" ] + volumes = [ + "secrets/otel-config.yaml:/etc/otel-config.yaml" + ] + } + + template { + data = file("../config/otel-config.yaml") + destination = "secrets/otel-config.yaml" + } + + resources { + memory = 200 + cpu = 100 + } + } + + task "elastic" { + driver = "docker" + config { + image = "docker.elastic.co/elasticsearch/elasticsearch:7.17.0" + network_mode = "host" + volumes = [ + "/mnt/ssd/telemetry/es_data:/usr/share/elasticsearch/data", + ] + ports = [ "elastic" ] + } + + resources { + memory = 2500 + cpu = 500 + } + + template { + data = <