From 56ff4c5cfdfc7fd84a10bd1d69418109e25c2560 Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Tue, 20 Sep 2022 17:13:46 +0200
Subject: Prod-like telemetry into staging

---
Review notes (free text below the separator, not part of any hunk):
 * The config files are templates: `{{ key ... }}` and `{{ env ... }}` are
   expanded as text before the result is parsed as YAML.
 * filebeat.yml (password) and grafana-litestream.yml (S3 key pair) now wrap
   their template expansions in double quotes, as apm-config.yaml already
   does, so the rendered value is always read as a YAML string.
 * otel-config.yaml: the pprof/zpages endpoints are quoted like the receiver
   endpoints; the string values are unchanged.
 * NOTE(review): a secret containing `"` or `\` would still break a
   double-quoted scalar -- confirm the stored secrets avoid these characters.
 * NOTE(review): grafana/provisioning/datasources/elastic.yaml sets
   `basicAuth: false` while also supplying `user`/`password` -- confirm that
   Grafana actually sends these credentials to Elasticsearch.
 * The telemetry-system.hcl hunk is cut off in this view (it stops at
   `data = <`); it is reproduced as-is.

 .../bad.telemetry-elastic/config/apm-config.yaml   |  20 +++
 .../bad.telemetry-elastic/config/filebeat.yml      |  46 ++++++
 .../config/grafana-litestream.yml                  |  10 ++
 .../grafana/provisioning/datasources/elastic.yaml  |  21 +++
 .../bad.telemetry-elastic/config/otel-config.yaml  |  56 +++++++
 .../deploy/telemetry-system.hcl                    | 182 +++++++++++++++++++++
 .../bad.telemetry-elastic/deploy/telemetry.hcl     | 181 ++++++++++++++++++++
 7 files changed, 516 insertions(+)
 create mode 100644 experimental/bad.telemetry-elastic/config/apm-config.yaml
 create mode 100644 experimental/bad.telemetry-elastic/config/filebeat.yml
 create mode 100644 experimental/bad.telemetry-elastic/config/grafana-litestream.yml
 create mode 100644 experimental/bad.telemetry-elastic/config/grafana/provisioning/datasources/elastic.yaml
 create mode 100644 experimental/bad.telemetry-elastic/config/otel-config.yaml
 create mode 100644 experimental/bad.telemetry-elastic/deploy/telemetry-system.hcl
 create mode 100644 experimental/bad.telemetry-elastic/deploy/telemetry.hcl

(limited to 'experimental')

diff --git a/experimental/bad.telemetry-elastic/config/apm-config.yaml b/experimental/bad.telemetry-elastic/config/apm-config.yaml
new file mode 100644
index 0000000..07a88bd
--- /dev/null
+++ b/experimental/bad.telemetry-elastic/config/apm-config.yaml
@@ -0,0 +1,20 @@
+apm-server:
+  # Defines the host and port the server is listening on. Use "unix:/path/to.sock" to listen on a unix domain socket.
+  host: "0.0.0.0:8200"
+#-------------------------- Elasticsearch output --------------------------
+output.elasticsearch:
+  # Array of hosts to connect to.
+  # Scheme and port can be left out and will be set to the default (`http` and `9200`).
+  # In case you specify an additional path, the scheme is required: `http://localhost:9200/path`.
+  # IPv6 addresses should always be defined as: `https://[2001:db8::1]:9200`.
+  hosts: ["localhost:9200"]
+  username: "elastic"
+  password: "{{ key "secrets/telemetry/elastic_passwords/elastic" }}"
+
+instrumentation:
+  enabled: true
+  environment: staging
+
+logging:
+  level: warning
+  to_stderr: true
diff --git a/experimental/bad.telemetry-elastic/config/filebeat.yml b/experimental/bad.telemetry-elastic/config/filebeat.yml
new file mode 100644
index 0000000..310afd1
--- /dev/null
+++ b/experimental/bad.telemetry-elastic/config/filebeat.yml
@@ -0,0 +1,46 @@
+# see https://github.com/elastic/beats/blob/master/filebeat/filebeat.reference.yml
+filebeat.modules:
+- module: system
+  syslog:
+    enabled: true
+  auth:
+    enabled: true
+
+#filebeat.inputs:
+#- type: container
+#  enabled: true
+#  paths:
+#    -/var/lib/docker/containers/*/*.log
+#  stream: all # can be all, stdout or stderr
+
+#========================== Filebeat autodiscover ==============================
+filebeat.autodiscover:
+  providers:
+    - type: docker
+      # https://www.elastic.co/guide/en/beats/filebeat/current/configuration-autodiscover-hints.html
+      # This URL also contains instructions on multi-line logs
+      hints.enabled: true
+
+#================================ Processors ===================================
+processors:
+# - add_cloud_metadata: ~
+- add_docker_metadata: ~
+- add_locale:
+    format: offset
+- add_host_metadata:
+    netinfo.enabled: true
+
+#========================== Elasticsearch output ===============================
+output.elasticsearch:
+  hosts: ["localhost:9200"]
+  username: elastic
+  password: "{{ key "secrets/telemetry/elastic_passwords/elastic" }}"
+
+#============================== Dashboards =====================================
+setup.dashboards:
+  enabled: false
+
+#============================== Xpack Monitoring ===============================
+xpack.monitoring:
+  enabled: true
+  elasticsearch:
diff --git a/experimental/bad.telemetry-elastic/config/grafana-litestream.yml b/experimental/bad.telemetry-elastic/config/grafana-litestream.yml
new file mode 100644
index 0000000..a537d9c
--- /dev/null
+++ b/experimental/bad.telemetry-elastic/config/grafana-litestream.yml
@@ -0,0 +1,10 @@
+dbs:
+  - path: /ephemeral/grafana.db
+    replicas:
+      - url: s3://grafana-db/grafana.db
+        region: garage-staging
+        endpoint: http://{{ env "attr.unique.network.ip-address" }}:3990
+        access-key-id: "{{ key "secrets/telemetry/grafana/s3_access_key" | trimSpace }}"
+        secret-access-key: "{{ key "secrets/telemetry/grafana/s3_secret_key" | trimSpace }}"
+        force-path-style: true
+        sync-interval: 60s
diff --git a/experimental/bad.telemetry-elastic/config/grafana/provisioning/datasources/elastic.yaml b/experimental/bad.telemetry-elastic/config/grafana/provisioning/datasources/elastic.yaml
new file mode 100644
index 0000000..7d2277c
--- /dev/null
+++ b/experimental/bad.telemetry-elastic/config/grafana/provisioning/datasources/elastic.yaml
@@ -0,0 +1,21 @@
+apiVersion: 1
+
+datasources:
+  - name: DS_ELASTICSEARCH
+    type: elasticsearch
+    access: proxy
+    url: http://localhost:9200
+    password: '{{ key "secrets/telemetry/elastic_passwords/elastic" }}'
+    user: 'elastic'
+    database: metrics-*
+    basicAuth: false
+    isDefault: true
+    jsonData:
+      esVersion: "8.2.0"
+      includeFrozen: false
+      logLevelField: ''
+      logMessageField: ''
+      maxConcurrentShardRequests: 5
+      timeField: "@timestamp"
+      timeInterval: "5s"
+    readOnly: false
diff --git a/experimental/bad.telemetry-elastic/config/otel-config.yaml b/experimental/bad.telemetry-elastic/config/otel-config.yaml
new file mode 100644
index 0000000..bcf1baa
--- /dev/null
+++ b/experimental/bad.telemetry-elastic/config/otel-config.yaml
@@ -0,0 +1,56 @@
+receivers:
+  # Data sources: metrics, traces
+  otlp:
+    protocols:
+      grpc:
+        endpoint: ":4317"
+      http:
+        endpoint: ":55681"
+  # Data sources: metrics
+  prometheus:
+    config:
+      scrape_configs:
+        - job_name: "garage"
+          scrape_interval: 5s
+          static_configs:
+            - targets:
+                - "{{ env "attr.unique.network.ip-address" }}:3909"
+        - job_name: "node_exporter"
+          scrape_interval: 5s
+          static_configs:
+            - targets:
+                - "{{ env "attr.unique.network.ip-address" }}:9100"
+
+exporters:
+  logging:
+    logLevel: info
+  # see https://www.elastic.co/guide/en/apm/get-started/current/open-telemetry-elastic.html#open-telemetry-collector
+  otlp/elastic:
+    endpoint: "localhost:8200"
+    tls:
+      insecure: true
+
+processors:
+  batch:
+  probabilistic_sampler:
+    hash_seed: 42
+    sampling_percentage: 10
+
+extensions:
+  health_check:
+  pprof:
+    endpoint: ":1888"
+  zpages:
+    endpoint: ":55679"
+
+service:
+  extensions: [pprof, zpages, health_check]
+  pipelines:
+    traces:
+      receivers: [otlp]
+      processors: [probabilistic_sampler, batch]
+      exporters: [logging, otlp/elastic]
+    metrics:
+      receivers: [otlp, prometheus]
+      processors: [batch]
+      exporters: [logging, otlp/elastic]
diff --git a/experimental/bad.telemetry-elastic/deploy/telemetry-system.hcl b/experimental/bad.telemetry-elastic/deploy/telemetry-system.hcl
new file mode 100644
index 0000000..3e26c2e
--- /dev/null
+++ b/experimental/bad.telemetry-elastic/deploy/telemetry-system.hcl
@@ -0,0 +1,182 @@
+job "telemetry-system" {
+  datacenters = ["neptune"]
+  type = "system"
+
+  group "elasticsearch" {
+    network {
+      port "elastic" {
+        static = 9200
+      }
+      port "elastic_internal" {
+        static = 9300
+      }
+    }
+
+    task "elastic" {
+      driver = "docker"
+      config {
+        image = "docker.elastic.co/elasticsearch/elasticsearch:8.2.0"
+        network_mode = "host"
+        volumes = [
+          "/mnt/ssd/telemetry/es_data:/usr/share/elasticsearch/data",
+          "secrets/elastic-certificates.p12:/usr/share/elasticsearch/config/elastic-certificates.p12",
+        ]
+        ports = [ "elastic", "elastic_internal" ]
+        sysctl = {
+          #"vm.max_map_count" = "262144",
+        }
+        ulimit = {
+          memlock = "9223372036854775807:9223372036854775807",
+        }
+      }
+
+      user = "1000"
+
+      resources {
+        memory = 1500
+        cpu = 500
+      }
+
+      template {
+        data = "{{ key \"secrets/telemetry/elasticsearch/elastic-certificates.p12\" }}"
+        destination = "secrets/elastic-certificates.p12"
+      }
+
+      template {
+        data = <