diff options
-rw-r--r--   cluster/prod/app/woodpecker-ci/deploy/server.hcl             |   2
-rw-r--r--   cluster/prod/cluster.nix                                     |   3
-rw-r--r--   cluster/staging/app/telemetry/deploy/telemetry-service.hcl   | 158
-rw-r--r--   cluster/staging/app/telemetry/deploy/telemetry-storage.hcl   |  97
-rw-r--r--   cluster/staging/app/telemetry/deploy/telemetry-system.hcl    |  75
-rw-r--r--   cluster/staging/cluster.nix                                  |   3
6 files changed, 178 insertions, 160 deletions
diff --git a/cluster/prod/app/woodpecker-ci/deploy/server.hcl b/cluster/prod/app/woodpecker-ci/deploy/server.hcl index e0788de..60806b9 100644 --- a/cluster/prod/app/woodpecker-ci/deploy/server.hcl +++ b/cluster/prod/app/woodpecker-ci/deploy/server.hcl @@ -23,7 +23,7 @@ job "woodpecker-ci" { task "server" { driver = "docker" config { - image = "woodpeckerci/woodpecker-server:v2.7.1" + image = "woodpeckerci/woodpecker-server:v3.0.1" ports = [ "web_port", "grpc_port" ] network_mode = "host" } diff --git a/cluster/prod/cluster.nix b/cluster/prod/cluster.nix index 080b258..66da48d 100644 --- a/cluster/prod/cluster.nix +++ b/cluster/prod/cluster.nix @@ -133,6 +133,9 @@ kokakiwi = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFPTsEgcOtb2bij+Ih8eg8ZqO7d3IMiWykv6deMzlSSS kokakiwi@kira" ]; + stitch = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILdT28Emp9yJqTPrxz+oDP08KZaN1kbsNyVqt9p9IMED" + ]; }; # For Garage external communication diff --git a/cluster/staging/app/telemetry/deploy/telemetry-service.hcl b/cluster/staging/app/telemetry/deploy/telemetry-service.hcl index 47554e2..4bc9f8a 100644 --- a/cluster/staging/app/telemetry/deploy/telemetry-service.hcl +++ b/cluster/staging/app/telemetry/deploy/telemetry-service.hcl @@ -2,95 +2,6 @@ job "telemetry-service" { datacenters = ["neptune", "dathomir", "corrin", "bespin"] type = "service" - group "prometheus" { - count = 2 - - network { - port "prometheus" { - static = 9090 - } - } - - constraint { - attribute = "${attr.unique.hostname}" - operator = "set_contains_any" - value = "df-pw5,origan" - } - - task "prometheus" { - driver = "nix2" - config { - nixpkgs = "github:nixos/nixpkgs/nixos-22.11" - packages = [ "#prometheus", "#coreutils", "#findutils", "#bash" ] - command = "prometheus" - args = [ - "--config.file=/etc/prom/prometheus.yml", - "--storage.tsdb.path=/data", - "--storage.tsdb.retention.size=5GB", - ] - bind = { - "/mnt/ssd/prometheus" = "/data" - } - } - - template { - data = file("../config/prometheus.yml") - destination 
= "etc/prom/prometheus.yml" - } - - template { - data = "{{ key \"secrets/consul/consul-ca.crt\" }}" - destination = "etc/prom/consul.crt" - } - - template { - data = "{{ key \"secrets/consul/consul-client.crt\" }}" - destination = "etc/prom/consul-client.crt" - } - - template { - data = "{{ key \"secrets/consul/consul-client.key\" }}" - destination = "etc/prom/consul-client.key" - } - - template { - data = "{{ key \"secrets/nomad/nomad-ca.crt\" }}" - destination = "etc/prom/nomad-ca.crt" - } - - template { - data = "{{ key \"secrets/nomad/nomad-client.crt\" }}" - destination = "etc/prom/nomad-client.crt" - } - - template { - data = "{{ key \"secrets/nomad/nomad-client.key\" }}" - destination = "etc/prom/nomad-client.key" - } - - resources { - memory = 500 - cpu = 200 - } - - service { - port = "prometheus" - name = "prometheus" - check { - type = "http" - path = "/" - interval = "60s" - timeout = "5s" - check_restart { - limit = 3 - grace = "90s" - ignore_warnings = false - } - } - } - } - } - group "grafana" { count = 1 @@ -106,50 +17,46 @@ job "telemetry-service" { sidecar = false } - driver = "nix2" + driver = "docker" config { - packages = [ "#litestream" ] - command = "litestream" + image = "litestream/litestream:0.3.13" args = [ "restore", "-config", "/etc/litestream.yml", "/ephemeral/grafana.db" ] - bind = { - "../alloc/data" = "/ephemeral", - } + volumes = [ + "../alloc/data:/ephemeral", + "secrets/litestream.yml:/etc/litestream.yml" + ] } + user = "472" template { data = file("../config/grafana-litestream.yml") - destination = "etc/litestream.yml" + destination = "secrets/litestream.yml" } resources { - memory = 100 - memory_max = 1000 + memory = 50 + memory_max = 200 cpu = 100 } } task "grafana" { - driver = "nix2" + driver = "docker" config { - nixpkgs = "github:nixos/nixpkgs/nixos-22.11" - packages = [ "#grafana" ] - command = "grafana-server" - args = [ - "-homepath", "/share/grafana", - "cfg:default.paths.data=/grafana", - 
"cfg:default.paths.provisioning=/grafana-provisioning" + image = "grafana/grafana:11.4.0" + network_mode = "host" + ports = [ "grafana" ] + volumes = [ + "../alloc/data:/var/lib/grafana", + "secrets/prometheus.yaml:/etc/grafana/provisioning/datasources/prometheus.yaml" ] - - bind = { - "../alloc/data" = "/grafana", - } } template { data = file("../config/grafana-datasource-prometheus.yaml") - destination = "grafana-provisioning/datasources/prometheus.yaml" + destination = "secrets/prometheus.yaml" } template { @@ -163,8 +70,9 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }} } resources { - memory = 300 - cpu = 300 + memory = 100 + memory_max = 400 + cpu = 300 } restart { @@ -181,9 +89,12 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }} "tricot grafana.staging.deuxfleurs.org", "d53-cname grafana.staging.deuxfleurs.org", ] - port = "grafana" + port = 3719 + address_mode = "driver" check { type = "tcp" + port = 3719 + address_mode = "driver" interval = "60s" timeout = "5s" check_restart { @@ -196,26 +107,27 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }} } task "replicate-db" { - driver = "nix2" + driver = "docker" config { - packages = [ "#litestream" ] - command = "litestream" + image = "litestream/litestream:0.3.13" args = [ "replicate", "-config", "/etc/litestream.yml" ] - bind = { - "../alloc/data" = "/ephemeral", - } + volumes = [ + "../alloc/data:/ephemeral", + "secrets/litestream.yml:/etc/litestream.yml" + ] } + user = "472" template { data = file("../config/grafana-litestream.yml") - destination = "etc/litestream.yml" + destination = "secrets/litestream.yml" } resources { - memory = 100 - memory_max = 500 + memory = 50 + memory_max = 200 cpu = 100 } } diff --git a/cluster/staging/app/telemetry/deploy/telemetry-storage.hcl b/cluster/staging/app/telemetry/deploy/telemetry-storage.hcl new file mode 100644 index 0000000..fbde697 --- /dev/null +++ 
b/cluster/staging/app/telemetry/deploy/telemetry-storage.hcl @@ -0,0 +1,97 @@ +job "telemetry-storage" { + datacenters = ["neptune", "dathomir", "corrin", "bespin"] + type = "service" + + group "prometheus" { + count = 2 + + network { + port "prometheus" { + static = 9090 + } + } + + constraint { + attribute = "${attr.unique.hostname}" + operator = "set_contains_any" + value = "df-pw5,origan" + } + + task "prometheus" { + driver = "docker" + config { + image = "prom/prometheus:v3.1.0" + network_mode = "host" + ports = [ "prometheus" ] + args = [ + "--config.file=/etc/prometheus/prometheus.yml", + "--storage.tsdb.path=/data", + "--storage.tsdb.retention.size=20GB", + ] + volumes = [ + "secrets:/etc/prometheus", + "/mnt/ssd/prometheus:/data" + ] + } + + template { + data = file("../config/prometheus.yml") + destination = "secrets/prometheus.yml" + } + + template { + data = "{{ key \"secrets/consul/consul-ca.crt\" }}" + destination = "secrets/consul.crt" + } + + template { + data = "{{ key \"secrets/consul/consul-client.crt\" }}" + destination = "secrets/consul-client.crt" + } + + template { + data = "{{ key \"secrets/consul/consul-client.key\" }}" + destination = "secrets/consul-client.key" + } + + template { + data = "{{ key \"secrets/nomad/nomad-ca.crt\" }}" + destination = "secrets/nomad-ca.crt" + } + + template { + data = "{{ key \"secrets/nomad/nomad-client.crt\" }}" + destination = "secrets/nomad-client.crt" + } + + template { + data = "{{ key \"secrets/nomad/nomad-client.key\" }}" + destination = "secrets/nomad-client.key" + } + + resources { + memory = 500 + cpu = 200 + } + + service { + port = 9090 + address_mode = "driver" + name = "prometheus" + check { + type = "http" + path = "/" + port = 9090 + address_mode = "driver" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + } + } + } +} diff --git a/cluster/staging/app/telemetry/deploy/telemetry-system.hcl 
b/cluster/staging/app/telemetry/deploy/telemetry-system.hcl index a97c7b1..9cd254a 100644 --- a/cluster/staging/app/telemetry/deploy/telemetry-system.hcl +++ b/cluster/staging/app/telemetry/deploy/telemetry-system.hcl @@ -4,43 +4,46 @@ job "telemetry-system" { priority = "100" group "collector" { - network { - port "node_exporter" { static = 9100 } - } + network { + port "node_exporter" { static = 9100 } + } - task "node_exporter" { - driver = "nix2" + task "node_exporter" { + driver = "docker" - config { - packages = [ "#prometheus-node-exporter" ] - command = "node_exporter" - args = [ "--path.rootfs=/host" ] - bind_read_only = { - "/" = "/host" - } - } + config { + image = "quay.io/prometheus/node-exporter:v1.8.1" + network_mode = "host" + volumes = [ + "/:/host:ro,rslave" + ] + args = [ "--path.rootfs=/host" ] + } - resources { - cpu = 50 - memory = 40 - } + resources { + cpu = 50 + memory = 40 + } - service { - name = "node-exporter" - tags = [ "telemetry" ] - port = "node_exporter" - check { - type = "http" - path = "/" - interval = "60s" - timeout = "5s" - check_restart { - limit = 3 - grace = "90s" - ignore_warnings = false - } - } - } - } - } -} + service { + tags = [ "telemetry" ] + port = 9100 + address_mode = "driver" + name = "node-exporter" + check { + type = "http" + path = "/" + port = 9100 + address_mode = "driver" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + } + } + } + } diff --git a/cluster/staging/cluster.nix b/cluster/staging/cluster.nix index 26011d2..8a71424 100644 --- a/cluster/staging/cluster.nix +++ b/cluster/staging/cluster.nix @@ -90,6 +90,9 @@ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJX0A2P59or83EKhh32o8XumGz0ToTEsoq89hMbMtr7h" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIB540H9kn+Ocs4Wjc1Y3f3OkHFYEqc5IM/FiCyoVVoh3" ]; + stitch = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILdT28Emp9yJqTPrxz+oDP08KZaN1kbsNyVqt9p9IMED" + ]; }; # For Garage ipv6 communication |