-rw-r--r--  cluster/prod/app/woodpecker-ci/deploy/server.hcl              2
-rw-r--r--  cluster/prod/cluster.nix                                       3
-rw-r--r--  cluster/staging/app/telemetry/deploy/telemetry-service.hcl   158
-rw-r--r--  cluster/staging/app/telemetry/deploy/telemetry-storage.hcl    97
-rw-r--r--  cluster/staging/app/telemetry/deploy/telemetry-system.hcl     75
-rw-r--r--  cluster/staging/cluster.nix                                    3
6 files changed, 178 insertions, 160 deletions
diff --git a/cluster/prod/app/woodpecker-ci/deploy/server.hcl b/cluster/prod/app/woodpecker-ci/deploy/server.hcl
index e0788de..60806b9 100644
--- a/cluster/prod/app/woodpecker-ci/deploy/server.hcl
+++ b/cluster/prod/app/woodpecker-ci/deploy/server.hcl
@@ -23,7 +23,7 @@ job "woodpecker-ci" {
task "server" {
driver = "docker"
config {
- image = "woodpeckerci/woodpecker-server:v2.7.1"
+ image = "woodpeckerci/woodpecker-server:v3.0.1"
ports = [ "web_port", "grpc_port" ]
network_mode = "host"
}
diff --git a/cluster/prod/cluster.nix b/cluster/prod/cluster.nix
index 080b258..66da48d 100644
--- a/cluster/prod/cluster.nix
+++ b/cluster/prod/cluster.nix
@@ -133,6 +133,9 @@
kokakiwi = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFPTsEgcOtb2bij+Ih8eg8ZqO7d3IMiWykv6deMzlSSS kokakiwi@kira"
];
+ stitch = [
+ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILdT28Emp9yJqTPrxz+oDP08KZaN1kbsNyVqt9p9IMED"
+ ];
};
# For Garage external communication
diff --git a/cluster/staging/app/telemetry/deploy/telemetry-service.hcl b/cluster/staging/app/telemetry/deploy/telemetry-service.hcl
index 47554e2..4bc9f8a 100644
--- a/cluster/staging/app/telemetry/deploy/telemetry-service.hcl
+++ b/cluster/staging/app/telemetry/deploy/telemetry-service.hcl
@@ -2,95 +2,6 @@ job "telemetry-service" {
datacenters = ["neptune", "dathomir", "corrin", "bespin"]
type = "service"
- group "prometheus" {
- count = 2
-
- network {
- port "prometheus" {
- static = 9090
- }
- }
-
- constraint {
- attribute = "${attr.unique.hostname}"
- operator = "set_contains_any"
- value = "df-pw5,origan"
- }
-
- task "prometheus" {
- driver = "nix2"
- config {
- nixpkgs = "github:nixos/nixpkgs/nixos-22.11"
- packages = [ "#prometheus", "#coreutils", "#findutils", "#bash" ]
- command = "prometheus"
- args = [
- "--config.file=/etc/prom/prometheus.yml",
- "--storage.tsdb.path=/data",
- "--storage.tsdb.retention.size=5GB",
- ]
- bind = {
- "/mnt/ssd/prometheus" = "/data"
- }
- }
-
- template {
- data = file("../config/prometheus.yml")
- destination = "etc/prom/prometheus.yml"
- }
-
- template {
- data = "{{ key \"secrets/consul/consul-ca.crt\" }}"
- destination = "etc/prom/consul.crt"
- }
-
- template {
- data = "{{ key \"secrets/consul/consul-client.crt\" }}"
- destination = "etc/prom/consul-client.crt"
- }
-
- template {
- data = "{{ key \"secrets/consul/consul-client.key\" }}"
- destination = "etc/prom/consul-client.key"
- }
-
- template {
- data = "{{ key \"secrets/nomad/nomad-ca.crt\" }}"
- destination = "etc/prom/nomad-ca.crt"
- }
-
- template {
- data = "{{ key \"secrets/nomad/nomad-client.crt\" }}"
- destination = "etc/prom/nomad-client.crt"
- }
-
- template {
- data = "{{ key \"secrets/nomad/nomad-client.key\" }}"
- destination = "etc/prom/nomad-client.key"
- }
-
- resources {
- memory = 500
- cpu = 200
- }
-
- service {
- port = "prometheus"
- name = "prometheus"
- check {
- type = "http"
- path = "/"
- interval = "60s"
- timeout = "5s"
- check_restart {
- limit = 3
- grace = "90s"
- ignore_warnings = false
- }
- }
- }
- }
- }
-
group "grafana" {
count = 1
@@ -106,50 +17,46 @@ job "telemetry-service" {
sidecar = false
}
- driver = "nix2"
+ driver = "docker"
config {
- packages = [ "#litestream" ]
- command = "litestream"
+ image = "litestream/litestream:0.3.13"
args = [
"restore", "-config", "/etc/litestream.yml", "/ephemeral/grafana.db"
]
- bind = {
- "../alloc/data" = "/ephemeral",
- }
+ volumes = [
+ "../alloc/data:/ephemeral",
+ "secrets/litestream.yml:/etc/litestream.yml"
+ ]
}
+ user = "472"
template {
data = file("../config/grafana-litestream.yml")
- destination = "etc/litestream.yml"
+ destination = "secrets/litestream.yml"
}
resources {
- memory = 100
- memory_max = 1000
+ memory = 50
+ memory_max = 200
cpu = 100
}
}
task "grafana" {
- driver = "nix2"
+ driver = "docker"
config {
- nixpkgs = "github:nixos/nixpkgs/nixos-22.11"
- packages = [ "#grafana" ]
- command = "grafana-server"
- args = [
- "-homepath", "/share/grafana",
- "cfg:default.paths.data=/grafana",
- "cfg:default.paths.provisioning=/grafana-provisioning"
+ image = "grafana/grafana:11.4.0"
+ network_mode = "host"
+ ports = [ "grafana" ]
+ volumes = [
+ "../alloc/data:/var/lib/grafana",
+ "secrets/prometheus.yaml:/etc/grafana/provisioning/datasources/prometheus.yaml"
]
-
- bind = {
- "../alloc/data" = "/grafana",
- }
}
template {
data = file("../config/grafana-datasource-prometheus.yaml")
- destination = "grafana-provisioning/datasources/prometheus.yaml"
+ destination = "secrets/prometheus.yaml"
}
template {
@@ -163,8 +70,9 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }}
}
resources {
- memory = 300
- cpu = 300
+ memory = 100
+ memory_max = 400
+ cpu = 300
}
restart {
@@ -181,9 +89,12 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }}
"tricot grafana.staging.deuxfleurs.org",
"d53-cname grafana.staging.deuxfleurs.org",
]
- port = "grafana"
+ port = 3719
+ address_mode = "driver"
check {
type = "tcp"
+ port = 3719
+ address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
@@ -196,26 +107,27 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }}
}
task "replicate-db" {
- driver = "nix2"
+ driver = "docker"
config {
- packages = [ "#litestream" ]
- command = "litestream"
+ image = "litestream/litestream:0.3.13"
args = [
"replicate", "-config", "/etc/litestream.yml"
]
- bind = {
- "../alloc/data" = "/ephemeral",
- }
+ volumes = [
+ "../alloc/data:/ephemeral",
+ "secrets/litestream.yml:/etc/litestream.yml"
+ ]
}
+ user = "472"
template {
data = file("../config/grafana-litestream.yml")
- destination = "etc/litestream.yml"
+ destination = "secrets/litestream.yml"
}
resources {
- memory = 100
- memory_max = 500
+ memory = 50
+ memory_max = 200
cpu = 100
}
}
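
Note: both Litestream tasks (the "restore" prestart task and the "replicate-db" sidecar) render the same ../config/grafana-litestream.yml into secrets/litestream.yml and mount it at /etc/litestream.yml. That file is not part of this diff; the sketch below only illustrates the shape of a Litestream configuration replicating /ephemeral/grafana.db to S3-compatible storage. The bucket name, endpoint, and Consul credential keys are placeholders, not values from this commit.

    dbs:
      - path: /ephemeral/grafana.db
        replicas:
          - type: s3
            bucket: grafana-db                                   # hypothetical bucket name
            path: grafana.db
            endpoint: https://garage.staging.deuxfleurs.org      # hypothetical endpoint
            access-key-id: '{{ key "secrets/telemetry/grafana/s3_access_key" }}'      # hypothetical Consul key
            secret-access-key: '{{ key "secrets/telemetry/grafana/s3_secret_key" }}'  # hypothetical Consul key
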
diff --git a/cluster/staging/app/telemetry/deploy/telemetry-storage.hcl b/cluster/staging/app/telemetry/deploy/telemetry-storage.hcl
new file mode 100644
index 0000000..fbde697
--- /dev/null
+++ b/cluster/staging/app/telemetry/deploy/telemetry-storage.hcl
@@ -0,0 +1,97 @@
+job "telemetry-storage" {
+ datacenters = ["neptune", "dathomir", "corrin", "bespin"]
+ type = "service"
+
+ group "prometheus" {
+ count = 2
+
+ network {
+ port "prometheus" {
+ static = 9090
+ }
+ }
+
+ constraint {
+ attribute = "${attr.unique.hostname}"
+ operator = "set_contains_any"
+ value = "df-pw5,origan"
+ }
+
+ task "prometheus" {
+ driver = "docker"
+ config {
+ image = "prom/prometheus:v3.1.0"
+ network_mode = "host"
+ ports = [ "prometheus" ]
+ args = [
+ "--config.file=/etc/prometheus/prometheus.yml",
+ "--storage.tsdb.path=/data",
+ "--storage.tsdb.retention.size=20GB",
+ ]
+ volumes = [
+ "secrets:/etc/prometheus",
+ "/mnt/ssd/prometheus:/data"
+ ]
+ }
+
+ template {
+ data = file("../config/prometheus.yml")
+ destination = "secrets/prometheus.yml"
+ }
+
+ template {
+ data = "{{ key \"secrets/consul/consul-ca.crt\" }}"
+ destination = "secrets/consul.crt"
+ }
+
+ template {
+ data = "{{ key \"secrets/consul/consul-client.crt\" }}"
+ destination = "secrets/consul-client.crt"
+ }
+
+ template {
+ data = "{{ key \"secrets/consul/consul-client.key\" }}"
+ destination = "secrets/consul-client.key"
+ }
+
+ template {
+ data = "{{ key \"secrets/nomad/nomad-ca.crt\" }}"
+ destination = "secrets/nomad-ca.crt"
+ }
+
+ template {
+ data = "{{ key \"secrets/nomad/nomad-client.crt\" }}"
+ destination = "secrets/nomad-client.crt"
+ }
+
+ template {
+ data = "{{ key \"secrets/nomad/nomad-client.key\" }}"
+ destination = "secrets/nomad-client.key"
+ }
+
+ resources {
+ memory = 500
+ cpu = 200
+ }
+
+ service {
+ port = 9090
+ address_mode = "driver"
+ name = "prometheus"
+ check {
+ type = "http"
+ path = "/"
+ port = 9090
+ address_mode = "driver"
+ interval = "60s"
+ timeout = "5s"
+ check_restart {
+ limit = 3
+ grace = "90s"
+ ignore_warnings = false
+ }
+ }
+ }
+ }
+ }
+}
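
Note: the new telemetry-storage job mounts the templated secrets/ directory at /etc/prometheus, so the Consul and Nomad client certificates land next to prometheus.yml inside the container. The actual prometheus.yml is only referenced via file("../config/prometheus.yml"); the sketch below shows the kind of Consul service-discovery scrape block those certificate paths suggest. The job name and Consul server address are assumptions, only the tls_config paths follow from the template destinations above.

    scrape_configs:
      - job_name: consul-services          # hypothetical job name
        consul_sd_configs:
          - server: localhost:8501         # hypothetical Consul HTTPS address
            scheme: https
            tls_config:
              ca_file: /etc/prometheus/consul.crt
              cert_file: /etc/prometheus/consul-client.crt
              key_file: /etc/prometheus/consul-client.key
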
diff --git a/cluster/staging/app/telemetry/deploy/telemetry-system.hcl b/cluster/staging/app/telemetry/deploy/telemetry-system.hcl
index a97c7b1..9cd254a 100644
--- a/cluster/staging/app/telemetry/deploy/telemetry-system.hcl
+++ b/cluster/staging/app/telemetry/deploy/telemetry-system.hcl
@@ -4,43 +4,46 @@ job "telemetry-system" {
priority = "100"
group "collector" {
- network {
- port "node_exporter" { static = 9100 }
- }
+ network {
+ port "node_exporter" { static = 9100 }
+ }
- task "node_exporter" {
- driver = "nix2"
+ task "node_exporter" {
+ driver = "docker"
- config {
- packages = [ "#prometheus-node-exporter" ]
- command = "node_exporter"
- args = [ "--path.rootfs=/host" ]
- bind_read_only = {
- "/" = "/host"
- }
- }
+ config {
+ image = "quay.io/prometheus/node-exporter:v1.8.1"
+ network_mode = "host"
+ volumes = [
+ "/:/host:ro,rslave"
+ ]
+ args = [ "--path.rootfs=/host" ]
+ }
- resources {
- cpu = 50
- memory = 40
- }
+ resources {
+ cpu = 50
+ memory = 40
+ }
- service {
- name = "node-exporter"
- tags = [ "telemetry" ]
- port = "node_exporter"
- check {
- type = "http"
- path = "/"
- interval = "60s"
- timeout = "5s"
- check_restart {
- limit = 3
- grace = "90s"
- ignore_warnings = false
- }
- }
- }
- }
- }
-}
+ service {
+ tags = [ "telemetry" ]
+ port = 9100
+ address_mode = "driver"
+ name = "node-exporter"
+ check {
+ type = "http"
+ path = "/"
+ port = 9100
+ address_mode = "driver"
+ interval = "60s"
+ timeout = "5s"
+ check_restart {
+ limit = 3
+ grace = "90s"
+ ignore_warnings = false
+ }
+ }
+ }
+ }
+ }
+ }
diff --git a/cluster/staging/cluster.nix b/cluster/staging/cluster.nix
index 26011d2..8a71424 100644
--- a/cluster/staging/cluster.nix
+++ b/cluster/staging/cluster.nix
@@ -90,6 +90,9 @@
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJX0A2P59or83EKhh32o8XumGz0ToTEsoq89hMbMtr7h"
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIB540H9kn+Ocs4Wjc1Y3f3OkHFYEqc5IM/FiCyoVVoh3"
];
+ stitch = [
+ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILdT28Emp9yJqTPrxz+oDP08KZaN1kbsNyVqt9p9IMED"
+ ];
};
# For Garage ipv6 communication