about | summary | refs | log | tree | commit | diff
path: root/cluster
diff options
context:
space:
mode:
Diffstat (limited to 'cluster')
-rw-r--r-- cluster/prod/app/plume/config/app.env | 2
-rw-r--r-- cluster/prod/app/plume/deploy/plume.hcl | 58
-rw-r--r-- cluster/prod/app/telemetry/deploy/telemetry-service.hcl | 6
-rw-r--r-- cluster/prod/app/woodpecker-ci/deploy/server.hcl | 2
-rw-r--r-- cluster/prod/app/woodpecker-ci/integration/docker-compose.yml | 2
-rw-r--r-- cluster/prod/cluster.nix | 3
-rw-r--r-- cluster/staging/app/telemetry/deploy/telemetry-service.hcl | 158
-rw-r--r-- cluster/staging/app/telemetry/deploy/telemetry-storage.hcl | 97
-rw-r--r-- cluster/staging/app/telemetry/deploy/telemetry-system.hcl | 75
-rw-r--r-- cluster/staging/cluster.nix | 3
10 files changed, 230 insertions, 176 deletions
diff --git a/cluster/prod/app/plume/config/app.env b/cluster/prod/app/plume/config/app.env
index b663d81..36000c2 100644
--- a/cluster/prod/app/plume/config/app.env
+++ b/cluster/prod/app/plume/config/app.env
@@ -28,7 +28,7 @@ MIGRATION_DIRECTORY=migrations/postgres
USE_HTTPS=0
ROCKET_ADDRESS=::
-ROCKET_PORT={{ env "NOMAD_PORT_web_port" }}
+ROCKET_PORT={{ env "NOMAD_PORT_back_port" }}
MEDIA_UPLOAD_DIRECTORY=/app/static/media
SEARCH_INDEX=/app/search_index
diff --git a/cluster/prod/app/plume/deploy/plume.hcl b/cluster/prod/app/plume/deploy/plume.hcl
index d9e276e..c759a02 100644
--- a/cluster/prod/app/plume/deploy/plume.hcl
+++ b/cluster/prod/app/plume/deploy/plume.hcl
@@ -6,7 +6,45 @@ job "plume-blog" {
count = 1
network {
- port "web_port" { }
+ port "back_port" { }
+ port "cache_port" { }
+ }
+
+ task "varnish" {
+ driver = "docker"
+ config {
+ image = "varnish:7.6.1"
+ network_mode = "host"
+ ports = [ "cache_port" ]
+
+ # cache
+ mount {
+ type = "tmpfs"
+ target = "/var/lib/varnish/varnishd:exec"
+ readonly = false
+ tmpfs_options {
+ size = 2684354559 # 2.5 GiB minus one byte (2.5 GiB = 2684354560 bytes)
+ }
+ }
+ }
+
+ env {
+ VARNISH_SIZE = "2G"
+ VARNISH_BACKEND_HOST = "localhost"
+ VARNISH_BACKEND_PORT = "${NOMAD_PORT_back_port}"
+ VARNISH_HTTP_PORT = "${NOMAD_PORT_cache_port}"
+ }
+
+ service {
+ name = "plume-cache"
+ tags = [
+ "plume",
+ "tricot plume.deuxfleurs.fr",
+ "d53-cname plume.deuxfleurs.fr",
+ ]
+ port = "cache_port"
+ address_mode = "host"
+ }
}
task "plume" {
@@ -14,9 +52,9 @@ job "plume-blog" {
config {
image = "lxpz/plume_s3:v1"
network_mode = "host"
- ports = [ "web_port" ]
+ ports = [ "back_port" ]
command = "sh"
- args = [ "-c", "plm search init; plm search refill; plume" ]
+ args = [ "-c", "plm search init; plume" ]
}
template {
@@ -26,24 +64,22 @@ job "plume-blog" {
}
resources {
- memory = 1024
- memory_max = 1024
+ memory = 512
+ memory_max = 512
cpu = 100
}
service {
- name = "plume"
+ name = "plume-back"
tags = [
"plume",
- "tricot plume.deuxfleurs.fr",
- "d53-cname plume.deuxfleurs.fr",
]
- port = "web_port"
+ port = "back_port"
address_mode = "host"
check {
type = "http"
protocol = "http"
- port = "web_port"
+ port = "back_port"
path = "/"
interval = "60s"
timeout = "5s"
@@ -55,7 +91,7 @@ job "plume-blog" {
}
}
restart {
- interval = "30m"
+ interval = "20m"
attempts = 20
delay = "15s"
mode = "delay"
diff --git a/cluster/prod/app/telemetry/deploy/telemetry-service.hcl b/cluster/prod/app/telemetry/deploy/telemetry-service.hcl
index 8b120e6..0744abc 100644
--- a/cluster/prod/app/telemetry/deploy/telemetry-service.hcl
+++ b/cluster/prod/app/telemetry/deploy/telemetry-service.hcl
@@ -45,7 +45,7 @@ job "telemetry-service" {
task "grafana" {
driver = "docker"
config {
- image = "grafana/grafana:11.4.0"
+ image = "grafana/grafana:11.4.1"
network_mode = "host"
ports = [ "grafana" ]
volumes = [
@@ -76,9 +76,9 @@ EOH
}
resources {
- memory = 100
+ memory = 200
memory_max = 400
- cpu = 500
+ cpu = 300
}
service {
diff --git a/cluster/prod/app/woodpecker-ci/deploy/server.hcl b/cluster/prod/app/woodpecker-ci/deploy/server.hcl
index e0788de..60806b9 100644
--- a/cluster/prod/app/woodpecker-ci/deploy/server.hcl
+++ b/cluster/prod/app/woodpecker-ci/deploy/server.hcl
@@ -23,7 +23,7 @@ job "woodpecker-ci" {
task "server" {
driver = "docker"
config {
- image = "woodpeckerci/woodpecker-server:v2.7.1"
+ image = "woodpeckerci/woodpecker-server:v3.0.1"
ports = [ "web_port", "grpc_port" ]
network_mode = "host"
}
diff --git a/cluster/prod/app/woodpecker-ci/integration/docker-compose.yml b/cluster/prod/app/woodpecker-ci/integration/docker-compose.yml
index 7b825df..5756b25 100644
--- a/cluster/prod/app/woodpecker-ci/integration/docker-compose.yml
+++ b/cluster/prod/app/woodpecker-ci/integration/docker-compose.yml
@@ -10,7 +10,7 @@ services:
- "./nix.conf:/etc/nix/nix.conf:ro"
woodpecker-runner:
- image: woodpeckerci/woodpecker-agent:v2.4.1
+ image: woodpeckerci/woodpecker-agent:v3.0.1
restart: always
environment:
# -- change these for each agent
diff --git a/cluster/prod/cluster.nix b/cluster/prod/cluster.nix
index 080b258..66da48d 100644
--- a/cluster/prod/cluster.nix
+++ b/cluster/prod/cluster.nix
@@ -133,6 +133,9 @@
kokakiwi = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFPTsEgcOtb2bij+Ih8eg8ZqO7d3IMiWykv6deMzlSSS kokakiwi@kira"
];
+ stitch = [
+ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILdT28Emp9yJqTPrxz+oDP08KZaN1kbsNyVqt9p9IMED"
+ ];
};
# For Garage external communication
diff --git a/cluster/staging/app/telemetry/deploy/telemetry-service.hcl b/cluster/staging/app/telemetry/deploy/telemetry-service.hcl
index 47554e2..5fcaa7a 100644
--- a/cluster/staging/app/telemetry/deploy/telemetry-service.hcl
+++ b/cluster/staging/app/telemetry/deploy/telemetry-service.hcl
@@ -2,95 +2,6 @@ job "telemetry-service" {
datacenters = ["neptune", "dathomir", "corrin", "bespin"]
type = "service"
- group "prometheus" {
- count = 2
-
- network {
- port "prometheus" {
- static = 9090
- }
- }
-
- constraint {
- attribute = "${attr.unique.hostname}"
- operator = "set_contains_any"
- value = "df-pw5,origan"
- }
-
- task "prometheus" {
- driver = "nix2"
- config {
- nixpkgs = "github:nixos/nixpkgs/nixos-22.11"
- packages = [ "#prometheus", "#coreutils", "#findutils", "#bash" ]
- command = "prometheus"
- args = [
- "--config.file=/etc/prom/prometheus.yml",
- "--storage.tsdb.path=/data",
- "--storage.tsdb.retention.size=5GB",
- ]
- bind = {
- "/mnt/ssd/prometheus" = "/data"
- }
- }
-
- template {
- data = file("../config/prometheus.yml")
- destination = "etc/prom/prometheus.yml"
- }
-
- template {
- data = "{{ key \"secrets/consul/consul-ca.crt\" }}"
- destination = "etc/prom/consul.crt"
- }
-
- template {
- data = "{{ key \"secrets/consul/consul-client.crt\" }}"
- destination = "etc/prom/consul-client.crt"
- }
-
- template {
- data = "{{ key \"secrets/consul/consul-client.key\" }}"
- destination = "etc/prom/consul-client.key"
- }
-
- template {
- data = "{{ key \"secrets/nomad/nomad-ca.crt\" }}"
- destination = "etc/prom/nomad-ca.crt"
- }
-
- template {
- data = "{{ key \"secrets/nomad/nomad-client.crt\" }}"
- destination = "etc/prom/nomad-client.crt"
- }
-
- template {
- data = "{{ key \"secrets/nomad/nomad-client.key\" }}"
- destination = "etc/prom/nomad-client.key"
- }
-
- resources {
- memory = 500
- cpu = 200
- }
-
- service {
- port = "prometheus"
- name = "prometheus"
- check {
- type = "http"
- path = "/"
- interval = "60s"
- timeout = "5s"
- check_restart {
- limit = 3
- grace = "90s"
- ignore_warnings = false
- }
- }
- }
- }
- }
-
group "grafana" {
count = 1
@@ -106,50 +17,46 @@ job "telemetry-service" {
sidecar = false
}
- driver = "nix2"
+ driver = "docker"
config {
- packages = [ "#litestream" ]
- command = "litestream"
+ image = "litestream/litestream:0.3.13"
args = [
"restore", "-config", "/etc/litestream.yml", "/ephemeral/grafana.db"
]
- bind = {
- "../alloc/data" = "/ephemeral",
- }
+ volumes = [
+ "../alloc/data:/ephemeral",
+ "secrets/litestream.yml:/etc/litestream.yml"
+ ]
}
+ user = "472"
template {
data = file("../config/grafana-litestream.yml")
- destination = "etc/litestream.yml"
+ destination = "secrets/litestream.yml"
}
resources {
- memory = 100
- memory_max = 1000
+ memory = 50
+ memory_max = 200
cpu = 100
}
}
task "grafana" {
- driver = "nix2"
+ driver = "docker"
config {
- nixpkgs = "github:nixos/nixpkgs/nixos-22.11"
- packages = [ "#grafana" ]
- command = "grafana-server"
- args = [
- "-homepath", "/share/grafana",
- "cfg:default.paths.data=/grafana",
- "cfg:default.paths.provisioning=/grafana-provisioning"
+ image = "grafana/grafana:11.4.1"
+ network_mode = "host"
+ ports = [ "grafana" ]
+ volumes = [
+ "../alloc/data:/var/lib/grafana",
+ "secrets/prometheus.yaml:/etc/grafana/provisioning/datasources/prometheus.yaml"
]
-
- bind = {
- "../alloc/data" = "/grafana",
- }
}
template {
data = file("../config/grafana-datasource-prometheus.yaml")
- destination = "grafana-provisioning/datasources/prometheus.yaml"
+ destination = "secrets/prometheus.yaml"
}
template {
@@ -163,8 +70,9 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }}
}
resources {
- memory = 300
- cpu = 300
+ memory = 100
+ memory_max = 400
+ cpu = 300
}
restart {
@@ -181,9 +89,12 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }}
"tricot grafana.staging.deuxfleurs.org",
"d53-cname grafana.staging.deuxfleurs.org",
]
- port = "grafana"
+ port = 3719
+ address_mode = "driver"
check {
type = "tcp"
+ port = 3719
+ address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
@@ -196,26 +107,27 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }}
}
task "replicate-db" {
- driver = "nix2"
+ driver = "docker"
config {
- packages = [ "#litestream" ]
- command = "litestream"
+ image = "litestream/litestream:0.3.13"
args = [
"replicate", "-config", "/etc/litestream.yml"
]
- bind = {
- "../alloc/data" = "/ephemeral",
- }
+ volumes = [
+ "../alloc/data:/ephemeral",
+ "secrets/litestream.yml:/etc/litestream.yml"
+ ]
}
+ user = "472"
template {
data = file("../config/grafana-litestream.yml")
- destination = "etc/litestream.yml"
+ destination = "secrets/litestream.yml"
}
resources {
- memory = 100
- memory_max = 500
+ memory = 50
+ memory_max = 200
cpu = 100
}
}
diff --git a/cluster/staging/app/telemetry/deploy/telemetry-storage.hcl b/cluster/staging/app/telemetry/deploy/telemetry-storage.hcl
new file mode 100644
index 0000000..fbde697
--- /dev/null
+++ b/cluster/staging/app/telemetry/deploy/telemetry-storage.hcl
@@ -0,0 +1,97 @@
+job "telemetry-storage" {
+ datacenters = ["neptune", "dathomir", "corrin", "bespin"]
+ type = "service"
+
+ group "prometheus" {
+ count = 2
+
+ network {
+ port "prometheus" {
+ static = 9090
+ }
+ }
+
+ constraint {
+ attribute = "${attr.unique.hostname}"
+ operator = "set_contains_any"
+ value = "df-pw5,origan"
+ }
+
+ task "prometheus" {
+ driver = "docker"
+ config {
+ image = "prom/prometheus:v3.1.0"
+ network_mode = "host"
+ ports = [ "prometheus" ]
+ args = [
+ "--config.file=/etc/prometheus/prometheus.yml",
+ "--storage.tsdb.path=/data",
+ "--storage.tsdb.retention.size=20GB",
+ ]
+ volumes = [
+ "secrets:/etc/prometheus",
+ "/mnt/ssd/prometheus:/data"
+ ]
+ }
+
+ template {
+ data = file("../config/prometheus.yml")
+ destination = "secrets/prometheus.yml"
+ }
+
+ template {
+ data = "{{ key \"secrets/consul/consul-ca.crt\" }}"
+ destination = "secrets/consul.crt"
+ }
+
+ template {
+ data = "{{ key \"secrets/consul/consul-client.crt\" }}"
+ destination = "secrets/consul-client.crt"
+ }
+
+ template {
+ data = "{{ key \"secrets/consul/consul-client.key\" }}"
+ destination = "secrets/consul-client.key"
+ }
+
+ template {
+ data = "{{ key \"secrets/nomad/nomad-ca.crt\" }}"
+ destination = "secrets/nomad-ca.crt"
+ }
+
+ template {
+ data = "{{ key \"secrets/nomad/nomad-client.crt\" }}"
+ destination = "secrets/nomad-client.crt"
+ }
+
+ template {
+ data = "{{ key \"secrets/nomad/nomad-client.key\" }}"
+ destination = "secrets/nomad-client.key"
+ }
+
+ resources {
+ memory = 500
+ cpu = 200
+ }
+
+ service {
+ port = 9090
+ address_mode = "driver"
+ name = "prometheus"
+ check {
+ type = "http"
+ path = "/"
+ port = 9090
+ address_mode = "driver"
+ interval = "60s"
+ timeout = "5s"
+ check_restart {
+ limit = 3
+ grace = "90s"
+ ignore_warnings = false
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/cluster/staging/app/telemetry/deploy/telemetry-system.hcl b/cluster/staging/app/telemetry/deploy/telemetry-system.hcl
index a97c7b1..9cd254a 100644
--- a/cluster/staging/app/telemetry/deploy/telemetry-system.hcl
+++ b/cluster/staging/app/telemetry/deploy/telemetry-system.hcl
@@ -4,43 +4,46 @@ job "telemetry-system" {
priority = "100"
group "collector" {
- network {
- port "node_exporter" { static = 9100 }
- }
+ network {
+ port "node_exporter" { static = 9100 }
+ }
- task "node_exporter" {
- driver = "nix2"
+ task "node_exporter" {
+ driver = "docker"
- config {
- packages = [ "#prometheus-node-exporter" ]
- command = "node_exporter"
- args = [ "--path.rootfs=/host" ]
- bind_read_only = {
- "/" = "/host"
- }
- }
+ config {
+ image = "quay.io/prometheus/node-exporter:v1.8.1"
+ network_mode = "host"
+ volumes = [
+ "/:/host:ro,rslave"
+ ]
+ args = [ "--path.rootfs=/host" ]
+ }
- resources {
- cpu = 50
- memory = 40
- }
+ resources {
+ cpu = 50
+ memory = 40
+ }
- service {
- name = "node-exporter"
- tags = [ "telemetry" ]
- port = "node_exporter"
- check {
- type = "http"
- path = "/"
- interval = "60s"
- timeout = "5s"
- check_restart {
- limit = 3
- grace = "90s"
- ignore_warnings = false
- }
- }
- }
- }
- }
-}
+ service {
+ tags = [ "telemetry" ]
+ port = 9100
+ address_mode = "driver"
+ name = "node-exporter"
+ check {
+ type = "http"
+ path = "/"
+ port = 9100
+ address_mode = "driver"
+ interval = "60s"
+ timeout = "5s"
+ check_restart {
+ limit = 3
+ grace = "90s"
+ ignore_warnings = false
+ }
+ }
+ }
+ }
+ }
+ }
diff --git a/cluster/staging/cluster.nix b/cluster/staging/cluster.nix
index 26011d2..8a71424 100644
--- a/cluster/staging/cluster.nix
+++ b/cluster/staging/cluster.nix
@@ -90,6 +90,9 @@
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJX0A2P59or83EKhh32o8XumGz0ToTEsoq89hMbMtr7h"
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIB540H9kn+Ocs4Wjc1Y3f3OkHFYEqc5IM/FiCyoVVoh3"
];
+ stitch = [
+ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILdT28Emp9yJqTPrxz+oDP08KZaN1kbsNyVqt9p9IMED"
+ ];
};
# For Garage ipv6 communication