about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Baptiste Jonglez <git@bitsofnetworks.org> 2024-06-08 16:42:49 +0200
committer Baptiste Jonglez <git@bitsofnetworks.org> 2024-06-08 16:43:18 +0200
commit 9fc22d72d44e3c987bb9fa589fde8af1626cb98f (patch)
tree 0a18619fa74e147d93f328a6f0d882e113bc8666
parent cbb0093f2c6a31a0f2ce3ad1e12f2137f2f18c14 (diff)
download nixcfg-9fc22d72d44e3c987bb9fa589fde8af1626cb98f.tar.gz
nixcfg-9fc22d72d44e3c987bb9fa589fde8af1626cb98f.zip
garage: harmonize staging and prod (checks, services)
-rw-r--r-- cluster/prod/app/garage/deploy/garage.hcl 73
-rw-r--r-- cluster/staging/app/garage/deploy/garage.hcl 102
2 files changed, 104 insertions, 71 deletions
diff --git a/cluster/prod/app/garage/deploy/garage.hcl b/cluster/prod/app/garage/deploy/garage.hcl
index b6ea7f6..aad5c9b 100644
--- a/cluster/prod/app/garage/deploy/garage.hcl
+++ b/cluster/prod/app/garage/deploy/garage.hcl
@@ -80,9 +80,9 @@ job "garage" {
#### Configuration for service ports: admin port (internal use only)
service {
+ name = "garage-admin"
port = "admin"
address_mode = "host"
- name = "garage-admin"
# Check that Garage is alive and answering TCP connections
check {
type = "tcp"
@@ -96,9 +96,10 @@ job "garage" {
}
}
- #### Configuration for service ports: externally available ports (API, web)
+ #### Configuration for service ports: externally available ports (S3 API, K2V, web)
service {
+ name = "garage-api"
tags = [
"garage_api",
"tricot garage.deuxfleurs.fr",
@@ -107,7 +108,6 @@ job "garage" {
]
port = "s3"
address_mode = "host"
- name = "garage-api"
# Check 1: Garage is alive and answering TCP connections
check {
name = "garage-api-live"
@@ -132,6 +132,39 @@ job "garage" {
}
service {
+ name = "garage-k2v"
+ tags = [
+ "garage_k2v",
+ "tricot k2v.deuxfleurs.fr",
+ "tricot-site-lb",
+ ]
+ port = "k2v"
+ address_mode = "host"
+ # Check 1: Garage is alive and answering TCP connections
+ check {
+ name = "garage-k2v-live"
+ type = "tcp"
+ interval = "60s"
+ timeout = "5s"
+ check_restart {
+ limit = 3
+ grace = "90s"
+ ignore_warnings = false
+ }
+ }
+ # Check 2: Garage is in a healthy state and requests should be routed here
+ check {
+ name = "garage-k2v-healthy"
+ port = "admin"
+ type = "http"
+ path = "/health"
+ interval = "60s"
+ timeout = "5s"
+ }
+ }
+
+ service {
+ name = "garage-web"
tags = [
"garage-web",
"tricot * 1",
@@ -144,7 +177,6 @@ job "garage" {
]
port = "web"
address_mode = "host"
- name = "garage-web"
# Check 1: Garage is alive and answering TCP connections
check {
name = "garage-web-live"
@@ -183,39 +215,6 @@ job "garage" {
port = "web"
on_update = "ignore"
}
-
-
- service {
- tags = [
- "garage_k2v",
- "tricot k2v.deuxfleurs.fr",
- "tricot-site-lb",
- ]
- port = "k2v"
- address_mode = "host"
- name = "garage-k2v"
- # Check 1: Garage is alive and answering TCP connections
- check {
- name = "garage-k2v-live"
- type = "tcp"
- interval = "60s"
- timeout = "5s"
- check_restart {
- limit = 3
- grace = "90s"
- ignore_warnings = false
- }
- }
- # Check 2: Garage is in a healthy state and requests should be routed here
- check {
- name = "garage-k2v-healthy"
- port = "admin"
- type = "http"
- path = "/health"
- interval = "60s"
- timeout = "5s"
- }
- }
}
}
}
diff --git a/cluster/staging/app/garage/deploy/garage.hcl b/cluster/staging/app/garage/deploy/garage.hcl
index 7a7b44b..b8e7227 100644
--- a/cluster/staging/app/garage/deploy/garage.hcl
+++ b/cluster/staging/app/garage/deploy/garage.hcl
@@ -73,12 +73,42 @@ job "garage-staging" {
kill_signal = "SIGINT"
kill_timeout = "20s"
+ restart {
+ interval = "5m"
+ attempts = 10
+ delay = "1m"
+ mode = "delay"
+ }
+
service {
name = "garage-staging-rpc"
tags = ["garage-staging-rpc"]
port = "rpc"
}
+ #### Configuration for service ports: admin port (internal use only)
+
+ service {
+ name = "garage-staging-admin"
+ tags = [
+ "garage-staging-admin",
+ ]
+ port = "admin"
+ check {
+ name = "garage-tcp-liveness-check"
+ type = "tcp"
+ interval = "60s"
+ timeout = "5s"
+ check_restart {
+ limit = 3
+ grace = "90s"
+ ignore_warnings = false
+ }
+ }
+ }
+
+ #### Configuration for service ports: externally available ports (S3 API, K2V, web)
+
service {
name = "garage-staging-s3-api"
tags = [
@@ -90,7 +120,21 @@ job "garage-staging" {
"tricot-site-lb",
]
port = "s3"
+ # Check 1: Garage is alive and answering TCP connections
+ check {
+ name = "garage-staging-api-live"
+ type = "tcp"
+ interval = "60s"
+ timeout = "5s"
+ check_restart {
+ limit = 3
+ grace = "90s"
+ ignore_warnings = false
+ }
+ }
+ # Check 2: Garage is in a healthy state and requests should be routed here
check {
+ name = "garage-staging-api-healthy"
port = "admin"
type = "http"
path = "/health"
@@ -108,7 +152,21 @@ job "garage-staging" {
"tricot-site-lb",
]
port = "k2v"
+ # Check 1: Garage is alive and answering TCP connections
+ check {
+ name = "garage-staging-k2v-live"
+ type = "tcp"
+ interval = "60s"
+ timeout = "5s"
+ check_restart {
+ limit = 3
+ grace = "90s"
+ ignore_warnings = false
+ }
+ }
+ # Check 2: Garage is in a healthy state and requests should be routed here
check {
+ name = "garage-staging-k2v-healthy"
port = "admin"
type = "http"
path = "/health"
@@ -134,52 +192,28 @@ job "garage-staging" {
"tricot-site-lb",
]
port = "web"
+ # Check 1: Garage is alive and answering TCP connections
check {
- port = "admin"
- type = "http"
- path = "/health"
- interval = "60s"
- timeout = "5s"
- }
- }
-
- service {
- name = "garage-staging-admin"
- tags = [
- "garage-staging-admin",
- ]
- port = "admin"
- check {
- name = "garage-admin-health-check"
- type = "http"
- path = "/health"
+ name = "garage-staging-web-live"
+ type = "tcp"
interval = "60s"
timeout = "5s"
check_restart {
- limit = 10
+ limit = 3
grace = "90s"
- ignore_warnings = true
+ ignore_warnings = false
}
}
+ # Check 2: Garage is in a healthy state and requests should be routed here
check {
- name = "garage-tcp-liveness-check"
- type = "tcp"
+ name = "garage-staging-web-healthy"
+ port = "admin"
+ type = "http"
+ path = "/health"
interval = "60s"
timeout = "5s"
- check_restart {
- limit = 3
- grace = "90s"
- ignore_warnings = true
- }
}
}
-
- restart {
- interval = "5m"
- attempts = 10
- delay = "1m"
- mode = "delay"
- }
}
}
}