about | summary | refs | log | tree | commit | diff
path: root/cluster/prod/app/garage
diff options
context:
space:
mode:
author: Alex Auvolat <alex@adnab.me> 2023-08-27 13:56:51 +0200
committer: Alex Auvolat <alex@adnab.me> 2023-08-27 13:56:51 +0200
commit: ecb4cabcf0ea52226d95f1e0e0f2f5d1695133a5 (patch)
tree: 1f68760a2e53bacd9a774c2a6edc5117ccb4e8f6 /cluster/prod/app/garage
parent: 8e304e8f5f365bd9bf851aaf556a15e2f51e8e85 (diff)
download: nixcfg-ecb4cabcf0ea52226d95f1e0e0f2f5d1695133a5.tar.gz
nixcfg-ecb4cabcf0ea52226d95f1e0e0f2f5d1695133a5.zip
prod garage: add health check using admin api's '/health'
Diffstat (limited to 'cluster/prod/app/garage')
-rw-r--r-- cluster/prod/app/garage/deploy/garage.hcl 99
1 files changed, 64 insertions, 35 deletions
diff --git a/cluster/prod/app/garage/deploy/garage.hcl b/cluster/prod/app/garage/deploy/garage.hcl
index 26f76de..7ed963c 100644
--- a/cluster/prod/app/garage/deploy/garage.hcl
+++ b/cluster/prod/app/garage/deploy/garage.hcl
@@ -14,7 +14,7 @@ job "garage" {
port "rpc" { static = 3901 }
port "web" { static = 3902 }
port "admin" { static = 3903 }
- port "k2v" { static = 3904 }
+ port "k2v" { static = 3904 }
}
update {
@@ -26,7 +26,6 @@ job "garage" {
task "server" {
driver = "docker"
config {
- advertise_ipv6_address = true
image = "dxflrs/garage:v0.8.2"
command = "/garage"
args = [ "server" ]
@@ -70,6 +69,34 @@ job "garage" {
kill_timeout = "20s"
+ restart {
+ interval = "30m"
+ attempts = 10
+ delay = "15s"
+ mode = "delay"
+ }
+
+ #### Configuration for service ports: admin port (internal use only)
+
+ service {
+ port = "admin"
+ address_mode = "host"
+ name = "garage-admin"
+ # Check that Garage is alive and answering TCP connections
+ check {
+ type = "tcp"
+ interval = "60s"
+ timeout = "5s"
+ check_restart {
+ limit = 3
+ grace = "90s"
+ ignore_warnings = false
+ }
+ }
+ }
+
+ #### Configuration for service ports: externally available ports (API, web)
+
service {
tags = [
"garage_api",
@@ -77,13 +104,13 @@ job "garage" {
"tricot *.garage.deuxfleurs.fr",
"tricot-site-lb",
]
- port = 3900
- address_mode = "driver"
+ port = "s3"
+ address_mode = "host"
name = "garage-api"
+ # Check 1: Garage is alive and answering TCP connections
check {
+ name = "garage-api-live"
type = "tcp"
- port = 3900
- address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
@@ -92,6 +119,15 @@ job "garage" {
ignore_warnings = false
}
}
+ # Check 2: Garage is in a healthy state and requests should be routed here
+ check {
+ name = "garage-api-healthy"
+ port = "admin"
+ type = "http"
+ path = "/health"
+ interval = "60s"
+ timeout = "5s"
+ }
}
service {
@@ -105,13 +141,13 @@ job "garage" {
"tricot-add-header X-Content-Type-Options nosniff",
"tricot-site-lb",
]
- port = 3902
- address_mode = "driver"
+ port = "web"
+ address_mode = "host"
name = "garage-web"
+ # Check 1: Garage is alive and answering TCP connections
check {
+ name = "garage-web-live"
type = "tcp"
- port = 3902
- address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
@@ -120,23 +156,14 @@ job "garage" {
ignore_warnings = false
}
}
- }
-
- service {
- port = 3903
- address_mode = "driver"
- name = "garage-admin"
+ # Check 2: Garage is in a healthy state and requests should be routed here
check {
- type = "tcp"
- port = 3903
- address_mode = "driver"
+ name = "garage-web-healthy"
+ port = "admin"
+ type = "http"
+ path = "/health"
interval = "60s"
timeout = "5s"
- check_restart {
- limit = 3
- grace = "90s"
- ignore_warnings = false
- }
}
}
@@ -146,13 +173,13 @@ job "garage" {
"tricot k2v.deuxfleurs.fr",
"tricot-site-lb",
]
- port = 3904
- address_mode = "driver"
+ port = "k2v"
+ address_mode = "host"
name = "garage-k2v"
+ # Check 1: Garage is alive and answering TCP connections
check {
+ name = "garage-k2v-live"
type = "tcp"
- port = 3904
- address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
@@ -161,13 +188,15 @@ job "garage" {
ignore_warnings = false
}
}
- }
-
- restart {
- interval = "30m"
- attempts = 10
- delay = "15s"
- mode = "delay"
+ # Check 2: Garage is in a healthy state and requests should be routed here
+ check {
+ name = "garage-k2v-healthy"
+ port = "admin"
+ type = "http"
+ path = "/health"
+ interval = "60s"
+ timeout = "5s"
+ }
}
}
}