diff options
author | Baptiste Jonglez <git@bitsofnetworks.org> | 2024-06-08 16:42:49 +0200 |
---|---|---|
committer | Baptiste Jonglez <git@bitsofnetworks.org> | 2024-06-08 16:43:18 +0200 |
commit | 9fc22d72d44e3c987bb9fa589fde8af1626cb98f (patch) | |
tree | 0a18619fa74e147d93f328a6f0d882e113bc8666 /cluster/staging | |
parent | cbb0093f2c6a31a0f2ce3ad1e12f2137f2f18c14 (diff) | |
download | nixcfg-9fc22d72d44e3c987bb9fa589fde8af1626cb98f.tar.gz nixcfg-9fc22d72d44e3c987bb9fa589fde8af1626cb98f.zip |
garage: harmonize staging and prod (checks, services)
Diffstat (limited to 'cluster/staging')
-rw-r--r-- | cluster/staging/app/garage/deploy/garage.hcl | 102 |
1 files changed, 68 insertions, 34 deletions
diff --git a/cluster/staging/app/garage/deploy/garage.hcl b/cluster/staging/app/garage/deploy/garage.hcl index 7a7b44b..b8e7227 100644 --- a/cluster/staging/app/garage/deploy/garage.hcl +++ b/cluster/staging/app/garage/deploy/garage.hcl @@ -73,12 +73,42 @@ job "garage-staging" { kill_signal = "SIGINT" kill_timeout = "20s" + restart { + interval = "5m" + attempts = 10 + delay = "1m" + mode = "delay" + } + service { name = "garage-staging-rpc" tags = ["garage-staging-rpc"] port = "rpc" } + #### Configuration for service ports: admin port (internal use only) + + service { + name = "garage-staging-admin" + tags = [ + "garage-staging-admin", + ] + port = "admin" + check { + name = "garage-tcp-liveness-check" + type = "tcp" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + } + + #### Configuration for service ports: externally available ports (S3 API, K2V, web) + service { name = "garage-staging-s3-api" tags = [ @@ -90,7 +120,21 @@ job "garage-staging" { "tricot-site-lb", ] port = "s3" + # Check 1: Garage is alive and answering TCP connections + check { + name = "garage-staging-api-live" + type = "tcp" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + # Check 2: Garage is in a healthy state and requests should be routed here check { + name = "garage-staging-api-healthy" port = "admin" type = "http" path = "/health" @@ -108,7 +152,21 @@ job "garage-staging" { "tricot-site-lb", ] port = "k2v" + # Check 1: Garage is alive and answering TCP connections + check { + name = "garage-staging-k2v-live" + type = "tcp" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + # Check 2: Garage is in a healthy state and requests should be routed here check { + name = "garage-staging-k2v-healthy" port = "admin" type = "http" path = "/health" @@ -134,52 +192,28 @@ job "garage-staging" { "tricot-site-lb", ] port = "web" + # Check 1: Garage is alive and answering TCP connections check { - port = "admin" - type = "http" - path = "/health" - interval = "60s" - timeout = "5s" - } - } - - service { - name = "garage-staging-admin" - tags = [ - "garage-staging-admin", - ] - port = "admin" - check { - name = "garage-admin-health-check" - type = "http" - path = "/health" + name = "garage-staging-web-live" + type = "tcp" interval = "60s" timeout = "5s" check_restart { - limit = 10 + limit = 3 grace = "90s" - ignore_warnings = true + ignore_warnings = false } } + # Check 2: Garage is in a healthy state and requests should be routed here check { - name = "garage-tcp-liveness-check" - type = "tcp" + name = "garage-staging-web-healthy" + port = "admin" + type = "http" + path = "/health" interval = "60s" timeout = "5s" - check_restart { - limit = 3 - grace = "90s" - ignore_warnings = true - } } } - - restart { - interval = "5m" - attempts = 10 - delay = "1m" - mode = "delay" - } } } } |