From ecb4cabcf0ea52226d95f1e0e0f2f5d1695133a5 Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Sun, 27 Aug 2023 13:56:51 +0200
Subject: prod garage: add health check using admin api's '/health'

---
 cluster/prod/app/garage/deploy/garage.hcl | 99 ++++++++++++++++++++-----------
 1 file changed, 64 insertions(+), 35 deletions(-)

diff --git a/cluster/prod/app/garage/deploy/garage.hcl b/cluster/prod/app/garage/deploy/garage.hcl
index 26f76de..7ed963c 100644
--- a/cluster/prod/app/garage/deploy/garage.hcl
+++ b/cluster/prod/app/garage/deploy/garage.hcl
@@ -14,7 +14,7 @@ job "garage" {
       port "rpc" { static = 3901 }
       port "web" { static = 3902 }
       port "admin" { static = 3903 }
-      port "k2v" { static = 3904 }
+      port "k2v" { static = 3904 }
     }
 
     update {
@@ -26,7 +26,6 @@ job "garage" {
     task "server" {
       driver = "docker"
       config {
-        advertise_ipv6_address = true
         image = "dxflrs/garage:v0.8.2"
         command = "/garage"
         args = [ "server" ]
@@ -70,6 +69,34 @@ job "garage" {
 
       kill_timeout = "20s"
 
+      restart {
+        interval = "30m"
+        attempts = 10
+        delay = "15s"
+        mode = "delay"
+      }
+
+      #### Configuration for service ports: admin port (internal use only)
+
+      service {
+        port = "admin"
+        address_mode = "host"
+        name = "garage-admin"
+        # Check that Garage is alive and answering TCP connections
+        check {
+          type = "tcp"
+          interval = "60s"
+          timeout = "5s"
+          check_restart {
+            limit = 3
+            grace = "90s"
+            ignore_warnings = false
+          }
+        }
+      }
+
+      #### Configuration for service ports: externally available ports (API, web)
+
       service {
         tags = [
           "garage_api",
@@ -77,13 +104,13 @@ job "garage" {
           "tricot *.garage.deuxfleurs.fr",
           "tricot-site-lb",
         ]
-        port = 3900
-        address_mode = "driver"
+        port = "s3"
+        address_mode = "host"
         name = "garage-api"
+        # Check 1: Garage is alive and answering TCP connections
         check {
+          name = "garage-api-live"
           type = "tcp"
-          port = 3900
-          address_mode = "driver"
           interval = "60s"
           timeout = "5s"
           check_restart {
@@ -92,6 +119,15 @@ job "garage" {
             ignore_warnings = false
           }
         }
+        # Check 2: Garage is in a healthy state and requests should be routed here
+        check {
+          name = "garage-api-healthy"
+          port = "admin"
+          type = "http"
+          path = "/health"
+          interval = "60s"
+          timeout = "5s"
+        }
       }
 
       service {
@@ -105,13 +141,13 @@ job "garage" {
           "tricot-add-header X-Content-Type-Options nosniff",
           "tricot-site-lb",
         ]
-        port = 3902
-        address_mode = "driver"
+        port = "web"
+        address_mode = "host"
         name = "garage-web"
+        # Check 1: Garage is alive and answering TCP connections
         check {
+          name = "garage-web-live"
           type = "tcp"
-          port = 3902
-          address_mode = "driver"
           interval = "60s"
           timeout = "5s"
           check_restart {
@@ -120,23 +156,14 @@ job "garage" {
             ignore_warnings = false
           }
         }
-      }
-
-      service {
-        port = 3903
-        address_mode = "driver"
-        name = "garage-admin"
+        # Check 2: Garage is in a healthy state and requests should be routed here
         check {
-          type = "tcp"
-          port = 3903
-          address_mode = "driver"
+          name = "garage-web-healthy"
+          port = "admin"
+          type = "http"
+          path = "/health"
           interval = "60s"
           timeout = "5s"
-          check_restart {
-            limit = 3
-            grace = "90s"
-            ignore_warnings = false
-          }
         }
       }
 
@@ -146,13 +173,13 @@ job "garage" {
           "tricot k2v.deuxfleurs.fr",
           "tricot-site-lb",
         ]
-        port = 3904
-        address_mode = "driver"
+        port = "k2v"
+        address_mode = "host"
         name = "garage-k2v"
+        # Check 1: Garage is alive and answering TCP connections
         check {
+          name = "garage-k2v-live"
           type = "tcp"
-          port = 3904
-          address_mode = "driver"
           interval = "60s"
           timeout = "5s"
           check_restart {
@@ -161,13 +188,15 @@ job "garage" {
             ignore_warnings = false
           }
         }
-      }
-
-      restart {
-        interval = "30m"
-        attempts = 10
-        delay = "15s"
-        mode = "delay"
+        # Check 2: Garage is in a healthy state and requests should be routed here
+        check {
+          name = "garage-k2v-healthy"
+          port = "admin"
+          type = "http"
+          path = "/health"
+          interval = "60s"
+          timeout = "5s"
+        }
       }
     }
   }
-- 
cgit v1.2.3