aboutsummaryrefslogblamecommitdiff
path: root/cluster/staging/app/telemetry/deploy/telemetry.hcl
blob: 7ee289d601be1ee7b8fb32d9ed6a895dff0c4c15 (plain) (tree)
1
2
3
4
5
6
7
8
9
10


                           
 
                      
             

             

                         
       

     





                                           
                       
                     
              


                                                                         
                
                                                   
                                      
                                              
         


                                         


                
                                               
                                               



                                                        
                                           



                                                               
                                                  



                                                               
                                                  


                 
                    
                 
       
 
               
                           
                           
               

                       







                                   
       

     
 

                   
 

                      
                     


       





                         
                     
              

                                    


                                                                              


                                         
       


                                                       
                                          


                 
                    
                         
                 


       
                    
                     
              






                                                                
         



                                       


                
                                                                   
                                                                        



                    




                                                                                                                                                    


                 
                    
                   
       

               
                        
                

                                                  
                                                     
         
                        

                      








                                   

     
                         
                     
              

                                    


                                                       


                                         
       


                                                       
                                          


                 
                    
                        


                 
   

























                                                                             
                  













                                     



                                                    


















































                                   
 
# Nomad job: staging telemetry stack.
# - Prometheus (2 replicas, pinned to SSD-equipped hosts)
# - Grafana, with its SQLite DB restored/replicated via Litestream
# - Jaeger all-in-one (UI + OTLP gRPC/HTTP collectors)
job "telemetry" {
  datacenters = ["neptune"]
  type = "service"

  group "prometheus" {
    count = 2

    network {
      port "prometheus" {
        static = 9090
      }
    }

    # Pin replicas to the hosts that have the local SSD storage mounted
    # at /mnt/ssd/prometheus (bound into the task as /data below).
    constraint {
      attribute = "${attr.unique.hostname}"
      operator = "set_contains_any"
      value = "cariacou,carcajou"
    }

    task "prometheus" {
      driver = "nix2"
      config {
        nixpkgs = "github:nixos/nixpkgs/nixos-22.11"
        packages = [ "#prometheus", "#coreutils", "#findutils", "#bash" ]
        command = "prometheus"
        args = [
          "--config.file=/etc/prom/prometheus.yml",
          "--storage.tsdb.path=/data",
          "--storage.tsdb.retention.size=5GB",
        ]
        bind = {
          "/mnt/ssd/prometheus" = "/data"
        }
      }

      # Scrape configuration, shipped alongside the job file.
      template {
        data = file("../config/prometheus.yml")
        destination = "etc/prom/prometheus.yml"
      }

      # TLS material for talking to Consul, rendered from the Consul KV store.
      template {
        data = "{{ key \"secrets/consul/consul.crt\" }}"
        destination = "etc/prom/consul.crt"
      }

      template {
        data = "{{ key \"secrets/consul/consul-client.crt\" }}"
        destination = "etc/prom/consul-client.crt"
      }

      template {
        data = "{{ key \"secrets/consul/consul-client.key\" }}"
        destination = "etc/prom/consul-client.key"
      }

      resources {
        memory = 500
        cpu = 200
      }

      service {
        port = "prometheus"
        name = "prometheus"
        check {
          type = "http"
          path = "/"
          interval = "60s"
          timeout = "5s"
          check_restart {
            limit = 3
            grace = "90s"
            ignore_warnings = false
          }
        }
      }
    }
  }

  group "grafana" {
    count = 1

    network {
      port "grafana" {
        static = 3719
      }
    }

    # Prestart (non-sidecar) task: restore the Grafana SQLite database from
    # the Litestream replica into the shared alloc dir before grafana starts.
    task "restore-db" {
      lifecycle {
        hook = "prestart"
        sidecar = false
      }

      driver = "nix2"
      config {
        # NOTE(review): no nixpkgs pin here, unlike the other nix2 tasks —
        # confirm whether the driver default is intended.
        packages = [ "#litestream" ]
        command = "litestream"
        args = [
          "restore", "-config", "/etc/litestream.yml", "/ephemeral/grafana.db"
        ]
        bind = {
          "../alloc/data" = "/ephemeral",
        }
      }

      template {
        data = file("../config/grafana-litestream.yml")
        destination = "etc/litestream.yml"
      }

      resources {
        memory = 100
        memory_max = 1000
        cpu = 100
      }
    }

    task "grafana" {
      driver = "nix2"
      config {
        nixpkgs = "github:nixos/nixpkgs/nixos-22.11"
        packages = [ "#grafana" ]
        command = "grafana-server"
        args = [
          "-homepath", "/share/grafana",
          "cfg:default.paths.data=/grafana",
          "cfg:default.paths.provisioning=/grafana-provisioning"
        ]

        # Same alloc-shared dir the litestream tasks use, mounted as the
        # Grafana data directory.
        bind = {
          "../alloc/data" = "/grafana",
        }
      }

      template {
        data = file("../config/grafana-datasource-prometheus.yaml")
        destination = "grafana-provisioning/datasources/prometheus.yaml"
      }

      # Environment for grafana-server. Heredoc content is flush-left (as in
      # the jaeger task below) so the rendered env file has no leading
      # whitespace before the variable names.
      template {
        data = <<EOH
GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel,grafana-worldmap-panel,grafana-polystat-panel
GF_SERVER_HTTP_PORT=3719
EOH
        destination = "secrets/env"
        env = true
      }

      resources {
        memory = 300
        cpu = 800
      }

      service {
        name = "grafana"
        tags = [
          "grafana",
          "tricot grafana.staging.deuxfleurs.org",
          "d53-cname deuxfleurs.org grafana.staging",
        ]
        port = "grafana"
        check {
          type = "tcp"
          interval = "60s"
          timeout = "5s"
          check_restart {
            limit = 3
            grace = "90s"
            ignore_warnings = false
          }
        }
      }
    }

    # Continuously replicate the Grafana database out of the shared alloc
    # dir for the lifetime of the group.
    task "replicate-db" {
      driver = "nix2"
      config {
        packages = [ "#litestream" ]
        command = "litestream"
        args = [
          "replicate", "-config", "/etc/litestream.yml"
        ]
        bind = {
          "../alloc/data" = "/ephemeral",
        }
      }

      template {
        data = file("../config/grafana-litestream.yml")
        destination = "etc/litestream.yml"
      }

      resources {
        memory = 100
        memory_max = 500
        cpu = 100
      }
    }
  }

  group "jaeger" {
    count = 1

    network {
      # UI port is dynamically mapped; the two OTLP collector ports are
      # statically exposed so clients can push traces directly.
      port "jaeger-frontend" {
        to = 16686
      }
      port "jaeger-otlp-grpc" {
        static = 4317
        to = 4317
      }
      port "jaeger-otlp-http" {
        static = 4318
        to = 4318
      }
    }

    task "jaeger" {
      driver = "docker"
      config {
        image = "jaegertracing/all-in-one:1.36"
        ports = [ "jaeger-frontend", "jaeger-otlp-grpc", "jaeger-otlp-http" ]
      }
      resources {
        memory = 2000
        cpu = 1000
      }

      template {
        data = <<EOH
COLLECTOR_OTLP_ENABLED=true
EOH
        destination = "secrets/env"
        env = true
      }

      service {
        port = "jaeger-frontend"
        address_mode = "host"
        name = "jaeger-frontend"
        tags = [
          "tricot jaeger.staging.deuxfleurs.org",
          "d53-cname deuxfleurs.org jaeger.staging",
        ]
        check {
          type = "tcp"
          port = "jaeger-frontend"
          address_mode = "host"
          interval = "60s"
          timeout = "5s"
          check_restart {
            limit = 3
            grace = "90s"
            ignore_warnings = false
          }
        }
      }

      service {
        port = "jaeger-otlp-grpc"
        address_mode = "host"
        name = "jaeger-otlp-grpc"
        check {
          type = "tcp"
          port = "jaeger-otlp-grpc"
          address_mode = "host"
          interval = "60s"
          timeout = "5s"
          check_restart {
            limit = 3
            grace = "90s"
            ignore_warnings = false
          }
        }
      }

      service {
        port = "jaeger-otlp-http"
        address_mode = "host"
        name = "jaeger-otlp-http"
        check {
          type = "tcp"
          # FIX: this check previously probed "jaeger-otlp-grpc" (copy-paste
          # from the service above); it must probe its own HTTP port.
          port = "jaeger-otlp-http"
          address_mode = "host"
          interval = "60s"
          timeout = "5s"
          check_restart {
            limit = 3
            grace = "90s"
            ignore_warnings = false
          }
        }
      }
    }
  }
}