 app/postgres/deploy/postgres.hcl           |  6
 os/config/README.md                        | 11
 os/config/cluster_nodes.yml                | 10
 os/config/production.yml                   |  4
 os/config/roles/common/tasks/main.yml      |  8
 os/config/roles/consul/tasks/main.yml      |  2
 os/config/roles/network/templates/rules.v4 |  8
 os/config/roles/network/templates/rules.v6 |  6
 os/config/roles/nomad/tasks/main.yml       |  2
 os/config/roles/storage/handlers/main.yml  |  3
 os/config/roles/storage/tasks/main.yml     | 72
 11 files changed, 40 insertions(+), 92 deletions(-)
diff --git a/app/postgres/deploy/postgres.hcl b/app/postgres/deploy/postgres.hcl
index 6672644..bf68a59 100644
--- a/app/postgres/deploy/postgres.hcl
+++ b/app/postgres/deploy/postgres.hcl
@@ -93,6 +93,12 @@ job "postgres13.3" {
"--pg-su-password", "${PG_SU_PWD}",
"--pg-repl-username", "${PG_REPL_USER}",
"--pg-repl-password", "${PG_REPL_PWD}",
+ /*
+ The postgres daemon accepts 0.0.0.0, ::, and * here, but Stolon does not.
+ Passing * breaks the cluster (no replication) and logs the following errors:
+ WARN cmd/keeper.go:1979 provided --pg-listen-address "*": is not an ip address but a hostname. This will be advertized to the other components and may have undefined behaviors if resolved differently by other hosts
+ WARN cmd/keeper.go:1984 cannot resolve provided --pg-listen-address "*": lookup *: no such host
+ */
"--pg-listen-address", "${attr.unique.network.ip-address}",
"--pg-port", "${NOMAD_PORT_psql_port}",
"--pg-bin-path", "/usr/lib/postgresql/13/bin/"
diff --git a/os/config/README.md b/os/config/README.md
index a1078df..81fe9c9 100644
--- a/os/config/README.md
+++ b/os/config/README.md
@@ -4,9 +4,12 @@
For each machine, **one by one** do:
- Check that cluster is healthy
-  - Check gluster
-    - `sudo gluster peer status`
-    - `sudo gluster volume status all` (check the Online column, only `Y` must appear)
+  - Check garage
+    - check that all nodes are online: `docker exec -ti xxx /garage status`
+    - check that tables are in sync: `docker exec -ti 63a4d7ecd795 /garage repair --yes tables`
+    - check garage logs:
+      - neither unknown errors nor a resync should be in progress
+      - the following line must appear: `INFO garage_util::background > Worker exited: Repair worker`
- Check that Nomad is healthy
- `nomad server members`
- `nomad node status`
@@ -17,5 +20,5 @@ For each machine, **one by one** do:
- Run `nomad node drain -enable -force -self`
- Reboot
- Run `nomad node drain -self -disable`
- - Check that cluster is healthy
+ - Check that the cluster is healthy (i.e. repeat all the checks from the first step)
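
The procedure above boils down to a drain/reboot/undrain cycle per machine; roughly, on each node in turn:

    nomad node drain -enable -force -self    # move all allocations away
    sudo reboot
    # once the node is back up:
    nomad node drain -self -disable          # let allocations come back
    nomad node status                        # node should be "ready" again
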
diff --git a/os/config/cluster_nodes.yml b/os/config/cluster_nodes.yml
index ea58630..61d540d 100644
--- a/os/config/cluster_nodes.yml
+++ b/os/config/cluster_nodes.yml
@@ -14,6 +14,10 @@
- role: network
tags: net
-# UNSAFE!! This section configures glusterfs. Once done, don't run it ever again as it may break stuff.
-# - role: storage
-# tags: sto
+- hosts: extra_nodes
+ serial: 1
+ roles:
+ - role: common
+ tags: base
+ - role: users
+ tags: account
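
With the dedicated `extra_nodes` play, those hosts can presumably be targeted on their own, for example:

    # run only the base/account tags against the extra nodes
    ansible-playbook -i production.yml cluster_nodes.yml --limit extra_nodes --tags base,account
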
diff --git a/os/config/production.yml b/os/config/production.yml
index d59e153..446dd40 100644
--- a/os/config/production.yml
+++ b/os/config/production.yml
@@ -7,7 +7,7 @@ cluster_nodes:
ipv4: 192.168.0.2
gatewayv4: 192.168.0.254
ipv6: 2a01:e0a:260:b5b0::2
- gatewayv6: 2a01:e34:ec5c:dbe0::1
+ gatewayv6: 2a01:e0a:260:b5b0::1
interface: eno1
dns_1: 212.27.40.240
dns_2: 212.27.40.241
@@ -39,6 +39,8 @@ cluster_nodes:
dns_2: 212.27.40.241
ansible_python_interpreter: python3
+extra_nodes:
+ hosts:
io:
ansible_host: io.machine.deuxfleurs.fr
ansible_port: 22
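
A quick way to verify that the new inventory group resolves and the host is reachable before running any play:

    ansible -i production.yml extra_nodes -m ping
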
diff --git a/os/config/roles/common/tasks/main.yml b/os/config/roles/common/tasks/main.yml
index c75ae81..5f46835 100644
--- a/os/config/roles/common/tasks/main.yml
+++ b/os/config/roles/common/tasks/main.yml
@@ -44,6 +44,14 @@
- pciutils
- pv
- zstd
+ - miniupnpc
+ - rsync
+ - ncdu
+ - smartmontools
+ - ioping
+ - lm-sensors
+ - netcat
+ - sysstat
state: present
- name: "Passwordless sudo"
diff --git a/os/config/roles/consul/tasks/main.yml b/os/config/roles/consul/tasks/main.yml
index e0d3b0a..6bc100b 100644
--- a/os/config/roles/consul/tasks/main.yml
+++ b/os/config/roles/consul/tasks/main.yml
@@ -1,6 +1,6 @@
- name: "Set consul version"
set_fact:
- consul_version: 1.11.2
+ consul_version: 1.11.4
- name: "Download and install Consul for x86_64"
unarchive:
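
Consul agents can be upgraded one at a time; after restarting each one, the fleet can be checked with:

    consul members    # the Build column should read 1.11.4 everywhere
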
diff --git a/os/config/roles/network/templates/rules.v4 b/os/config/roles/network/templates/rules.v4
index a5f138b..89d9ebf 100644
--- a/os/config/roles/network/templates/rules.v4
+++ b/os/config/roles/network/templates/rules.v4
@@ -10,12 +10,12 @@
-A INPUT -p tcp --dport 22 -j ACCEPT
# Diplonat needs everything open to communicate with the router via IGD
--A INPUT -s 192.168.1.254 -j ACCEPT
+-A INPUT -s 192.168.0.254 -j ACCEPT
# Cluster
-{% for selected_host in groups['cluster_nodes'] %}
--A INPUT -s {{ hostvars[selected_host]['ipv4'] }} -j ACCEPT
-{% endfor %}
+-A INPUT -s 192.168.0.2 -j ACCEPT
+-A INPUT -s 192.168.0.3 -j ACCEPT
+-A INPUT -s 192.168.0.4 -j ACCEPT
# Local
-A INPUT -i docker0 -j ACCEPT
diff --git a/os/config/roles/network/templates/rules.v6 b/os/config/roles/network/templates/rules.v6
index ef3de43..35bcb0d 100644
--- a/os/config/roles/network/templates/rules.v6
+++ b/os/config/roles/network/templates/rules.v6
@@ -16,9 +16,9 @@
-A INPUT -p tcp --dport 22 -j ACCEPT
# Cluster
-{% for selected_host in groups['cluster_nodes'] %}
--A INPUT -s {{ hostvars[selected_host]['ipv6'] }} -j ACCEPT
-{% endfor %}
+-A INPUT -s 2a01:e0a:260:b5b0::2 -j ACCEPT
+-A INPUT -s 2a01:e0a:260:b5b0::3 -j ACCEPT
+-A INPUT -s 2a01:e0a:260:b5b0::4 -j ACCEPT
# Local
-A INPUT -i docker0 -j ACCEPT
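
Since the peer addresses are now hardcoded instead of templated in both rulesets, a cheap safeguard is to validate them before loading (paths assume the usual iptables-persistent layout, which matches the template names):

    iptables-restore --test /etc/iptables/rules.v4
    ip6tables-restore --test /etc/iptables/rules.v6
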
diff --git a/os/config/roles/nomad/tasks/main.yml b/os/config/roles/nomad/tasks/main.yml
index 7c90a86..a6f36b1 100644
--- a/os/config/roles/nomad/tasks/main.yml
+++ b/os/config/roles/nomad/tasks/main.yml
@@ -1,6 +1,6 @@
- name: "Set nomad version"
set_fact:
- nomad_version: 1.2.4
+ nomad_version: 1.2.6
- name: "Download and install Nomad for x86_64"
unarchive:
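
As with Consul, the Nomad bump can be verified per node after restart (servers are usually upgraded before clients):

    nomad server members    # Build column should read 1.2.6
    nomad node status       # all clients back to "ready"
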
diff --git a/os/config/roles/storage/handlers/main.yml b/os/config/roles/storage/handlers/main.yml
deleted file mode 100644
index a395c93..0000000
--- a/os/config/roles/storage/handlers/main.yml
+++ /dev/null
@@ -1,3 +0,0 @@
----
-- name: umount gluster
- shell: umount --force --lazy /mnt/glusterfs ; true
diff --git a/os/config/roles/storage/tasks/main.yml b/os/config/roles/storage/tasks/main.yml
deleted file mode 100644
index d66011b..0000000
--- a/os/config/roles/storage/tasks/main.yml
+++ /dev/null
@@ -1,72 +0,0 @@
-- name: "Add GlusterFS Repo Key"
- apt_key:
- url: https://download.gluster.org/pub/gluster/glusterfs/5/rsa.pub
- state: present
-
-- name: "Add GlusterFS official repository"
- apt_repository:
- repo: "deb [arch=amd64] https://download.gluster.org/pub/gluster/glusterfs/5/LATEST/Debian/buster/amd64/apt buster main"
- state: present
- filename: gluster
-
-- name: "Install GlusterFS"
- apt:
- name:
- - glusterfs-server
- - glusterfs-client
- state: present
-
-- name: "Ensure Gluster Daemon started and enabled"
- service:
- name: glusterd
- enabled: yes
- state: started
-
-- name: "Create directory for GlusterFS bricks"
- file: path=/mnt/storage/glusterfs/brick1 recurse=yes state=directory
-
-- name: "Create GlusterFS volumes"
- gluster_volume:
- state: present
- name: donnees
- bricks: /mnt/storage/glusterfs/brick1/g1
- #rebalance: yes
- redundancies: 1
- disperses: 3
- #replicas: 3
- force: yes
- options:
- client.event-threads: "8"
- server.event-threads: "8"
- performance.stat-prefetch: "on"
- nfs.disable: "on"
- features.cache-invalidation: "on"
- performance.client-io-threads: "on"
- config.transport: tcp
- performance.quick-read: "on"
- performance.io-cache: "on"
- nfs.export-volumes: "off"
- cluster.lookup-optimize: "on"
-
- cluster: "{% for selected_host in groups['cluster_nodes'] %}{{ hostvars[selected_host]['ipv4'] }}{{ ',' if not loop.last else '' }}{% endfor %}"
- run_once: true
-
-- name: "Create mountpoint"
- file: path=/mnt/glusterfs recurse=yes state=directory
-
-- name: "Flush handlers (umount glusterfs and restart ganesha)"
- meta: flush_handlers
-
-- name: "Add fstab entry"
- tags: gluster-fstab
- mount:
- path: /mnt/glusterfs
- src: "{{ ipv4 }}:/donnees"
- fstype: glusterfs
- opts: "defaults,_netdev,noauto,x-systemd.automount"
- state: present
-
-- name: Mount everything
- command: mount -a
- args:
- warn: no
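
Removing the role only stops Ansible from managing GlusterFS; nodes that already ran it keep the daemon, mount, and fstab entry. A manual cleanup along these lines may be wanted (paths and unit names taken from the deleted tasks):

    sudo umount --force --lazy /mnt/glusterfs || true
    sudo systemctl disable --now glusterd
    # also remove the /mnt/glusterfs line from /etc/fstab and, if desired,
    # the gluster apt repository and key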