aboutsummaryrefslogtreecommitdiff
path: root/cluster/prod/app/backup/build/backup-psql/backup-psql.py
blob: a3a5b5f87a53999d16e349620c4e4f57394d23c9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env python3
import shutil,sys,os,datetime,minio,subprocess

working_directory = "."
if 'CACHE_DIR' in os.environ: working_directory = os.environ['CACHE_DIR']
required_space_in_bytes = 20 * 1024 * 1024 * 1024
bucket = os.environ['AWS_BUCKET']
key = os.environ['AWS_ACCESS_KEY_ID']
secret = os.environ['AWS_SECRET_ACCESS_KEY']
endpoint = os.environ['AWS_ENDPOINT']
pubkey = os.environ['CRYPT_PUBLIC_KEY']
psql_host = os.environ['PSQL_HOST']
psql_user = os.environ['PSQL_USER']
s3_prefix = str(datetime.datetime.now())
files = [ "backup_manifest", "base.tar.gz", "pg_wal.tar.gz" ]
clear_paths = [ os.path.join(working_directory, f) for f in files ]
crypt_paths = [ os.path.join(working_directory, f) + ".age" for f in files ]
s3_keys = [ s3_prefix + "/" + f for f in files ]

def abort(msg):
  for p in clear_paths + crypt_paths:
    if os.path.exists(p):
        print(f"Remove {p}")
        os.remove(p)

  if msg: sys.exit(msg)
  else: print("success")

# Check we have enough space on disk
if shutil.disk_usage(working_directory).free < required_space_in_bytes:
  abort(f"Not enough space on disk at path {working_directory} to perform a backup, aborting")

# Check postgres password is set
if 'PGPASSWORD' not in os.environ:
  abort(f"You must pass postgres' password through the environment variable PGPASSWORD")

# Check our working directory is empty
if len(os.listdir(working_directory)) != 0:
  abort(f"Working directory {working_directory} is not empty, aborting")

# Check Minio
client = minio.Minio(endpoint, key, secret)
if not client.bucket_exists(bucket):
  abort(f"Bucket {bucket} does not exist or its access is forbidden, aborting")

# Perform the backup locally
# Via command-line:
# pg_basebackup --host=localhost --username=$PSQL_USER --pgdata=. --format=tar  --wal-method=stream --gzip --compress=6 --progress --max-rate=5M
try:
  ret = subprocess.run(["pg_basebackup",
    f"--host={psql_host}",
    f"--username={psql_user}",
    f"--pgdata={working_directory}",
    f"--format=tar",
    "--wal-method=stream",
    "--gzip",
    "--compress=6",
    "--progress",
    "--max-rate=5M",
  ])
  if ret.returncode != 0:
    abort(f"pg_basebackup exited, expected return code 0, got {ret.returncode}. aborting")
except Exception as e:
  abort(f"pg_basebackup raised exception {e}. aborting")

# Check that the expected files are here
for p in clear_paths:
  print(f"Checking that {p} exists locally")
  if not os.path.exists(p):
    abort(f"File {p} expected but not found, aborting")

# Cipher them
for c, e in zip(clear_paths, crypt_paths):
  print(f"Ciphering {c} to {e}")
  try:
    ret = subprocess.run(["age", "-r", pubkey, "-o", e, c])
    if ret.returncode != 0:
      abort(f"age exit code is {ret}, 0 expected. aborting")
  except Exception as e:
    abort(f"aged raised an exception. {e}. aborting")

# Upload the backup to S3
for p, k in zip(crypt_paths, s3_keys):
  try:
    print(f"Uploading {p} to {k}")
    result = client.fput_object(bucket, k, p)
    print(
      "created {0} object; etag: {1}, version-id: {2}".format(
        result.object_name, result.etag, result.version_id,
      ),
    )
  except Exception as e:
    abort(f"Exception {e} occured while upload {p}. aborting")

# Check that the files have been uploaded
for k in s3_keys:
  try:
    print(f"Checking that {k} exists remotely")
    result = client.stat_object(bucket, k)
    print(
      "last-modified: {0}, size: {1}".format(
        result.last_modified, result.size, 
      ),
    )
  except Exception as e:
    abort(f"{k} not found on S3. {e}. aborting")

abort(None)