docker-postgres-backup/start.py

"""
The script provides functionality:
1. Backup your PostgresSQL database to sql file using pg_dump
2. Compress sql file using LZMA2
3. Upload compressed file to S3 storage
4. Scheduled to run every hour
You can set some envs vars:
- DEBUG - run every 1 minute if 1, default is 0
- DB_USER - user to connect DB, default is postgres
- DB_PASSWORD - password to connect DB, default is postgres
- DB_HOST - host to connect DB, default is localhost
- DB_PORT - port to connect DB, default is 5432
- DB_NAME - database to back up, default is postgres
- TIME_ZONE - timezone for datetime using in filenames, default is Europe/Tallinn
- COMPRESSION_LEVEL - level of LZMA compression, default is 7
Settings for S3 storage:
- AWS_S3_REGION_NAME - default nl-ams
- AWS_S3_ENDPOINT_URL - default https://s3.nl-ams.scw.cloud
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- AWS_BUCKET_NAME
"""
import lzma
import os
import shutil
import subprocess
import threading
import time
from datetime import datetime
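
# Third-party dependencies: boto3 (S3 client), pytz (timezones), schedule (periodic jobs).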
import boto3
import pytz
import schedule
from boto3.exceptions import S3UploadFailedError
DEBUG = int(os.getenv("DEBUG", 0))
DB_USER = os.getenv("DB_USER", "postgres")
DB_PASSWORD = os.getenv("DB_PASSWORD", "postgres")
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = os.getenv("DB_PORT", "5432")
DB_NAME = os.getenv("DB_NAME", "postgres")
TIME_ZONE = pytz.timezone(os.getenv("TIME_ZONE", "Europe/Tallinn"))
AWS_S3_REGION_NAME = os.getenv("AWS_S3_REGION_NAME", "nl-ams")
AWS_S3_ENDPOINT_URL = os.getenv("AWS_S3_ENDPOINT_URL", "https://s3.nl-ams.scw.cloud")
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_BUCKET_NAME = os.getenv("AWS_BUCKET_NAME")
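
# lzma custom filter chain: a single LZMA2 filter whose preset combines the
# configured COMPRESSION_LEVEL with PRESET_EXTREME for denser (but slower) output.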
COMPRESSION_SETTINGS = [
    {
        "id": lzma.FILTER_LZMA2,
        "preset": int(os.getenv("COMPRESSION_LEVEL", 7)) | lzma.PRESET_EXTREME,
    },
]


def backup_db_from_postgres(file_path: str) -> bool:
    """
    Back up the database from PostgreSQL to a file using pg_dump.

    :param file_path: path of the SQL file to write the dump to
    :return: True if the dump succeeded, False otherwise
    """
    postgres_connection_url = (
        f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
    )
    try:
        process = subprocess.Popen(
            [
                "pg_dump",
                f"--dbname={postgres_connection_url}",
                "-f",
                file_path,
                "-Fp",
            ],
            stdout=subprocess.PIPE,
        )
        process.communicate()
        if process.returncode != 0:
            print(f"Command failed. Return code: {process.returncode}")
            return False
        return True
    except (subprocess.SubprocessError, OSError) as exception:
        print(exception)
        return False


def compress_file_to_xz(file_path: str) -> str:
    """
    Compress the file at file_path to .xz using LZMA2, remove the original
    file and return the path of the compressed file.
    """
    compressed_file_path = f"{file_path}.xz"
    with open(file_path, "rb") as origin_file:
        with lzma.open(
            compressed_file_path, "wb", filters=COMPRESSION_SETTINGS
        ) as compressed_file:
            shutil.copyfileobj(origin_file, compressed_file)
    os.remove(file_path)
    return compressed_file_path


def upload_to_s3(compressed_file_path: str, filename: str) -> bool:
    """
    Upload the compressed file to S3 under a YYYY/MM/DD/ key prefix,
    then remove the local copy.

    :param compressed_file_path: local path of the .xz file to upload
    :param filename: object name to use inside the dated prefix
    :return: True if the upload succeeded, False otherwise
    """
    time_string = datetime.now(tz=TIME_ZONE).strftime("%Y/%m/%d")
    destination_folder = f"{time_string}/{filename}"
    try:
        s3_client = boto3.client(
            service_name="s3",
            region_name=AWS_S3_REGION_NAME,
            endpoint_url=AWS_S3_ENDPOINT_URL,
            aws_access_key_id=AWS_ACCESS_KEY_ID,
            aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        )
        s3_client.upload_file(
            Filename=compressed_file_path,
            Bucket=AWS_BUCKET_NAME,
            Key=destination_folder,
        )
        os.remove(compressed_file_path)
        return True
    except S3UploadFailedError as exception:
        print(exception)
        return False


def run_backup_database() -> None:
    """
    Run one backup cycle: dump the database, compress the dump and upload it to S3.
    """
    scheduled_time = datetime.now(tz=TIME_ZONE).strftime("%y%m%d-%H%M")
    filename = f"{DB_NAME}-{scheduled_time}.sql"
    sql_file_path = f"/tmp/{filename}"
    backup_success = backup_db_from_postgres(file_path=sql_file_path)
    if not backup_success:
        print("Backup failed")
        return
    compressed_file_path = compress_file_to_xz(file_path=sql_file_path)
    upload_success = upload_to_s3(compressed_file_path, f"{filename}.xz")
    if not upload_success:
        print("Upload failed")
        return
    upload_time = datetime.now(tz=TIME_ZONE).strftime("%Y-%m-%d %H:%M")
    print(f"Made a backup at {scheduled_time} and uploaded it to S3 at {upload_time}")


def run_threaded(job_func):
    """
    Run a scheduled job in its own thread.

    :param job_func: callable to execute in the new thread
    """
    job_thread = threading.Thread(target=job_func)
    job_thread.start()
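
# Schedule the backup job; each run goes through run_threaded so a slow dump
# or upload does not block the scheduling loop below.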
if not DEBUG:
    print("Scheduling 'run_backup_database' to run every hour")
    schedule.every().hour.at(":05").do(run_threaded, run_backup_database)
else:
    print("Scheduling 'run_backup_database' to run every 5 minutes")
    schedule.every(5).minutes.do(run_threaded, run_backup_database)

while True:
    schedule.run_pending()
    time.sleep(1)