""" The script provides functionality: 1. Backup your PostgresSQL database to sql file using pg_dump 2. Compress sql file using LZMA2 3. Upload compressed file to S3 storage 4. Scheduled to run every hour You can set some envs vars: - DEBUG - run every 1 minute if 1, default is 0 - DB_USER - user to connect DB, default is postgres - DB_PASSWORD - password to connect DB, default is postgres - DB_HOST - host to connect DB, default is localhost - DB_PORT - port to connect DB, default is 5432 - DB_NAME - database to back up, default is postgres - TIME_ZONE - timezone for datetime using in filenames, default is Europe/Tallinn - COMPRESSION_LEVEL - level of LZMA compression, default is 7 - PREFIX - prefix for backup filename, default is empty Settings for S3 storage: - AWS_S3_REGION_NAME - default nl-ams - AWS_S3_ENDPOINT_URL - default https://s3.nl-ams.scw.cloud - AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY - AWS_BUCKET_NAME """ import lzma import os import shutil import subprocess import threading import time from datetime import datetime import boto3 import pytz import schedule from boto3.exceptions import S3UploadFailedError DEBUG = int(os.getenv("DEBUG", 0)) DB_USER = os.getenv("DB_USER", "postgres") DB_PASSWORD = os.getenv("DB_PASSWORD", "postgres") DB_HOST = os.getenv("DB_HOST", "localhost") DB_PORT = os.getenv("DB_PORT", "5432") DB_NAME = os.getenv("DB_NAME", "postgres") TIME_ZONE = pytz.timezone(os.getenv("TIME_ZONE", "Europe/Tallinn")) PREFIX = os.getenv("PREFIX", "") AWS_S3_REGION_NAME = os.getenv("AWS_S3_REGION_NAME", "nl-ams") AWS_S3_ENDPOINT_URL = os.getenv("AWS_S3_ENDPOINT_URL", "https://s3.nl-ams.scw.cloud") AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID") AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY") AWS_BUCKET_NAME = os.getenv("AWS_BUCKET_NAME") COMPRESSION_SETTINGS = [ { "id": lzma.FILTER_LZMA2, "preset": int(os.getenv("COMPRESSION_LEVEL", 7)) | lzma.PRESET_EXTREME, }, ] def backup_db_from_postgres(file_path: str) -> bool: """ Backup db from PostgresSQL to file using pg_dump :param file: :return: """ postgres_connection_url = ( f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}" ) try: process = subprocess.Popen( [ "pg_dump", f"--dbname={postgres_connection_url}", "-f", file_path, "-Fp", ], stdout=subprocess.PIPE, ) process.communicate() if process.returncode != 0: print(f"Command failed. 
Return code : {process.returncode}") return False return True except (subprocess.SubprocessError, OSError) as exception: print(exception) return False def compress_file_to_xz(file_path: str) -> str: compressed_file_path = f"{file_path}.xz" with open(file_path, "rb") as origin_file: with lzma.open( compressed_file_path, "wb", filters=COMPRESSION_SETTINGS ) as compressed_file: shutil.copyfileobj(origin_file, compressed_file) os.remove(file_path) return compressed_file_path def upload_to_s3(compressed_file_path: str, filename: str) -> bool: time_string = datetime.now(tz=TIME_ZONE).strftime("%Y/%m/%d") destination_folder = f"{time_string}/{filename}" try: s3_client = boto3.client( service_name="s3", region_name=AWS_S3_REGION_NAME, endpoint_url=AWS_S3_ENDPOINT_URL, aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY, ) s3_client.upload_file( Filename=compressed_file_path, Bucket=AWS_BUCKET_NAME, Key=destination_folder, ) os.remove(compressed_file_path) return True except S3UploadFailedError as exception: print(exception) return False def run_backup_database() -> None: """ Run backup script """ scheduled_time = datetime.now(tz=TIME_ZONE).strftime("%y%m%d-%H%M") filename = f"{DB_NAME}-{scheduled_time}.sql" if PREFIX: filename = f"{PREFIX}-{filename}" sql_file_path = f"/tmp/{filename}" backup_success = backup_db_from_postgres(file_path=sql_file_path) if not backup_success: print("Backup failed") return compressed_file_path = compress_file_to_xz(file_path=sql_file_path) upload_success = upload_to_s3(compressed_file_path, f"{filename}.xz") if not upload_success: print("Upload failed") return upload_time = datetime.now(tz=TIME_ZONE).strftime("%Y-%m-%d %H:%M") print(f"Made backup at {scheduled_time} and uploaded to S3 at {upload_time}") def run_threaded(job_func): """ Run the jobs in threading :param job_func: :return: """ job_thread = threading.Thread(target=job_func) job_thread.start() if not DEBUG: print("Setting up task launch 'run_backup_database' every hour") schedule.every().hour.at(":05").do(run_threaded, run_backup_database) else: print("Setting up task launch 'run_backup_database' every 5 minutes") schedule.every(5).minutes.do(run_threaded, run_backup_database) while True: schedule.run_pending() time.sleep(1)
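
# --- Example: spot-checking a downloaded backup locally ---
# A minimal sketch, not executed by this script. It assumes a backup object
# such as "postgres-240101-0005.sql.xz" (hypothetical name) has already been
# downloaded from the bucket, and it uses only the stdlib modules imported
# above (lzma, shutil):
#
#     with lzma.open("postgres-240101-0005.sql.xz", "rb") as compressed_file:
#         with open("postgres-240101-0005.sql", "wb") as sql_file:
#             shutil.copyfileobj(compressed_file, sql_file)
#
# Because the dump is created with pg_dump -Fp (plain SQL format), the
# decompressed file can be restored with psql, e.g.:
#     psql --dbname=<target connection url> -f postgres-240101-0005.sql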