docker-postgres-backup/start.py

"""
The script provides functionality:
1. Backup your PostgresSQL database to sql file using pg_dump
2. Compress sql file using LZMA2
3. Upload compressed file to S3 storage
4. Scheduled to run every hour
You can set some envs vars:
- SCHEDULE - can be monthly, weekly, daily, hourly (by default)
- DB_USER - user to connect DB, default is postgres
- DB_PASSWORD - password to connect DB, default is postgres
- DB_HOST - host to connect DB, default is localhost
- DB_PORT - port to connect DB, default is 5432
- DB_NAME - database to back up, default is postgres
- TIME_ZONE - timezone for datetime using in filenames, default is Europe/Tallinn
- COMPRESSION_LEVEL - level of LZMA compression, default is 7
- PREFIX - prefix for backup filename, default is empty
Settings for S3 storage:
- AWS_S3_REGION_NAME - default nl-ams
- AWS_S3_ENDPOINT_URL - default https://s3.nl-ams.scw.cloud
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- AWS_BUCKET_NAME
"""
import lzma
import os
import shutil
import subprocess
import threading
import time
from datetime import datetime

import boto3
import pytz
import schedule
from boto3.exceptions import S3UploadFailedError

# Normalized so that lowercase values such as "daily" also match the scheduler below
SCHEDULE = os.getenv("SCHEDULE", "HOURLY").upper()
DB_USER = os.getenv("DB_USER", "postgres")
DB_PASSWORD = os.getenv("DB_PASSWORD", "postgres")
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = os.getenv("DB_PORT", "5432")
DB_NAME = os.getenv("DB_NAME", "postgres")
TIME_ZONE = pytz.timezone(os.getenv("TIME_ZONE", "Europe/Tallinn"))
PREFIX = os.getenv("PREFIX", "")
AWS_S3_REGION_NAME = os.getenv("AWS_S3_REGION_NAME", "nl-ams")
AWS_S3_ENDPOINT_URL = os.getenv("AWS_S3_ENDPOINT_URL", "https://s3.nl-ams.scw.cloud")
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_BUCKET_NAME = os.getenv("AWS_BUCKET_NAME")

COMPRESSION_SETTINGS = [
    {
        "id": lzma.FILTER_LZMA2,
        "preset": int(os.getenv("COMPRESSION_LEVEL", 7)) | lzma.PRESET_EXTREME,
    },
]


def backup_db_from_postgres(file_path: str) -> bool:
    """
    Back up the database from PostgreSQL to a file using pg_dump
    :param file_path: path of the SQL file to write the dump to
    :return: True on success, False otherwise
    """
    postgres_connection_url = f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
    try:
        process = subprocess.Popen(
            [
                "pg_dump",
                f"--dbname={postgres_connection_url}",
                "-f",
                file_path,
                "-Fp",
            ],
            stdout=subprocess.PIPE,
        )
        process.communicate()
        if process.returncode != 0:
            print(f"Command failed. Return code: {process.returncode}")
            return False
        return True
    except (subprocess.SubprocessError, OSError) as exception:
        print(exception)
        return False


def compress_file_to_xz(file_path: str) -> str:
    """
    Compress the file with LZMA2 into <file_path>.xz and remove the original
    """
    compressed_file_path = f"{file_path}.xz"
    with open(file_path, "rb") as origin_file:
        with lzma.open(compressed_file_path, "wb", filters=COMPRESSION_SETTINGS) as compressed_file:
            shutil.copyfileobj(origin_file, compressed_file)
    os.remove(file_path)
    return compressed_file_path


def upload_to_s3(compressed_file_path: str, filename: str) -> bool:
    """
    Upload the compressed file to the S3 bucket under a YYYY/MM/DD/<filename> key,
    then remove the local copy
    """
    time_string = datetime.now(tz=TIME_ZONE).strftime("%Y/%m/%d")
    destination_folder = f"{time_string}/{filename}"
    try:
        s3_client = boto3.client(
            service_name="s3",
            region_name=AWS_S3_REGION_NAME,
            endpoint_url=AWS_S3_ENDPOINT_URL,
            aws_access_key_id=AWS_ACCESS_KEY_ID,
            aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        )
        s3_client.upload_file(
            Filename=compressed_file_path,
            Bucket=AWS_BUCKET_NAME,
            Key=destination_folder,
        )
        os.remove(compressed_file_path)
        return True
    except S3UploadFailedError as exception:
        print(exception)
        return False


def run_backup_database() -> None:
    """
    Run a single backup: dump the database, compress the dump and upload it to S3
    """
    scheduled_time = datetime.now(tz=TIME_ZONE).strftime("%y%m%d-%H%M")
    filename = f"{DB_NAME}-{scheduled_time}.sql"
    if PREFIX:
        filename = f"{PREFIX}-{filename}"
    sql_file_path = f"/tmp/{filename}"
    backup_success = backup_db_from_postgres(file_path=sql_file_path)
    if not backup_success:
        print("Backup failed")
        return
    compressed_file_path = compress_file_to_xz(file_path=sql_file_path)
    upload_success = upload_to_s3(compressed_file_path, f"{filename}.xz")
    if not upload_success:
        print("Upload failed")
        return
    upload_time = datetime.now(tz=TIME_ZONE).strftime("%Y-%m-%d %H:%M")
    print(f"Made backup at {scheduled_time} and uploaded to S3 at {upload_time}")


def run_threaded(job_func) -> None:
    """
    Run the scheduled job in its own thread so a long backup does not block the scheduler loop
    :param job_func: the callable to run
    """
    job_thread = threading.Thread(target=job_func)
    job_thread.start()


match SCHEDULE:
    case "MONTHLY":
        print("Scheduled to run backup task every 4 weeks")
        schedule.every(4).weeks.do(run_threaded, run_backup_database)
    case "WEEKLY":
        print("Scheduled to run backup task every Monday at 02:00")
        schedule.every().monday.at("02:00").do(run_threaded, run_backup_database)
    case "DAILY":
        print("Scheduled to run backup task every day at 02:00")
        schedule.every().day.at("02:00").do(run_threaded, run_backup_database)
    # For any other value, including HOURLY, run hourly
    case _:
        print("Scheduled to run backup task every hour")
        schedule.every().hour.at(":05").do(run_threaded, run_backup_database)

# Run the first job immediately
schedule.run_all()

while True:
    schedule.run_pending()
    time.sleep(1)