"""
|
|
The script provides functionality:
|
|
1. Backup your PostgresSQL database to sql file using pg_dump
|
|
2. Compress sql file using LZMA2
|
|
3. Upload compressed file to S3 storage
|
|
4. Scheduled to run every hour
|
|
|
|
You can set some envs vars:
|
|
- SCHEDULE - can be monthly, weekly, daily, hourly (by default)
|
|
- DB_USER - user to connect DB, default is postgres
|
|
- DB_PASSWORD - password to connect DB, default is postgres
|
|
- DB_HOST - host to connect DB, default is localhost
|
|
- DB_PORT - port to connect DB, default is 5432
|
|
- DB_NAME - database to back up, default is postgres
|
|
- TIME_ZONE - timezone for datetime using in filenames, default is Europe/Tallinn
|
|
- COMPRESSION_LEVEL - level of LZMA compression, default is 7
|
|
- PREFIX - prefix for backup filename, default is empty
|
|
|
|
Settings for S3 storage:
|
|
- AWS_S3_REGION_NAME - default nl-ams
|
|
- AWS_S3_ENDPOINT_URL - default https://s3.nl-ams.scw.cloud
|
|
- AWS_ACCESS_KEY_ID
|
|
- AWS_SECRET_ACCESS_KEY
|
|
- AWS_BUCKET_NAME
|
|
"""

import lzma
import os
import shutil
import subprocess
import threading
import time
from datetime import datetime

import boto3
import pytz
import schedule
from boto3.exceptions import S3UploadFailedError

SCHEDULE = os.getenv("SCHEDULE", "HOURLY")
DB_USER = os.getenv("DB_USER", "postgres")
DB_PASSWORD = os.getenv("DB_PASSWORD", "postgres")
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = os.getenv("DB_PORT", "5432")
DB_NAME = os.getenv("DB_NAME", "postgres")
TIME_ZONE = pytz.timezone(os.getenv("TIME_ZONE", "Europe/Tallinn"))
PREFIX = os.getenv("PREFIX", "")
AWS_S3_REGION_NAME = os.getenv("AWS_S3_REGION_NAME", "nl-ams")
AWS_S3_ENDPOINT_URL = os.getenv("AWS_S3_ENDPOINT_URL", "https://s3.nl-ams.scw.cloud")
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_BUCKET_NAME = os.getenv("AWS_BUCKET_NAME")

COMPRESSION_SETTINGS = [
    {
        "id": lzma.FILTER_LZMA2,
        "preset": int(os.getenv("COMPRESSION_LEVEL", "7")) | lzma.PRESET_EXTREME,
    },
]
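
# OR-ing the preset with lzma.PRESET_EXTREME trades extra CPU time during
# compression for a slightly better ratio at the same COMPRESSION_LEVEL.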


def backup_db_from_postgres(file_path: str) -> bool:
    """
    Back up the database from PostgreSQL to a file using pg_dump

    :param file_path: path of the SQL dump file to write
    :return: True on success, False on failure
    """
    postgres_connection_url = (
        f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
    )
    try:
        process = subprocess.Popen(
            [
                "pg_dump",
                f"--dbname={postgres_connection_url}",
                "-f",
                file_path,
                "-Fp",
            ],
            stdout=subprocess.PIPE,
        )
        process.communicate()
        if process.returncode != 0:
            print(f"Command failed. Return code: {process.returncode}")
            return False
        return True
    except (subprocess.SubprocessError, OSError) as exception:
        print(exception)
        return False
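
# Note: -Fp selects pg_dump's plain-text SQL format; the custom format (-Fc)
# is compressed by pg_dump itself, which would make the LZMA step redundant.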


def compress_file_to_xz(file_path: str) -> str:
    """
    Compress a file with LZMA2 into a .xz file and remove the original

    :param file_path: path of the file to compress
    :return: path of the compressed .xz file
    """
    compressed_file_path = f"{file_path}.xz"
    with open(file_path, "rb") as origin_file:
        with lzma.open(
            compressed_file_path, "wb", filters=COMPRESSION_SETTINGS
        ) as compressed_file:
            shutil.copyfileobj(origin_file, compressed_file)
    os.remove(file_path)
    return compressed_file_path
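
# lzma.open writes the xz container format by default, so the output of the
# custom FILTER_LZMA2 chain above is still decompressible with a plain `xz -d`.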


def upload_to_s3(compressed_file_path: str, filename: str) -> bool:
    """
    Upload the compressed file to S3 under a YYYY/MM/DD key prefix

    :param compressed_file_path: local path of the file to upload
    :param filename: object name to use inside the dated prefix
    :return: True on success, False on failure
    """
    time_string = datetime.now(tz=TIME_ZONE).strftime("%Y/%m/%d")
    destination_folder = f"{time_string}/{filename}"
    try:
        s3_client = boto3.client(
            service_name="s3",
            region_name=AWS_S3_REGION_NAME,
            endpoint_url=AWS_S3_ENDPOINT_URL,
            aws_access_key_id=AWS_ACCESS_KEY_ID,
            aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        )
        s3_client.upload_file(
            Filename=compressed_file_path,
            Bucket=AWS_BUCKET_NAME,
            Key=destination_folder,
        )
        os.remove(compressed_file_path)
        return True
    except S3UploadFailedError as exception:
        print(exception)
        return False
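
# The YYYY/MM/DD key prefix is not a real directory: S3 keys are flat, and the
# slashes are only rendered as folders by most S3 browsers.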


def run_backup_database() -> None:
    """
    Run the full backup pipeline: dump, compress, upload
    """
    scheduled_time = datetime.now(tz=TIME_ZONE).strftime("%y%m%d-%H%M")
    filename = f"{DB_NAME}-{scheduled_time}.sql"
    if PREFIX:
        filename = f"{PREFIX}-{filename}"
    sql_file_path = f"/tmp/{filename}"

    backup_success = backup_db_from_postgres(file_path=sql_file_path)
    if not backup_success:
        print("Backup failed")
        return
    compressed_file_path = compress_file_to_xz(file_path=sql_file_path)
    upload_success = upload_to_s3(compressed_file_path, f"{filename}.xz")
    if not upload_success:
        print("Upload failed")
        return
    upload_time = datetime.now(tz=TIME_ZONE).strftime("%Y-%m-%d %H:%M")
    print(f"Made backup at {scheduled_time} and uploaded to S3 at {upload_time}")


def run_threaded(job_func):
    """
    Run a job in a background thread so a long backup doesn't block the scheduler

    :param job_func: callable to run in the thread
    """
    job_thread = threading.Thread(target=job_func)
    job_thread.start()
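
# This follows the threaded-execution recipe from the schedule library's docs;
# without it, a backup that overruns its slot would delay every pending job.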


match SCHEDULE:
    case "MONTHLY":
        print("Scheduled to run backup task every 4 weeks")
        schedule.every(4).weeks.do(run_threaded, run_backup_database)
    case "WEEKLY":
        print("Scheduled to run backup task every Monday at 02:00")
        schedule.every().monday.at("02:00").do(run_threaded, run_backup_database)
    case "DAILY":
        print("Scheduled to run backup task every day at 02:00")
        schedule.every().day.at("02:00").do(run_threaded, run_backup_database)
    # For any other value, incl. HOURLY, run hourly
    case _:
        print("Scheduled to run backup task every hour")
        schedule.every().hour.at(":05").do(run_threaded, run_backup_database)

# Run the first job immediately
schedule.run_all()

while True:
    schedule.run_pending()
    time.sleep(1)