backup.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. #!/usr/bin/python
  2. import os
  3. import subprocess
  4. import sys
  5. from datetime import datetime
  6. dt = datetime.now()
  7. BACKUP_DIR = os.environ["BACKUP_DIR"]
  8. S3_PATH = os.environ["S3_PATH"]
  9. S3_STORAGE_CLASS = os.environ.get("S3_STORAGE_CLASS") or "STANDARD_IA"
  10. S3_EXTRA_OPTIONS = os.environ.get("S3_EXTRA_OPTIONS") or ""
  11. DB_USE_ENV = os.environ.get("DB_USE_ENV") or False
  12. DB_NAME = os.environ["DB_NAME"] if "DB_NAME" in os.environ else os.environ.get("PGDATABASE")
  13. if not DB_NAME:
  14. raise Exception("DB_NAME must be set")
  15. if not DB_USE_ENV:
  16. DB_HOST = os.environ["DB_HOST"]
  17. DB_PASS = os.environ["DB_PASS"]
  18. DB_USER = os.environ["DB_USER"]
  19. DB_PORT = os.environ.get("DB_PORT") or "5432"
  20. MAIL_TO = os.environ.get("MAIL_TO")
  21. MAIL_FROM = os.environ.get("MAIL_FROM")
  22. WEBHOOK = os.environ.get("WEBHOOK")
  23. WEBHOOK_METHOD = os.environ.get("WEBHOOK_METHOD")
  24. WEBHOOK_DATA = os.environ.get("WEBHOOK_DATA")
  25. WEBHOOK_CURL_OPTIONS = os.environ.get("WEBHOOK_CURL_OPTIONS") or ""
  26. KEEP_BACKUP_DAYS = int(os.environ.get("KEEP_BACKUP_DAYS", 7))
  27. FILENAME = os.environ.get("FILENAME", DB_NAME + "_%Y-%m-%d")
  28. file_name = dt.strftime(FILENAME)
  29. backup_file = os.path.join(BACKUP_DIR, file_name)
  30. if not S3_PATH.endswith("/"):
  31. S3_PATH = S3_PATH + "/"
  32. if WEBHOOK_DATA and not WEBHOOK_METHOD:
  33. WEBHOOK_METHOD = 'POST'
  34. else:
  35. WEBHOOK_METHOD = WEBHOOK_METHOD or 'GET'
  36. def cmd(command, **kwargs):
  37. try:
  38. subprocess.check_output([command], shell=True, stderr=subprocess.STDOUT, **kwargs)
  39. except subprocess.CalledProcessError as e:
  40. sys.stderr.write("\n".join([
  41. "Command execution failed. Output:",
  42. "-"*80,
  43. e.output,
  44. "-"*80,
  45. ""
  46. ]))
  47. raise
  48. def backup_exists():
  49. return os.path.exists(backup_file)
  50. def take_backup():
  51. env = os.environ.copy()
  52. if DB_USE_ENV:
  53. env.update({key: os.environ[key] for key in os.environ.keys() if key.startswith('PG') })
  54. else:
  55. env.update({'PGPASSWORD': DB_PASS, 'PGHOST': DB_HOST, 'PGUSER': DB_USER, 'PGDATABASE': DB_NAME, 'PGPORT': DB_PORT})
  56. # trigger postgres-backup
  57. cmd("pg_dump -Fc > %s" % backup_file, env=env)
  58. def upload_backup():
  59. opts = "--storage-class=%s %s" % (S3_STORAGE_CLASS, S3_EXTRA_OPTIONS)
  60. cmd("aws s3 cp %s %s %s" % (opts, backup_file, S3_PATH))
  61. def prune_local_backup_files():
  62. cmd("find %s -type f -prune -mtime +%i -exec rm -f {} \;" % (BACKUP_DIR, KEEP_BACKUP_DAYS))
  63. def send_email(to_address, from_address, subject, body):
  64. """
  65. Super simple, doesn't do any escaping
  66. """
  67. cmd("""aws --region us-east-1 ses send-email --from %(from)s --destination '{"ToAddresses":["%(to)s"]}' --message '{"Subject":{"Data":"%(subject)s","Charset":"UTF-8"},"Body":{"Text":{"Data":"%(body)s","Charset":"UTF-8"}}}'""" % {
  68. "to": to_address,
  69. "from": from_address,
  70. "subject": subject,
  71. "body": body,
  72. })
  73. def log(msg):
  74. print("[%s]: %s" % (datetime.now().strftime("%Y-%m-%d %H:%M:%S"), msg))
  75. def pretty_bytes(num):
  76. for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
  77. if num < 1024.0:
  78. return "%3.1f %s" % (num, x)
  79. num /= 1024.0
  80. def main():
  81. start_time = datetime.now()
  82. log("Dumping database")
  83. take_backup()
  84. backup_size=os.path.getsize(backup_file)
  85. log("Uploading to S3")
  86. upload_backup()
  87. log("Pruning local backup copies")
  88. prune_local_backup_files()
  89. end_time = datetime.now()
  90. meta = {
  91. "filename": file_name,
  92. "date": end_time.strftime("%Y-%m-%d"),
  93. "time": end_time.strftime('%H:%M:%S'),
  94. "duration": "%.2f" % ((end_time - start_time).total_seconds()),
  95. "size": pretty_bytes(backup_size)
  96. }
  97. if MAIL_TO and MAIL_FROM:
  98. log("Sending mail to %s" % MAIL_TO)
  99. send_email(
  100. MAIL_TO,
  101. MAIL_FROM,
  102. "Backup complete: %s" % DB_NAME,
  103. "Took %(duration)s seconds" % meta,
  104. )
  105. if WEBHOOK:
  106. if WEBHOOK_DATA:
  107. opts = "%s -d '%s'" % (WEBHOOK_CURL_OPTIONS, WEBHOOK_DATA % meta)
  108. else:
  109. opts = WEBHOOK_CURL_OPTIONS
  110. log("Making HTTP %s request to webhook: %s" % (WEBHOOK_METHOD, WEBHOOK))
  111. cmd("curl -X %s %s %s" % (WEBHOOK_METHOD, opts, WEBHOOK))
  112. log("Backup complete, took %(duration)s seconds, size %(size)s" % meta)
  113. if __name__ == "__main__":
  114. main()