123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108 |
- #! /usr/bin/env python
- """Heroku Postgres Backup to S3 Utility.
- Uses the Heroku PG-Backups system to pull the latest backup for a database, and put the file on Amazon S3.
- Unfortunately, depends on the heroku toolbelt, since there is no standard API for PGBackups (that we have found).
- Be sure that you are logged in to the heroku toolbelt before you run this script, and that it is in your $PATH.
- Usage:
- herokupostgres_s3_backup.py -r <path_to_heroku> -a <app_name> -b <bucket> -k <aws_key_id> -s <aws_secret> -p <s3_key_prefix>
- herokupostgres_s3_backup.py (-h | --help)
- Options:
- -h --help Show this screen.
- -a <app_name> --app=<app_name> Heroku App name.
- -r <path_to_heroku> --herokupath=<path_to_heroku> location where the heroku executable lives, needs trailing slash
- -b <bucket> --bucket=<bucket> S3 Bucket name
- -k <aws_key_id> --awskey=<aws_key_id> AWS Key ID
- -s <aws_secret> --awssecret=<aws_secret> AWS Secret Key
- -p <s3_key_prefix> --prefix=<s3_key_prefix Prefixes filename of S3 object
- """
- import requests
- import math
- import os
- import sys
- import datetime
- import subprocess
- from docopt import docopt
- import boto
- from filechunkio import FileChunkIO
- # Gets the latest backup for a given app
- # Relies on the heroku cli toolbelt to talk to PGBackups
- def get_backup(heroku_path, app_name):
- # first, get the heroku pgbackups:url from the heroku toolbelt
- print('Looking up backup URL for:{0}'.format(app_name))
- # 'Shelling out' isn't ideal in this situation, but it is the path of least resistance for now.
- backup_url = subprocess.check_output(heroku_path + 'heroku pgbackups:url --app {0}'.format(app_name),
- shell=True).rstrip()
- # download the file to disk. Stream, since the file could potentially be large
- print('Downloading backup from:{0}'.format(backup_url))
- # We need to timestamp our own, since the backup url just gets the 'latest'
- backup_filename = app_name + '-' + datetime.datetime.now().isoformat()
- r = requests.get(backup_url, stream=True)
- with open(backup_filename, 'wb') as f:
- for chunk in r.iter_content(chunk_size=1024):
- if chunk: # filter out keep-alive new chunks
- f.write(chunk)
- f.flush()
- print('saved backup to file: {0}'.format(backup_filename))
- return backup_filename
- # Using S3 Multipart upload to handle potentially large files
- def upload_to_s3(s3key, filename, bucket, aws_key, aws_secret):
- conn = boto.connect_s3(aws_key, aws_secret)
- bucket = conn.get_bucket(bucket)
- # Get file info
- source_path = filename
- source_size = os.stat(source_path).st_size
- # Create a multipart upload request
- mp = bucket.initiate_multipart_upload(s3key)
- # Use a chunk size of 50 MiB
- chunk_size = 52428800
- chunk_count = int(math.ceil(source_size / chunk_size))
- # Send the file parts, using FileChunkIO to create a file-like object
- # that points to a certain byte range within the original file. We
- # set bytes to never exceed the original file size.
- for i in range(chunk_count + 1):
- print('Uploading file chunk: {0} of {1}'.format(i + 1, chunk_count + 1))
- offset = chunk_size * i
- bytes = min(chunk_size, source_size - offset)
- with FileChunkIO(source_path, 'r', offset=offset, bytes=bytes) as fp:
- mp.upload_part_from_file(fp, part_num=i + 1)
- # Finish the upload
- completed_upload = mp.complete_upload()
- return completed_upload
- def delete_local_backup_file(filename):
- print('Deleting file from local filesystem:{0}'.format(filename))
- os.remove(filename)
- if __name__ == '__main__':
- # grab all the arguments
- arguments = docopt(__doc__, version='herokupostgres_s3_backup 0.0.1')
- app_name = arguments['--app']
- heroku_path = arguments['--herokupath']
- bucket = arguments['--bucket']
- aws_key = arguments['--awskey']
- aws_secret = arguments['--awssecret']
- prefix = arguments['--prefix']
- # first, fetch the backup
- filename = get_backup(heroku_path, app_name)
- if not filename:
- # we failed to save the backup successfully.
- sys.exit(1)
- # now, store the file we just downloaded up on S3
- print('Uploading file to S3. Bucket:{0}'.format(bucket))
- s3_success = upload_to_s3(prefix + filename, filename, bucket, aws_key, aws_secret)
- if not s3_success:
- # somehow failed the file upload
- print('Failure with S3 upload. Exiting...')
- sys.exit(1)
- print('Upload to S3 completed successfully')
- # Delete the local backup file, to not take up excessive disk space
- delete_local_backup_file(filename)
|