#!/usr/bin/env python
"""Compose API Backup to S3 Utility.

Uses the Compose (MongoHQ) API to pull the latest backup for a database and put the file on Amazon S3.

Usage:
  mongoHQ_S3_backup.py -d <database_name> -t <oauth_token> -a <account_name> -b <bucket> -k <aws_key_id> -s <aws_secret> -p <s3_key_prefix>
  mongoHQ_S3_backup.py (-h | --help)

Options:
  -h --help                                       Show this screen.
  -d <database_name> --database=<database_name>   Name of the database to find a backup for, or deployment name.
  -t <oauth_token> --token=<oauth_token>          MongoHQ OAuth token
  -a <account_name> --account=<account_name>      MongoHQ account name
  -b <bucket> --bucket=<bucket>                   S3 bucket name
  -k <aws_key_id> --awskey=<aws_key_id>           AWS key ID
  -s <aws_secret> --awssecret=<aws_secret>        AWS secret key
  -p <s3_key_prefix> --prefix=<s3_key_prefix>     Prefix for the S3 object's filename [default: '']
"""
import math
import os
import sys

import boto
import requests
from docopt import docopt
from filechunkio import FileChunkIO


# Compose/MongoHQ API docs:
# http://support.mongohq.com/rest-api/2014-06/backups.html
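# Each backup record returned by the API is assumed (based on the fields used below)
# to include at least: 'id', 'created_at', 'filename', 'database_names', and 'deployment'.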
# Gets the latest backup for a given database and account.
def get_backup(database_name, account_name, oauth_token):
    mongohq_url = 'https://api.mongohq.com/accounts/{0}/backups'.format(account_name)
    headers = {'Accept-Version': '2014-06', 'Content-Type': 'application/json',
               'Authorization': 'Bearer {0}'.format(oauth_token)}

    # Get the list of backups for our account.
    r = requests.get(mongohq_url, headers=headers)
    if r.status_code != 200:
        print('Unable to list backups!')
        return None

    all_backups = r.json()
    backups_for_this_database = list()
    for backup in all_backups:
        if database_name in backup['database_names'] or database_name == backup['deployment']:
            backups_for_this_database.append(
                {'id': backup['id'], 'created_at': backup['created_at'], 'filename': backup['filename']})

    if len(backups_for_this_database) == 0:
        print('No backups found for database name: {0}. Exiting...'.format(database_name))
        sys.exit(1)

    # Search for the latest backup for the given database name.
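    # (Assumes 'created_at' values sort chronologically as strings, e.g. ISO 8601 timestamps.)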
    latest = sorted(backups_for_this_database, key=lambda k: k['created_at'])[-1]
    print('The latest backup for {0} is: {1} created at {2}'.format(
        database_name, latest['id'], latest['created_at']))
    backup_filename = latest['filename']

    # Pull down the backup.
    r2 = requests.get('{0}/{1}/download'.format(mongohq_url, latest['id']), headers=headers,
                      allow_redirects=False)
    if r2.status_code != 302:
        return None

    # The MongoHQ backup API redirects to a URL where the backup file can be downloaded.
    # TODO: Can the 302 be followed in one step?
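    # (Note: requests follows redirects by default; allow_redirects=False is set above so the
    # download URL can be read from the Location header and streamed in a separate request.)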
    file_location = r2.headers['location']

    # Download the file to disk, streaming since the file could potentially be large.
    print('Downloading backup from: {0}'.format(file_location))
    r3 = requests.get(file_location, stream=True)
    with open(backup_filename, 'wb') as f:
        for chunk in r3.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive chunks
                f.write(chunk)
                f.flush()
    print('Saved backup to file: {0}'.format(backup_filename))
    return backup_filename


# Uses S3 multipart upload to handle potentially large files.
def upload_to_s3(s3key, filename, bucket, aws_key, aws_secret):
    conn = boto.connect_s3(aws_key, aws_secret)
    bucket = conn.get_bucket(bucket)

    # Get file info.
    source_path = filename
    source_size = os.stat(source_path).st_size

    # Create a multipart upload request.
    mp = bucket.initiate_multipart_upload(s3key)

    # Use a chunk size of 50 MiB.
    chunk_size = 52428800
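    # (S3 multipart uploads require every part except the last to be at least 5 MB,
    # so 50 MiB parts stay comfortably above that minimum.)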
    chunk_count = int(math.ceil(source_size / float(chunk_size)))

    # Send the file parts, using FileChunkIO to create a file-like object
    # that points to a certain byte range within the original file. We
    # set bytes to never exceed the original file size.
    for i in range(chunk_count):
        print('Uploading file chunk: {0} of {1}'.format(i + 1, chunk_count))
        offset = chunk_size * i
        bytes = min(chunk_size, source_size - offset)
        with FileChunkIO(source_path, 'r', offset=offset, bytes=bytes) as fp:
            mp.upload_part_from_file(fp, part_num=i + 1)

    # Finish the upload.
    completed_upload = mp.complete_upload()
    return completed_upload


def delete_local_backup_file(filename):
    print('Deleting file from local filesystem: {0}'.format(filename))
    os.remove(filename)


if __name__ == '__main__':
    # Grab all the arguments.
    arguments = docopt(__doc__, version='mongoHQ_s3_backup 0.0.1')
    database_name = arguments['--database']
    account_name = arguments['--account']
    oauth_token = arguments['--token']
    bucket = arguments['--bucket']
    aws_key = arguments['--awskey']
    aws_secret = arguments['--awssecret']
    prefix = arguments['--prefix']

    # First, fetch the backup.
    filename = get_backup(database_name, account_name, oauth_token)
    if not filename:
        # We failed to save the backup successfully.
        sys.exit(1)

    # Now, store the file we just downloaded up on S3.
    print('Uploading file to S3. Bucket: {0}'.format(bucket))
    s3_success = upload_to_s3(prefix + filename, filename, bucket, aws_key, aws_secret)
    if not s3_success:
        # The file upload failed somehow.
        print('Failure with S3 upload. Exiting...')
        sys.exit(1)
    print('Upload to S3 completed successfully')

    # Delete the local backup file, so it doesn't take up excessive disk space.
    delete_local_backup_file(filename)