compose_s3_backup.py

#!/usr/bin/env python
"""Compose API Backup to S3 Utility.

Uses the Compose (MongoHQ) API to pull the latest backup for a database, and put the file on Amazon S3.

Usage:
  mongoHQ_S3_backup.py -d <database_name> -t <oauthToken> -a <account_name> -b <bucket> -k <aws_key_id> -s <aws_secret> -p <s3_key_prefix>
  mongoHQ_S3_backup.py (-h | --help)

Options:
  -h --help                                       Show this screen.
  -d <database_name> --database=<database_name>   Name of the database to find a backup for, or deployment name.
  -t <oauth_token> --token=<oauth_token>          MongoHQ OAUTH Token
  -a <account_name> --account=<account_name>      MongoHQ Account Name
  -b <bucket> --bucket=<bucket>                   S3 Bucket name
  -k <aws_key_id> --awskey=<aws_key_id>           AWS Key ID
  -s <aws_secret> --awssecret=<aws_secret>        AWS Secret Key
  -p <s3_key_prefix> --prefix=<s3_key_prefix>     Prefixes the filename of the S3 object [default: '']
"""
import requests
import math
import os
import sys
from docopt import docopt
import boto
from filechunkio import FileChunkIO
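
# Third-party dependencies used above: requests, docopt, boto and filechunkio.
# Assuming the usual PyPI package names, they can be installed with, e.g.:
#   pip install requests docopt boto filechunkio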


# Compose/MongoHQ API docs
# http://support.mongohq.com/rest-api/2014-06/backups.html
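# For reference, each backup record returned by the API is treated below as a dict
# with at least these fields (a sketch based on how the fields are used in the code,
# not the full API response):
#   {'id': '...', 'created_at': '...', 'filename': '...',
#    'database_names': ['...'], 'deployment': '...'}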
# Gets the latest backup for a given database and account.
def get_backup(database_name, account_name, oauth_token):
    mongohq_url = 'https://api.mongohq.com/accounts/{0}/backups'.format(account_name)
    headers = {'Accept-Version': '2014-06', 'Content-Type': 'application/json',
               'Authorization': 'Bearer {0}'.format(oauth_token)}
    # get the list of backups for our account.
    r = requests.get(mongohq_url, headers=headers)
    if r.status_code != 200:
        print('Unable to list backups!')
        return None
    all_backups = r.json()
    backups_for_this_database = list()
    for backup in all_backups:
        if database_name in backup['database_names'] or database_name == backup['deployment']:
            backups_for_this_database.append(
                {'id': backup['id'], 'created_at': backup['created_at'], 'filename': backup['filename']})
    if len(backups_for_this_database) == 0:
        print('No Backups found for database name:{0}. Exiting...'.format(database_name))
        sys.exit(1)
    # search for the latest backup for the given database name
    latest = sorted(backups_for_this_database, key=lambda k: k['created_at'])[-1]
    print('The latest backup for {0} is: {1} created at {2}'.format(database_name, latest['id'], latest['created_at']))
    backup_filename = latest['filename']
    # pull down the backup
    r2 = requests.get('{0}/{1}/download'.format(mongohq_url, latest['id']), headers=headers, allow_redirects=False)
    if r2.status_code != 302:
        return None
    # MongoHQ backup API redirects to a URL where the backup file can be downloaded.
    # TODO: Can the 302 be followed in one step?
    file_location = r2.headers['location']
    # download the file to disk. Stream, since the file could potentially be large
    print('Downloading Backup from:{0}'.format(file_location))
    r3 = requests.get(file_location, stream=True)
    with open(backup_filename, 'wb') as f:
        for chunk in r3.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
                f.flush()
    print('saved backup to file: {0}'.format(backup_filename))
    return backup_filename


# Using S3 Multipart upload to handle potentially large files
def upload_to_s3(s3key, filename, bucket, aws_key, aws_secret):
    conn = boto.connect_s3(aws_key, aws_secret)
    bucket = conn.get_bucket(bucket)
    # Get file info
    source_path = filename
    source_size = os.stat(source_path).st_size
    # Create a multipart upload request
    mp = bucket.initiate_multipart_upload(s3key)
    # Use a chunk size of 50 MiB
    chunk_size = 52428800
    # Use float division so math.ceil yields the true number of parts on Python 2 and 3.
    chunk_count = int(math.ceil(source_size / float(chunk_size)))
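    # Worked example (for illustration): a 120 MiB dump (125829120 bytes) with 50 MiB
    # parts gives ceil(125829120 / 52428800) = 3 parts of 50 MiB, 50 MiB and ~20 MiB.
    # S3 requires every part except the last to be at least 5 MiB, which this satisfies.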
    # Send the file parts, using FileChunkIO to create a file-like object
    # that points to a certain byte range within the original file. We
    # set bytes to never exceed the original file size.
    for i in range(chunk_count):
        print('Uploading file chunk: {0} of {1}'.format(i + 1, chunk_count))
        offset = chunk_size * i
        bytes = min(chunk_size, source_size - offset)
        with FileChunkIO(source_path, 'r', offset=offset, bytes=bytes) as fp:
            mp.upload_part_from_file(fp, part_num=i + 1)
    # Finish the upload
    completed_upload = mp.complete_upload()
    return completed_upload


def delete_local_backup_file(filename):
    print('Deleting file from local filesystem:{0}'.format(filename))
    os.remove(filename)


if __name__ == '__main__':
    # grab all the arguments
    arguments = docopt(__doc__, version='mongoHQ_s3_backup 0.0.1')
    database_name = arguments['--database']
    account_name = arguments['--account']
    oauth_token = arguments['--token']
    bucket = arguments['--bucket']
    aws_key = arguments['--awskey']
    aws_secret = arguments['--awssecret']
    prefix = arguments['--prefix']

    # first, fetch the backup
    filename = get_backup(database_name, account_name, oauth_token)
    if not filename:
        # we failed to save the backup successfully.
        sys.exit(1)

    # now, store the file we just downloaded up on S3
    print('Uploading file to S3. Bucket:{0}'.format(bucket))
    s3_success = upload_to_s3(prefix + filename, filename, bucket, aws_key, aws_secret)
    if not s3_success:
        # somehow failed the file upload
        print('Failure with S3 upload. Exiting...')
        sys.exit(1)
    print('Upload to S3 completed successfully')

    # Delete the local backup file, to not take up excessive disk space
    delete_local_backup_file(filename)