herokupostgres_s3_backup.py
#! /usr/bin/env python
"""Heroku Postgres Backup to S3 Utility.

Uses the Heroku PGBackups system to pull the latest backup for a database and
put the file on Amazon S3. Unfortunately, it depends on the Heroku toolbelt,
since there is no standard API for PGBackups (that we have found). Be sure
that you are logged in to the Heroku toolbelt before you run this script, and
that it is in your $PATH.

Usage:
  herokupostgres_s3_backup.py -r <path_to_heroku> -a <app_name> -b <bucket> -k <aws_key_id> -s <aws_secret> -p <s3_key_prefix>
  herokupostgres_s3_backup.py (-h | --help)

Options:
  -h --help                                           Show this screen.
  -a <app_name> --app=<app_name>                      Heroku app name.
  -r <path_to_heroku> --herokupath=<path_to_heroku>   Location where the heroku executable lives; needs a trailing slash.
  -b <bucket> --bucket=<bucket>                       S3 bucket name.
  -k <aws_key_id> --awskey=<aws_key_id>               AWS key ID.
  -s <aws_secret> --awssecret=<aws_secret>            AWS secret key.
  -p <s3_key_prefix> --prefix=<s3_key_prefix>         Prefixes the filename of the S3 object.
"""
import requests
import math
import os
import sys
import datetime
import subprocess
from docopt import docopt
import boto
from filechunkio import FileChunkIO
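
# Note: requests, docopt, boto, and filechunkio are third-party packages,
# all available on PyPI (e.g. `pip install requests docopt boto filechunkio`).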

# Gets the latest backup for a given app.
# Relies on the heroku CLI toolbelt to talk to PGBackups.
def get_backup(heroku_path, app_name):
    # First, get the backup URL from the heroku toolbelt via `heroku pgbackups:url`.
    print('Looking up backup URL for: {0}'.format(app_name))
    # 'Shelling out' isn't ideal in this situation, but it is the path of least resistance for now.
    backup_url = subprocess.check_output(heroku_path + 'heroku pgbackups:url --app {0}'.format(app_name),
                                         shell=True).rstrip()
    # Download the file to disk. Stream, since the file could potentially be large.
    print('Downloading backup from: {0}'.format(backup_url))
    # We need to timestamp the filename ourselves, since the backup URL just gets the 'latest'.
    backup_filename = app_name + '-' + datetime.datetime.now().isoformat()
    r = requests.get(backup_url, stream=True)
    with open(backup_filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
                f.flush()
    print('Saved backup to file: {0}'.format(backup_filename))
    return backup_filename
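
# Illustrative example (hypothetical path and app name): get_backup('/usr/local/bin/', 'my-app')
# runs `/usr/local/bin/heroku pgbackups:url --app my-app`, then streams the URL it prints
# to a local file named like 'my-app-2014-05-01T03:00:00.000000'.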

# Uses S3 multipart upload to handle potentially large files.
def upload_to_s3(s3key, filename, bucket, aws_key, aws_secret):
    conn = boto.connect_s3(aws_key, aws_secret)
    bucket = conn.get_bucket(bucket)
    # Get file info.
    source_path = filename
    source_size = os.stat(source_path).st_size
    # Create a multipart upload request.
    mp = bucket.initiate_multipart_upload(s3key)
    # Use a chunk size of 50 MiB.
    chunk_size = 52428800
    # Round up with float division, so a final partial chunk still gets its own
    # part without adding a spurious empty part when the size divides evenly.
    chunk_count = int(math.ceil(source_size / float(chunk_size)))
    # Send the file parts, using FileChunkIO to create a file-like object
    # that points to a certain byte range within the original file. We
    # set bytes to never exceed the original file size.
    for i in range(chunk_count):
        print('Uploading file chunk: {0} of {1}'.format(i + 1, chunk_count))
        offset = chunk_size * i
        bytes = min(chunk_size, source_size - offset)
        with FileChunkIO(source_path, 'r', offset=offset, bytes=bytes) as fp:
            mp.upload_part_from_file(fp, part_num=i + 1)
    # Finish the upload.
    completed_upload = mp.complete_upload()
    return completed_upload
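
# Worked example of the part arithmetic above: a 120 MiB (125829120-byte) dump with
# 50 MiB parts gives ceil(125829120 / 52428800.0) = 3 parts of 50, 50, and 20 MiB.
# S3 requires every part except the last to be at least 5 MiB, which the 50 MiB
# chunk size comfortably satisfies.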

def delete_local_backup_file(filename):
    print('Deleting file from local filesystem: {0}'.format(filename))
    os.remove(filename)

if __name__ == '__main__':
    # Grab all the arguments.
    arguments = docopt(__doc__, version='herokupostgres_s3_backup 0.0.1')
    app_name = arguments['--app']
    heroku_path = arguments['--herokupath']
    bucket = arguments['--bucket']
    aws_key = arguments['--awskey']
    aws_secret = arguments['--awssecret']
    prefix = arguments['--prefix']
    # First, fetch the backup.
    filename = get_backup(heroku_path, app_name)
    if not filename:
        # We failed to save the backup successfully.
        sys.exit(1)
    # Now, store the file we just downloaded on S3.
    print('Uploading file to S3. Bucket: {0}'.format(bucket))
    s3_success = upload_to_s3(prefix + filename, filename, bucket, aws_key, aws_secret)
    if not s3_success:
        # The file upload failed somehow.
        print('Failure with S3 upload. Exiting...')
        sys.exit(1)
    print('Upload to S3 completed successfully')
    # Delete the local backup file, so it does not take up excessive disk space.
    delete_local_backup_file(filename)