# s3sync_pending.py
"""
Upload pending Media to S3
==========================

Django command that retrieves the pending file names from your chosen cache
backend and uploads those files to S3. Useful to run as a cron job to sync
files periodically, in conjunction with the storage backend for URLs.

Note: This script requires the Python boto library and valid Amazon Web
Services API keys.

Required settings.py variables:
AWS_ACCESS_KEY_ID = ''
AWS_SECRET_ACCESS_KEY = ''
BUCKET_UPLOADS = 'bucket-name.yourdomain.com'

Command options are:
  -p PREFIX, --prefix=PREFIX
                        The prefix to prepend to the path on S3.
  -d DIRECTORY, --dir=DIRECTORY
                        The root directory to use instead of your MEDIA_ROOT
  --remove-missing
                        Remove any existing keys from the bucket that are not
                        present locally. DANGEROUS!
  --dry-run
                        Do a dry-run to show what files would be affected.
"""
  24. import optparse
  25. from django.conf import settings
  26. from django.core.management.base import BaseCommand, CommandError
  27. import boto
  28. from s3sync.storage import cache
  29. from s3sync.utils import (ConfigMissingError, get_aws_info, get_bucket_and_key,
  30. get_pending_key, get_pending_delete_key,
  31. upload_file_to_s3)
  32. class Command(BaseCommand):
  33. # Extra variables to avoid passing these around
  34. upload_count = 0
  35. remaining_count = 0
  36. deleted_count = 0
  37. remaining_delete_count = 0
  38. option_list = BaseCommand.option_list + (
  39. optparse.make_option('-p', '--prefix',
  40. dest='prefix',
  41. default='',
  42. help="The prefix to prepend to the path on S3."),
  43. optparse.make_option('-d', '--dir',
  44. dest='dir',
  45. default='',
  46. help="The root directory to use instead of your MEDIA_ROOT"),
  47. optparse.make_option('--remove-missing',
  48. action='store_true', dest='remove_missing', default=False,
  49. help="Remove keys in the bucket for files locally missing."),
  50. optparse.make_option('--dry-run',
  51. action='store_true', dest='dry_run', default=False,
  52. help="Do a dry-run to show what files would be affected."),
  53. )
  54. help = 'Uploads the pending files from cache key.'
  55. def handle(self, *args, **options):
  56. # Check for AWS keys in settings
  57. try:
  58. get_aws_info()
  59. except ConfigMissingError:
  60. raise CommandError('Missing AWS keys from settings file. ' +
  61. ' Please supply both AWS_ACCESS_KEY_ID and ' +
  62. 'AWS_SECRET_ACCESS_KEY.')
  63. self.DIRECTORY = options.get('dir')
  64. if not self.DIRECTORY:
  65. self.DIRECTORY = getattr(settings, 'MEDIA_ROOT', '')
  66. if not self.DIRECTORY:
  67. raise CommandError('Empty directory. Define MEDIA_ROOT or use '
  68. ' --dir=dirname')
  69. self.verbosity = int(options.get('verbosity'))
  70. self.prefix = options.get('prefix')
  71. self.remove_missing = options.get('remove_missing')
  72. self.dry_run = options.get('dry_run')
  73. if not hasattr(settings, 'BUCKET_UPLOADS'):
  74. raise CommandError('Please specify the name of your upload bucket.'
  75. ' Set BUCKET_UPLOADS in your settings.py')
  76. self.bucket, self.key = get_bucket_and_key(settings.BUCKET_UPLOADS)
  77. # Now call the syncing method to walk the MEDIA_ROOT directory and
  78. # upload all files found.
  79. self.upload_pending_to_s3()
  80. if self.remove_missing:
  81. self.delete_pending_from_s3()
  82. print ("%d files uploaded (%d remaining)." % (self.upload_count,
  83. self.remaining_count))
  84. if self.remove_missing:
  85. print ("%d files deleted (%s remaining)." % (self.deleted_count,
  86. self.remaining_delete_count))
  87. if self.dry_run:
  88. print ('THIS IS A DRY RUN, NO ACTUAL CHANGES.')
  89. def delete_pending_from_s3(self):
  90. """Gets the pending filenames from cache and deletes them."""
  91. pending_delete_key = get_pending_delete_key()
  92. pending = cache.get(pending_delete_key, [])
  93. remaining = []
  94. for i, file_key in enumerate(pending):
  95. prefixed_file_key = '%s/%s' % (self.prefix, file_key)
  96. if self.verbosity > 0:
  97. print ("Deleting %s..." % prefixed_file_key)
  98. if self.dry_run:
  99. self.deleted_count += 1
  100. continue
  101. failed = True
  102. try:
  103. self.bucket.delete_key(prefixed_file_key)
  104. except boto.exception.S3ResponseError as e:
  105. # TODO: retry to delete a few times
  106. print ("Failed to delete: %s" % e)
  107. except Exception as e:
  108. print (e)
  109. raise
  110. else:
  111. failed = False
  112. self.deleted_count += 1
  113. finally:
  114. if failed:
  115. remaining.append(file_key)
  116. self.remaining_delete_count += 1
  117. if not self.dry_run:
  118. cache.set(pending_delete_key, remaining)
  119. def upload_pending_to_s3(self):
  120. """Gets the pending filenames from cache and uploads them."""
  121. pending_key = get_pending_key()
  122. pending = cache.get(pending_key, [])
  123. remaining = []
  124. for i, file_key in enumerate(pending):
  125. prefixed_file_key = '%s/%s' % (self.prefix, file_key)
  126. if self.verbosity > 0:
  127. print ("Uploading %s..." % prefixed_file_key)
  128. if self.dry_run:
  129. self.upload_count += 1
  130. continue
  131. filename = self.DIRECTORY + '/' + file_key
  132. failed = True
  133. try:
  134. upload_file_to_s3(prefixed_file_key, filename, self.key,
  135. do_gzip=True, do_expires=True)
  136. except boto.exception.S3CreateError as e:
  137. # TODO: retry to create a few times
  138. print ("Failed to upload: %s" % e)
  139. except Exception as e:
  140. print (e)
  141. raise
  142. else:
  143. failed = False
  144. self.upload_count += 1
  145. cache.delete(file_key)
  146. finally:
  147. if failed:
  148. remaining.append(file_key)
  149. self.remaining_count += 1
  150. if not self.dry_run:
  151. cache.set(pending_key, remaining)