s3sync_media_6.py 3.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. def upload_s3(self, arg, dirname, names):
  2. """
  3. This is the callback to os.path.walk and where much of the work happens
  4. """
  5. bucket, key, bucket_name, root_dir = arg
  6. # Skip files and directories we don't want to sync
  7. for pattern in self.EXCLUDE_LIST:
  8. if fnmatch(os.path.basename(dirname), pattern):
  9. if self.verbosity > 1:
  10. print ('Skipping: %s (rule: %s)' % (names, pattern))
  11. del names[:]
  12. return
  13. # Later we assume the MEDIA_ROOT ends with a trailing slash
  14. if not root_dir.endswith(os.path.sep):
  15. root_dir = root_dir + os.path.sep
  16. list_prefix = dirname[len(root_dir):]
  17. if self.prefix:
  18. list_prefix = '%s/%s' % (self.prefix, list_prefix)
  19. s3_list = bucket_lister(bucket, prefix=list_prefix)
  20. for name in names:
  21. bad_name = False
  22. for pattern in self.EXCLUDE_LIST:
  23. if fnmatch(name, pattern):
  24. bad_name = True # Skip files we don't want to sync
  25. if bad_name:
  26. if self.verbosity > 1:
  27. print ('Skipping: %s (rule: %s)' % (names, pattern))
  28. continue
  29. filename = os.path.join(dirname, name)
  30. if os.path.isdir(filename):
  31. continue # Don't try to upload directories
  32. file_key = filename[len(root_dir):]
  33. if self.prefix:
  34. file_key = '%s/%s' % (self.prefix, file_key)
  35. # Check if file on S3 is older than local file, if so, upload
  36. # TODO: check if hash chunk corresponds
  37. if not self.do_force:
  38. s3_key = self.find_key_in_list(s3_list, file_key)
  39. if s3_key:
  40. s3_datetime = datetime.datetime(*time.strptime(
  41. s3_key.last_modified, '%Y-%m-%dT%H:%M:%S.000Z')[0:6])
  42. local_datetime = datetime.datetime.utcfromtimestamp(
  43. os.stat(filename).st_mtime)
  44. if local_datetime < s3_datetime:
  45. self.skip_count += 1
  46. if self.verbosity > 1:
  47. print ("File %s hasn't been modified since last " \
  48. "being uploaded" % (file_key))
  49. if file_key in self.s3_files:
  50. self.files_processed.add(file_key)
  51. del self.s3_files[file_key]
  52. continue
  53. if file_key in self.s3_files:
  54. self.files_processed.add(file_key)
  55. del self.s3_files[file_key]
  56. # File is newer, let's process and upload
  57. if self.verbosity > 0:
  58. print ("Uploading %s..." % file_key)
  59. if self.dry_run:
  60. self.upload_count += 1
  61. continue
  62. try:
  63. upload_file_to_s3(file_key, filename, key,
  64. do_gzip=self.do_gzip, do_expires=self.do_expires,
  65. verbosity=self.verbosity)
  66. except boto.exception.S3CreateError as e:
  67. # TODO: retry to create a few times
  68. print ("Failed to upload: %s" % e)
  69. except Exception as e:
  70. print (e)
  71. raise
  72. else:
  73. self.upload_count += 1
  74. # If we don't care about what's missing, wipe this to save memory.
  75. if not self.remove_missing:
  76. self.s3_files = {}
  77. else:
  78. self.finish_list(s3_list)