deploy.py

import os
import re
import argparse
import logging
import gzip
import shutil
import mimetypes
from datetime import datetime

import boto3
from six import BytesIO

from . import config
from .prefixcovertree import PrefixCoverTree

# Support UTC timezone in 2.7
try:
    from datetime import timezone
    UTC = timezone.utc
except ImportError:
    from datetime import tzinfo, timedelta

    class UTCTz(tzinfo):
        def utcoffset(self, dt):
            return timedelta(0)

        def tzname(self, dt):
            return 'UTC'

        def dst(self, dt):
            return timedelta(0)

    UTC = UTCTz()

COMPRESSED_EXTENSIONS = frozenset([
    '.txt', '.html', '.css', '.js', '.json', '.xml', '.rss', '.ico', '.svg'])

_STORAGE_STANDARD = 'STANDARD'
_STORAGE_REDUCED_REDUNDANCY = 'REDUCED_REDUNDANCY'

logger = logging.getLogger(__name__)

mimetypes.init()

def key_name_from_path(path):
    """Convert a relative path into a key name."""
    key_parts = []
    while True:
        head, tail = os.path.split(path)
        if tail != '.':
            key_parts.append(tail)
        if head == '':
            break
        path = head
    return '/'.join(reversed(key_parts))
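
# Usage note (illustration only, not part of the original module): deploy()
# builds relative paths like './index.html' or 'blog/index.html' with
# os.path.join(); this helper drops the '.' component and joins the rest
# with '/' so the result can be used as an S3 key, e.g.
#
#     key_name_from_path('./blog/index.html')  ->  'blog/index.html'
#     key_name_from_path('index.html')         ->  'index.html'
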

def upload_key(obj, path, cache_rules, dry, storage_class=None):
    """Upload the file at path to the given S3 object."""
    # mimetypes.guess_type() always returns a (type, encoding) tuple, so
    # check the guessed type itself and fall back to a generic binary type.
    content_type, _ = mimetypes.guess_type(obj.key)
    if content_type is None:
        content_type = 'application/octet-stream'

    content_file = open(path, 'rb')
    try:
        encoding = None
        cache_control = config.resolve_cache_rules(obj.key, cache_rules)
        if cache_control is not None:
            logger.debug('Using cache control: {}'.format(cache_control))

        # Gzip-compress text-like files before uploading.
        _, ext = os.path.splitext(path)
        if ext in COMPRESSED_EXTENSIONS:
            logger.info('Compressing {}...'.format(obj.key))
            compressed = BytesIO()
            gzip_file = gzip.GzipFile(
                fileobj=compressed, mode='wb', compresslevel=9)
            try:
                shutil.copyfileobj(content_file, gzip_file)
            finally:
                gzip_file.close()

            compressed.seek(0)
            content_file.close()
            content_file = compressed
            encoding = 'gzip'

        logger.info('Uploading {}...'.format(obj.key))

        if not dry:
            kwargs = {'ContentType': content_type}
            if cache_control is not None:
                kwargs['CacheControl'] = cache_control
            if encoding is not None:
                kwargs['ContentEncoding'] = encoding
            if storage_class is not None:
                kwargs['StorageClass'] = storage_class
            obj.put(Body=content_file.read(), **kwargs)
    finally:
        content_file.close()
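
# Usage sketch (illustration only, not part of the original module); the
# bucket name and local path here are hypothetical. With dry=True the file
# is read and compressed locally but nothing is sent to S3:
#
#     bucket = boto3.resource('s3').Bucket('example-bucket')
#     upload_key(bucket.Object('index.html'), '_site/index.html',
#                cache_rules=[], dry=True)
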

def deploy(conf, base_path, force, dry):
    """Deploy using the given configuration."""
    bucket_name = conf['s3_bucket']
    cache_rules = conf.get('cache_rules', [])
    endpoint_url = conf.get('endpoint_url')
    if conf.get('s3_reduced_redundancy', False):
        storage_class = _STORAGE_REDUCED_REDUNDANCY
    else:
        storage_class = _STORAGE_STANDARD

    logger.info('Connecting to bucket {}...'.format(bucket_name))
    s3 = boto3.resource('s3', endpoint_url=endpoint_url)
    bucket = s3.Bucket(bucket_name)

    site_dir = os.path.join(base_path, conf['site'])
    logger.info('Site: {}'.format(site_dir))

    processed_keys = set()
    updated_keys = set()

    # First pass: reconcile existing bucket objects with the local site.
    for obj in bucket.objects.all():
        processed_keys.add(obj.key)
        path = os.path.join(site_dir, obj.key)

        # Delete keys that have been deleted locally
        if not os.path.isfile(path):
            logger.info('Deleting {}...'.format(obj.key))
            if not dry:
                obj.delete()
            updated_keys.add(obj.key)
            continue

        # Skip keys that have not been updated
        mtime = datetime.fromtimestamp(os.path.getmtime(path), UTC)
        if not force:
            if (mtime <= obj.last_modified and
                    obj.storage_class == storage_class):
                logger.info('Not modified, skipping {}.'.format(obj.key))
                continue

        upload_key(obj, path, cache_rules, dry, storage_class=storage_class)
        updated_keys.add(obj.key)

    # Second pass: upload local files that do not yet exist in the bucket.
    for dirpath, dirnames, filenames in os.walk(site_dir):
        key_base = os.path.relpath(dirpath, site_dir)
        for name in filenames:
            path = os.path.join(dirpath, name)
            key_name = key_name_from_path(os.path.join(key_base, name))
            if key_name in processed_keys:
                continue

            # Create new object
            obj = bucket.Object(key_name)
            logger.info('Creating key {}...'.format(obj.key))
            upload_key(
                obj, path, cache_rules, dry, storage_class=storage_class)
            updated_keys.add(key_name)

    logger.info('Bucket update done.')

    # Invalidate files in the CloudFront distribution
    if 'cloudfront_distribution_id' in conf:
        logger.info('Connecting to CloudFront distribution {}...'.format(
            conf['cloudfront_distribution_id']))

        # Map index documents back to their directory path so the directory
        # URL is invalidated rather than the index file itself.
        index_pattern = None
        if 'index_document' in conf:
            index_doc = conf['index_document']
            index_pattern = r'(^(?:.*/)?)' + re.escape(index_doc) + '$'

        def path_from_key_name(key_name):
            if index_pattern is not None:
                m = re.match(index_pattern, key_name)
                if m:
                    return m.group(1)
            return key_name

        # Cover the updated paths with as few wildcard prefixes as possible
        # while excluding keys that were not touched.
        t = PrefixCoverTree()
        for key_name in updated_keys:
            t.include(path_from_key_name(key_name))
        for key_name in processed_keys - updated_keys:
            t.exclude(path_from_key_name(key_name))

        paths = []
        for prefix, exact in t.matches():
            path = '/' + prefix + ('' if exact else '*')
            logger.info('Preparing to invalidate {}...'.format(path))
            paths.append(path)

        invalidate_paths(conf['cloudfront_distribution_id'], paths, dry)


def invalidate_paths(dist_id, paths, dry):
    """Invalidate CloudFront distribution paths."""
    cloudfront = boto3.client('cloudfront')

    if len(paths) > 0:
        if not dry:
            logger.info('Creating invalidation request...')
            response = cloudfront.create_invalidation(
                DistributionId=dist_id,
                InvalidationBatch=dict(
                    Paths=dict(
                        Quantity=len(paths),
                        Items=paths
                    ),
                    CallerReference='s3-deploy-website'
                )
            )
            invalidation = response['Invalidation']
            logger.info('Invalidation request {} is {}'.format(
                invalidation['Id'], invalidation['Status']))
    else:
        logger.info('Nothing updated, invalidation skipped.')


def main(command_args=None):
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('boto3').setLevel(logging.WARNING)

    parser = argparse.ArgumentParser(
        description='AWS S3 website deployment tool')
    parser.add_argument(
        '-f', '--force', action='store_true', dest='force',
        help='force upload of all files')
    parser.add_argument(
        '-n', '--dry-run', action='store_true', dest='dry',
        help='run without uploading any files')
    parser.add_argument(
        'path', help='the .s3_website.yaml configuration file or directory',
        default='.', nargs='?')
    args = parser.parse_args(command_args)

    # Open configuration file
    conf, base_path = config.load_config_file(args.path)

    deploy(conf, base_path, args.force, args.dry)
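

# Illustrative additions below; they are not part of the original module.
#
# Sketch of a .s3_website.yaml configuration using the keys read above. The
# exact schema, including the cache_rules entry format, is defined by the
# accompanying config module, and all values here are hypothetical:
#
#     site: _site
#     s3_bucket: example-bucket
#     s3_reduced_redundancy: false
#     index_document: index.html
#     cloudfront_distribution_id: XXXXXXXXXXXXX
#     # endpoint_url: ...   # optional, e.g. for S3-compatible services
#     cache_rules:
#       - ...               # see config.resolve_cache_rules for the format

# Allow running the module directly. The relative imports require executing
# it as part of its package (e.g. python -m <package>.deploy); the module is
# presumably also exposed as a console-script entry point that calls main().
if __name__ == '__main__':
    main()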