deploy.py

import os
import re
import argparse
import logging
import gzip
import shutil
import mimetypes
from datetime import datetime

import boto3
from six import BytesIO

from . import config
from .prefixcovertree import PrefixCoverTree

# Support UTC timezone in 2.7
try:
    from datetime import timezone
    UTC = timezone.utc
except ImportError:
    from datetime import tzinfo, timedelta

    class UTCTz(tzinfo):
        def utcoffset(self, dt):
            return timedelta(0)

        def tzname(self, dt):
            return 'UTC'

        def dst(self, dt):
            return timedelta(0)

    UTC = UTCTz()

COMPRESSED_EXTENSIONS = frozenset([
    '.txt', '.html', '.css', '.js', '.json', '.xml', '.rss', '.ico', '.svg'])

_STORAGE_STANDARD = 'STANDARD'
_STORAGE_REDUCED_REDUNDANCY = 'REDUCED_REDUNDANCY'

logger = logging.getLogger(__name__)

mimetypes.init()

def key_name_from_path(path):
    """Convert a relative path into a key name."""
    key_parts = []
    while True:
        head, tail = os.path.split(path)
        if tail != '.':
            key_parts.append(tail)
        if head == '':
            break
        path = head
    return '/'.join(reversed(key_parts))
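
# Usage note (illustration only, not part of the original module): deploy()
# builds relative paths like './index.html' or 'blog/index.html' with
# os.path.join(); this helper drops the '.' component and joins the rest
# with '/' so the result can be used as an S3 key, e.g.
#
#     key_name_from_path('./blog/index.html')  ->  'blog/index.html'
#     key_name_from_path('index.html')         ->  'index.html'
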

def upload_key(obj, path, cache_rules, dry, storage_class=None):
    """Upload the file at path to the given S3 object."""
    # mimetypes.guess_type() always returns a (type, encoding) tuple, so
    # check the guessed type itself and fall back to a generic binary type.
    content_type, _ = mimetypes.guess_type(obj.key)
    if content_type is None:
        content_type = 'application/octet-stream'

    content_file = open(path, 'rb')
    try:
        encoding = None
        cache_control = config.resolve_cache_rules(obj.key, cache_rules)
        if cache_control is not None:
            logger.debug('Using cache control: {}'.format(cache_control))

        # Gzip-compress text-like files before uploading.
        _, ext = os.path.splitext(path)
        if ext in COMPRESSED_EXTENSIONS:
            logger.info('Compressing {}...'.format(obj.key))
            compressed = BytesIO()
            gzip_file = gzip.GzipFile(
                fileobj=compressed, mode='wb', compresslevel=9)
            try:
                shutil.copyfileobj(content_file, gzip_file)
            finally:
                gzip_file.close()

            compressed.seek(0)
            content_file.close()
            content_file = compressed
            encoding = 'gzip'

        logger.info('Uploading {}...'.format(obj.key))

        if not dry:
            kwargs = {'ContentType': content_type}
            if cache_control is not None:
                kwargs['CacheControl'] = cache_control
            if encoding is not None:
                kwargs['ContentEncoding'] = encoding
            if storage_class is not None:
                kwargs['StorageClass'] = storage_class
            obj.put(Body=content_file.read(), **kwargs)
    finally:
        content_file.close()
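
# Usage sketch (illustration only, not part of the original module); the
# bucket name and local path here are hypothetical. With dry=True the file
# is read and compressed locally but nothing is sent to S3:
#
#     bucket = boto3.resource('s3').Bucket('example-bucket')
#     upload_key(bucket.Object('index.html'), '_site/index.html',
#                cache_rules=[], dry=True)
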

def deploy(conf, base_path, force, dry):
    """Deploy using the given configuration."""
    bucket_name = conf['s3_bucket']
    cache_rules = conf.get('cache_rules', [])
    endpoint_url = conf.get('endpoint_url')
    if conf.get('s3_reduced_redundancy', False):
        storage_class = _STORAGE_REDUCED_REDUNDANCY
    else:
        storage_class = _STORAGE_STANDARD

    logger.info('Connecting to bucket {}...'.format(bucket_name))
    s3 = boto3.resource('s3', endpoint_url=endpoint_url)
    bucket = s3.Bucket(bucket_name)

    site_dir = os.path.join(base_path, conf['site'])
    logger.info('Site: {}'.format(site_dir))

    processed_keys = set()
    updated_keys = set()

    # First pass: reconcile existing bucket objects with the local site.
    for obj in bucket.objects.all():
        processed_keys.add(obj.key)
        path = os.path.join(site_dir, obj.key)

        # Delete keys that have been deleted locally
        if not os.path.isfile(path):
            logger.info('Deleting {}...'.format(obj.key))
            if not dry:
                obj.delete()
            updated_keys.add(obj.key)
            continue

        # Skip keys that have not been updated
        mtime = datetime.fromtimestamp(os.path.getmtime(path), UTC)
        if not force:
            if (mtime <= obj.last_modified and
                    obj.storage_class == storage_class):
                logger.info('Not modified, skipping {}.'.format(obj.key))
                continue

        upload_key(obj, path, cache_rules, dry, storage_class=storage_class)
        updated_keys.add(obj.key)

    # Second pass: upload local files that do not yet exist in the bucket.
    for dirpath, dirnames, filenames in os.walk(site_dir):
        key_base = os.path.relpath(dirpath, site_dir)
        for name in filenames:
            path = os.path.join(dirpath, name)
            key_name = key_name_from_path(os.path.join(key_base, name))
            if key_name in processed_keys:
                continue

            # Create new object
            obj = bucket.Object(key_name)
            logger.info('Creating key {}...'.format(obj.key))
            upload_key(
                obj, path, cache_rules, dry, storage_class=storage_class)
            updated_keys.add(key_name)

    logger.info('Bucket update done.')

    # Invalidate files in the CloudFront distribution
    if 'cloudfront_distribution_id' in conf:
        logger.info('Connecting to CloudFront distribution {}...'.format(
            conf['cloudfront_distribution_id']))

        # Map index documents back to their directory path so the directory
        # URL is invalidated rather than the index file itself.
        index_pattern = None
        if 'index_document' in conf:
            index_doc = conf['index_document']
            index_pattern = r'(^(?:.*/)?)' + re.escape(index_doc) + '$'

        def path_from_key_name(key_name):
            if index_pattern is not None:
                m = re.match(index_pattern, key_name)
                if m:
                    return m.group(1)
            return key_name

        # Cover the updated paths with as few wildcard prefixes as possible
        # while excluding keys that were not touched.
        t = PrefixCoverTree()
        for key_name in updated_keys:
            t.include(path_from_key_name(key_name))
        for key_name in processed_keys - updated_keys:
            t.exclude(path_from_key_name(key_name))

        paths = []
        for prefix, exact in t.matches():
            path = '/' + prefix + ('' if exact else '*')
            logger.info('Preparing to invalidate {}...'.format(path))
            paths.append(path)

        invalidate_paths(conf['cloudfront_distribution_id'], paths, dry)


def invalidate_paths(dist_id, paths, dry):
    """Invalidate CloudFront distribution paths."""
    cloudfront = boto3.client('cloudfront')

    if len(paths) > 0:
        if not dry:
            logger.info('Creating invalidation request...')
            response = cloudfront.create_invalidation(
                DistributionId=dist_id,
                InvalidationBatch=dict(
                    Paths=dict(
                        Quantity=len(paths),
                        Items=paths
                    ),
                    CallerReference='s3-deploy-website'
                )
            )
            invalidation = response['Invalidation']
            logger.info('Invalidation request {} is {}'.format(
                invalidation['Id'], invalidation['Status']))
    else:
        logger.info('Nothing updated, invalidation skipped.')


def main(command_args=None):
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('boto3').setLevel(logging.WARNING)

    parser = argparse.ArgumentParser(
        description='AWS S3 website deployment tool')
    parser.add_argument(
        '-f', '--force', action='store_true', dest='force',
        help='force upload of all files')
    parser.add_argument(
        '-n', '--dry-run', action='store_true', dest='dry',
        help='run without uploading any files')
    parser.add_argument(
        'path', help='the .s3_website.yaml configuration file or directory',
        default='.', nargs='?')
    args = parser.parse_args(command_args)

    # Open configuration file
    conf, base_path = config.load_config_file(args.path)

    deploy(conf, base_path, args.force, args.dry)
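

# Illustrative additions below; they are not part of the original module.
#
# Sketch of a .s3_website.yaml configuration using the keys read above. The
# exact schema, including the cache_rules entry format, is defined by the
# accompanying config module, and all values here are hypothetical:
#
#     site: _site
#     s3_bucket: example-bucket
#     s3_reduced_redundancy: false
#     index_document: index.html
#     cloudfront_distribution_id: XXXXXXXXXXXXX
#     # endpoint_url: ...   # optional, e.g. for S3-compatible services
#     cache_rules:
#       - ...               # see config.resolve_cache_rules for the format

# Allow running the module directly. The relative imports require executing
# it as part of its package (e.g. python -m <package>.deploy); the module is
# presumably also exposed as a console-script entry point that calls main().
if __name__ == '__main__':
    main()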