"""
Sync Media to S3
================

Django command that scans all files in your settings.MEDIA_ROOT folder and
uploads them to S3 with the same directory structure.

This command can optionally do the following but it is off by default:
* gzip compress any CSS and Javascript files it finds and adds the appropriate
  'Content-Encoding' header.
* set a far future 'Expires' header for optimal caching.

Note: This script requires the Python boto library and valid Amazon Web
Services API keys.

Required settings.py variables:
AWS_ACCESS_KEY_ID = ''
AWS_SECRET_ACCESS_KEY = ''

Command options are:
  -b BUCKET, --bucket=BUCKET
                        The name of the Amazon bucket you are uploading to.
  -p PREFIX, --prefix=PREFIX
                        The prefix to prepend to the path on S3.
  -d DIRECTORY, --dir=DIRECTORY
                        The root directory to use instead of your MEDIA_ROOT
  --gzip                Enables gzipping CSS and Javascript files.
  --expires             Enables setting a far future expires header.
  --force               Skip the file mtime check to force upload of all
                        files.
  --remove-missing
                        Remove any existing keys from the bucket that are not
                        present in your local. DANGEROUS!
  --exclude-list        Override default directory and file exclusion
                        filters. (enter as comma separated line)
  --dry-run
                        Do A dry-run to show what files would be affected.

"""
import datetime
from fnmatch import fnmatch
try:
    from hashlib import md5
except ImportError:
    from md5 import md5
import optparse
import os
import time

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError

from s3sync.utils import (get_aws_info, get_bucket_and_key, ConfigMissingError,
    upload_file_to_s3)

# Make sure boto is available
try:
    import boto
    import boto.exception
    from boto.s3.bucketlistresultset import bucket_lister
except ImportError:
    raise ImportError("The boto Python library is not installed.")


class Command(BaseCommand):
    # Extra variables to avoid passing these around
    AWS_ACCESS_KEY_ID = ''
    AWS_SECRET_ACCESS_KEY = ''
    AWS_BUCKET_NAME = ''
    DIRECTORY = ''
    EXCLUDE_LIST = []

    upload_count = 0
    skip_count = 0
    remove_bucket_count = 0

    option_list = BaseCommand.option_list + (
        optparse.make_option('-b', '--bucket',
            dest='bucket', default='',
            help="The name of the Amazon bucket you are uploading to."),
        optparse.make_option('-p', '--prefix',
            dest='prefix',
            default='',
            help="The prefix to prepend to the path on S3."),
        optparse.make_option('-d', '--dir',
            dest='dir', default=settings.MEDIA_ROOT,
            help="The root directory to use instead of your MEDIA_ROOT"),
        optparse.make_option('--gzip',
            action='store_true', dest='gzip', default=False,
            help="Enables gzipping CSS and Javascript files."),
        optparse.make_option('--expires',
            action='store_true', dest='expires', default=False,
            help="Enables setting a far future expires header."),
        optparse.make_option('--force',
            action='store_true', dest='force', default=False,
            help="Skip the file mtime check to force upload of all files."),
        optparse.make_option('--remove-missing',
            action='store_true', dest='remove_missing', default=False,
            help="Remove keys in the bucket for files locally missing."),
        optparse.make_option('--dry-run',
            action='store_true', dest='dry_run', default=False,
            help="Do a dry-run to show what files would be affected."),
        optparse.make_option('--exclude-list', dest='exclude_list',
            action='store', default='',
            help="Override default directory and file exclusion filters. "
                 "(enter as comma separated line)"),
        # TODO: implement
        optparse.make_option('--hash-chunk-size', dest='hash_chunk',
            action='store', default=4096,
            help="Override default directory and file exclusion filters. "
                 "(enter as comma separated line)"),
    )

    help = ('Syncs the complete MEDIA_ROOT structure and files to S3 into '
            'the given bucket name.')

    def handle(self, *args, **options):
        # Check for AWS keys in settings
        try:
            self.AWS_ACCESS_KEY_ID, self.AWS_SECRET_ACCESS_KEY, self.AWS_S3_HOST = get_aws_info()
        except ConfigMissingError:
            raise CommandError('Missing AWS keys from settings file. ' +
                ' Please supply both AWS_ACCESS_KEY_ID and ' +
                'AWS_SECRET_ACCESS_KEY.')

        self.AWS_ACCESS_KEY_ID = settings.AWS_ACCESS_KEY_ID
        self.AWS_SECRET_ACCESS_KEY = settings.AWS_SECRET_ACCESS_KEY
        self.AWS_BUCKET_NAME = options.get('bucket')

        if not self.AWS_BUCKET_NAME:
            raise CommandError('No bucket specified. Use --bucket=name')

        if not settings.MEDIA_ROOT:
            raise CommandError('MEDIA_ROOT must be set in your settings.')

        self.verbosity = int(options.get('verbosity'))
        # TODO: compare first hash chunk of files to see if they're identical
        self.hash_chunk = int(options.get('hash_chunk'))
        self.prefix = options.get('prefix')
        self.do_gzip = options.get('gzip')
        self.do_expires = options.get('expires')
        self.do_force = options.get('force')
        self.remove_missing = options.get('remove_missing')
        self.dry_run = options.get('dry_run')
        self.DIRECTORY = options.get('dir')
        exclude_list = options.get('exclude_list')
        if exclude_list and isinstance(exclude_list, list):
            # command line option overrides default exclude_list
            self.EXCLUDE_LIST = exclude_list
        elif exclude_list:
            self.EXCLUDE_LIST = exclude_list.split(',')

        # Now call the syncing method to walk the MEDIA_ROOT directory and
        # upload all files found.
        self.sync_s3()

        print ("%d files uploaded." % (self.upload_count))
        print ("%d files skipped." % (self.skip_count))
        if self.remove_missing:
            print ("%d keys removed from bucket." % (self.remove_bucket_count))
        if self.dry_run:
            print ('THIS IS A DRY RUN, NO ACTUAL CHANGES.')

    def sync_s3(self):
        """
        Walks the media directory and syncs files to S3
        """
        bucket, key = get_bucket_and_key(self.AWS_BUCKET_NAME)
        self.s3_files = {}
        self.files_processed = set()
        os.path.walk(self.DIRECTORY, self.upload_s3,
            (bucket, key, self.AWS_BUCKET_NAME, self.DIRECTORY))

        # Remove files on bucket if they're missing locally
        if self.remove_missing:
            self.remove_s3(bucket)

    def find_key_in_list(self, s3_list, file_key):
        if file_key in self.s3_files:
            return self.s3_files[file_key]
        for s3_key in s3_list:
            if s3_key.name == file_key:
                return s3_key
            if s3_key.name not in self.files_processed:
                self.s3_files[s3_key.name] = s3_key
        return None

    def finish_list(self, s3_list):
        for s3_key in s3_list:
            if s3_key.name not in self.files_processed:
                self.s3_files[s3_key.name] = s3_key

    def remove_s3(self, bucket):
        if not self.s3_files:
            if self.verbosity > 0:
                print ('No files to remove.')
            return

        for key, value in self.s3_files.items():
            if not self.dry_run:
                bucket.delete_key(value.name)
            self.remove_bucket_count += 1
            print ("Deleting %s..." % (key))

    def upload_s3(self, arg, dirname, names):
        """
        This is the callback to os.path.walk and where much of the work happens
        """
        bucket, key, bucket_name, root_dir = arg

        # Skip files and directories we don't want to sync
        for pattern in self.EXCLUDE_LIST:
            if fnmatch(os.path.basename(dirname), pattern):
                if self.verbosity > 1:
                    print ('Skipping: %s (rule: %s)' % (names, pattern))
                del names[:]
                return

        # Later we assume the MEDIA_ROOT ends with a trailing slash
        if not root_dir.endswith(os.path.sep):
            root_dir = root_dir + os.path.sep

        list_prefix = dirname[len(root_dir):]
        if self.prefix:
            list_prefix = '%s/%s' % (self.prefix, list_prefix)
        s3_list = bucket_lister(bucket, prefix=list_prefix)

        for name in names:
            bad_name = False
            for pattern in self.EXCLUDE_LIST:
                if fnmatch(name, pattern):
                    bad_name = True  # Skip files we don't want to sync
            if bad_name:
                if self.verbosity > 1:
                    print ('Skipping: %s (rule: %s)' % (names, pattern))
                continue

            filename = os.path.join(dirname, name)
            if os.path.isdir(filename):
                continue  # Don't try to upload directories

            file_key = filename[len(root_dir):]
            if self.prefix:
                file_key = '%s/%s' % (self.prefix, file_key)

            # Check if file on S3 is older than local file, if so, upload
            # TODO: check if hash chunk corresponds
            if not self.do_force:
                s3_key = self.find_key_in_list(s3_list, file_key)
                if s3_key:
                    s3_datetime = datetime.datetime(*time.strptime(
                        s3_key.last_modified, '%Y-%m-%dT%H:%M:%S.000Z')[0:6])
                    local_datetime = datetime.datetime.utcfromtimestamp(
                        os.stat(filename).st_mtime)
                    if local_datetime < s3_datetime:
                        self.skip_count += 1
                        if self.verbosity > 1:
                            print ("File %s hasn't been modified since last " \
                                "being uploaded" % (file_key))
                        if file_key in self.s3_files:
                            self.files_processed.add(file_key)
                            del self.s3_files[file_key]
                        continue
            if file_key in self.s3_files:
                self.files_processed.add(file_key)
                del self.s3_files[file_key]

            # File is newer, let's process and upload
            if self.verbosity > 0:
                print ("Uploading %s..." % file_key)
                if self.dry_run:
                    self.upload_count += 1
                    continue

            try:
                upload_file_to_s3(file_key, filename, key,
                    do_gzip=self.do_gzip, do_expires=self.do_expires,
                    verbosity=self.verbosity)
            except boto.exception.S3CreateError as e:
                # TODO: retry to create a few times
                print ("Failed to upload: %s" % e)
            except Exception as e:
                print (e)
                raise
            else:
                self.upload_count += 1

        # If we don't care about what's missing, wipe this to save memory.
        if not self.remove_missing:
            self.s3_files = {}
        else:
            self.finish_list(s3_list)