123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280 |
- """ Store packages in S3 """
- import logging
- import posixpath
- from datetime import timedelta
- from urllib.parse import quote, urlparse
- import boto3
- from botocore.config import Config
- from botocore.exceptions import ClientError
- from botocore.signers import CloudFrontSigner
- from cryptography.hazmat.backends import default_backend
- from cryptography.hazmat.primitives import hashes, serialization
- from cryptography.hazmat.primitives.asymmetric import padding
- from pyramid.settings import asbool, falsey
- from pyramid_duh.settings import asdict
- from pypicloud.dateutil import utcnow
- from pypicloud.models import Package
- from pypicloud.util import (
- EnvironSettings,
- PackageParseError,
- normalize_metadata,
- parse_filename,
- )
- from .object_store import ObjectStoreStorage
- LOG = logging.getLogger(__name__)
class S3Storage(ObjectStoreStorage):
    """Storage backend that keeps package files in an S3 bucket.

    The methods below read ``bucket_prefix``, ``public_url``, ``region_name``,
    ``expire_after``, ``sse``, ``object_acl`` and ``storage_class`` and call
    ``get_path``; none of these are defined in this class.
    NOTE(review): presumably they are provided by ``ObjectStoreStorage`` --
    confirm against the base class.
    """

    # Not read anywhere in this class.
    # NOTE(review): presumably a switch used by the test suite -- confirm.
    test = False

    def __init__(self, request=None, bucket=None, **kwargs):
        """Create the storage backend.

        :param request: Forwarded to the base class.
        :param bucket: Bucket resource, e.g. the one built by
            :meth:`get_bucket`.
        :param kwargs: Any other options, forwarded to the base class.
        """
        super().__init__(request=request, **kwargs)
        self.bucket = bucket

    @classmethod
    def _subclass_specific_config(cls, settings, common_config):
        """Read the S3-specific settings.

        :param settings: Application settings. ``storage.bucket`` is required;
            ``storage.server_side_encryption`` is optional.
        :param common_config: Not used by this implementation.
        :returns: Dict with the ``sse`` value and the ``bucket`` resource.
        :raises ValueError: If ``storage.bucket`` is not set.
        """
        sse = settings.get("storage.server_side_encryption")
        if sse not in (None, "AES256", "aws:kms"):
            # Only a warning: the unrecognized value is still passed through.
            LOG.warning(
                "Unrecognized value %r for 'storage.server_side_encryption'. "
                "See "
                "https://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Object.put "
                "for more details",
                sse,
            )

        bucket_name = settings.get("storage.bucket")
        if bucket_name is None:
            raise ValueError("You must specify the 'storage.bucket'")

        return {"sse": sse, "bucket": cls.get_bucket(bucket_name, settings)}

    @classmethod
    def get_bucket(
        cls, bucket_name: str, settings: EnvironSettings
    ) -> "boto3.s3.Bucket":
        """Return the named bucket, creating it if it does not exist yet.

        All connection options are read from the ``storage.*`` settings.

        :param bucket_name: Name of the S3 bucket.
        :param settings: Application settings holding the ``storage.*`` keys.
        :returns: The boto3 bucket resource.
        :raises ClientError: If the ``head_bucket`` probe fails with any error
            code other than ``404``.
        """
        config_settings = settings.get_as_dict(
            "storage.",
            region_name=str,
            signature_version=str,
            user_agent=str,
            user_agent_extra=str,
            connect_timeout=int,
            read_timeout=int,
            parameter_validation=asbool,
            max_pool_connections=int,
            proxies=asdict,
        )
        config_settings["s3"] = settings.get_as_dict(
            "storage.",
            use_accelerate_endpoint=asbool,
            payload_signing_enabled=asbool,
            addressing_style=str,
            signature_version=str,
        )
        config = Config(**config_settings)

        def verify_value(val):
            """Verify can be a boolean (False) or a string"""
            s = str(val).strip().lower()
            if s in falsey:
                return False
            return str(val)

        s3conn = boto3.resource(
            "s3",
            config=config,
            **settings.get_as_dict(
                "storage.",
                region_name=str,
                api_version=str,
                use_ssl=asbool,
                verify=verify_value,
                endpoint_url=str,
                aws_access_key_id=str,
                aws_secret_access_key=str,
                aws_session_token=str,
            )
        )

        bucket = s3conn.Bucket(bucket_name)
        try:
            # Only the success/failure of the probe matters, not its response.
            s3conn.meta.client.head_bucket(Bucket=bucket_name)
        except ClientError as e:
            error_code = e.response["Error"]["Code"]
            if error_code == "404":
                LOG.info("Creating S3 bucket %s", bucket_name)
                region = config.region_name
                # S3 rejects 'us-east-1' as an explicit LocationConstraint;
                # that region is selected by omitting the constraint.
                if region and region != "us-east-1":
                    location = {"LocationConstraint": region}
                    bucket.create(CreateBucketConfiguration=location)
                else:
                    bucket.create()
                bucket.wait_until_exists()
            else:
                if error_code == "301":
                    LOG.error(
                        "Bucket found in different region. Check that "
                        "the S3 bucket specified in 'storage.bucket' is "
                        "in 'storage.region_name'"
                    )
                raise
        return bucket

    @classmethod
    def package_from_object(cls, obj, factory):
        """Create a package from a S3 object.

        :param obj: S3 object exposing ``key``, ``metadata`` and
            ``last_modified``.
        :param factory: Callable that builds the package; called as
            ``factory(name, version, filename, last_modified, path=...,
            **metadata)``.
        :returns: The package, or ``None`` when the object carries no
            name/version metadata and its filename cannot be parsed.
        """
        filename = posixpath.basename(obj.key)
        name = obj.metadata.get("name")
        version = obj.metadata.get("version")
        metadata = Package.read_metadata(obj.metadata)
        # We used to not store metadata. This is for backwards
        # compatibility
        if name is None or version is None:
            try:
                name, version = parse_filename(filename)
            except PackageParseError:
                LOG.warning("S3 file %s has no package name", obj.key)
                return None

        return factory(
            name, version, filename, obj.last_modified, path=obj.key, **metadata
        )

    def list(self, factory=Package):
        """Yield a package for each object under ``self.bucket_prefix``.

        Objects that :meth:`package_from_object` cannot turn into a package
        are skipped.

        :param factory: Callable used to build each package.
        """
        keys = self.bucket.objects.filter(Prefix=self.bucket_prefix)
        for summary in keys:
            # ObjectSummary has no metadata, so we have to fetch it.
            obj = summary.Object()
            pkg = self.package_from_object(obj, factory)
            if pkg is not None:
                yield pkg

    def _generate_url(self, package):
        """Generate a download url for the S3 file.

        When ``self.public_url`` is truthy a plain, unsigned URL is built by
        hand; otherwise a presigned ``get_object`` URL is requested from the
        client with ``ExpiresIn=self.expire_after``.

        :param package: The package to link to.
        :returns: The URL as a string.
        """
        if self.public_url:
            # Percent-encode the key so characters such as '+' or spaces
            # survive in the hand-built URL (the CloudFront backend in this
            # module quotes its paths the same way).
            path = quote(self.get_path(package))
            if self.region_name:
                return "https://s3.{0}.amazonaws.com/{1}/{2}".format(
                    self.region_name, self.bucket.name, path
                )
            if "." in self.bucket.name:
                self._log_region_warning()
            return "https://{0}.s3.amazonaws.com/{1}".format(
                self.bucket.name, path
            )

        url = self.bucket.meta.client.generate_presigned_url(
            "get_object",
            Params={"Bucket": self.bucket.name, "Key": self.get_path(package)},
            ExpiresIn=self.expire_after,
        )
        # There is a special case if your bucket has a '.' in the name. The
        # generated URL will return a 301 and the pip downloads will fail.
        # If you provide a region_name, boto should correctly generate a url in
        # the form of `s3.<region>.amazonaws.com`
        # See https://github.com/stevearc/pypicloud/issues/145
        if "." in self.bucket.name:
            pieces = urlparse(url)
            if pieces.netloc == "s3.amazonaws.com" and self.region_name is None:
                self._log_region_warning()
        return url

    def _log_region_warning(self):
        """Spit out a warning about including region_name"""
        LOG.warning(
            "Your signed S3 urls may not work! "
            "Try adding the bucket region to the config with "
            "'storage.region_name = <region>' or using a bucket "
            "without any dots ('.') in the name."
        )

    def upload(self, package, datastream):
        """Store a package file in the bucket.

        The package metadata, plus its name and version, is attached to the
        object as S3 metadata. Encryption, ACL and storage class are only sent
        when the corresponding attribute is set.

        :param package: The package being uploaded.
        :param datastream: Body of the file, passed to ``put`` as ``Body``.
        """
        key = self.bucket.Object(self.get_path(package))
        kwargs = {}
        if self.sse is not None:
            kwargs["ServerSideEncryption"] = self.sse
        if self.object_acl:
            kwargs["ACL"] = self.object_acl
        if self.storage_class is not None:
            kwargs["StorageClass"] = self.storage_class

        metadata = package.get_metadata()
        metadata["name"] = package.name
        metadata["version"] = package.version
        metadata = normalize_metadata(metadata)
        key.put(Metadata=metadata, Body=datastream, **kwargs)

    def delete(self, package):
        """Remove the package's object from the bucket.

        :param package: The package to delete.
        """
        self.bucket.delete_objects(
            Delete={"Objects": [{"Key": self.get_path(package)}]}
        )

    def check_health(self):
        """Check that the bucket can be reached with ``head_bucket``.

        :returns: ``(True, "")`` on success, otherwise ``(False, message)``
            where ``message`` is the text of the ``ClientError``.
        """
        try:
            self.bucket.meta.client.head_bucket(Bucket=self.bucket.name)
        except ClientError as e:
            return False, str(e)
        return True, ""
class CloudFrontS3Storage(S3Storage):
    """Storage backend that uses S3 and CloudFront.

    Files are stored in S3 exactly as in :class:`S3Storage`; only the download
    URLs differ. They point at the CloudFront domain and are signed when a key
    id is configured.
    """

    def __init__(
        self, request=None, domain=None, crypto_pk=None, key_id=None, **kwargs
    ):
        """Create the storage backend.

        :param request: Forwarded to :class:`S3Storage`.
        :param domain: CloudFront domain that download URLs are prefixed with.
        :param crypto_pk: Loaded private key used to sign URLs. May be
            ``None`` when URLs are not signed.
        :param key_id: CloudFront key id. When ``None`` no signer is created
            and URLs are returned unsigned.
        :param kwargs: Any other options, forwarded to :class:`S3Storage`.
        """
        super().__init__(request, **kwargs)
        self.domain = domain
        self.crypto_pk = crypto_pk
        self.key_id = key_id

        self.cf_signer = None
        if key_id is not None:
            self.cf_signer = CloudFrontSigner(self.key_id, self._rsa_signer)

        self.client = boto3.client("cloudfront")

    @classmethod
    def configure(cls, settings):
        """Build the constructor kwargs from the application settings.

        The private key is taken from ``storage.cloud_front_key_string`` if
        set, otherwise read from the file named by
        ``storage.cloud_front_key_file``. If neither is set, no key is loaded
        so that unsigned URLs can be served.

        :param settings: Application settings.
            ``storage.cloud_front_domain`` is required.
        :returns: Dict of keyword arguments for the constructor.
        :raises KeyError: If ``storage.cloud_front_domain`` is missing.
        :raises ValueError: If ``storage.cloud_front_key_id`` is set but no
            private key was provided.
        """
        kwargs = super().configure(settings)
        kwargs["domain"] = settings["storage.cloud_front_domain"]
        kwargs["key_id"] = settings.get("storage.cloud_front_key_id")

        private_key = settings.get("storage.cloud_front_key_string")
        if private_key is None:
            key_file = settings.get("storage.cloud_front_key_file")
            if key_file:
                with open(key_file, "rb") as ifile:
                    private_key = ifile.read()
        else:
            private_key = private_key.encode("utf-8")

        if private_key is None:
            # Without a key there is nothing to parse. That is fine for
            # unsigned URLs, but signing cannot work, so fail early and
            # clearly if a key id was configured.
            if kwargs["key_id"] is not None:
                raise ValueError(
                    "'storage.cloud_front_key_id' is set but no private key "
                    "was provided. Set 'storage.cloud_front_key_string' or "
                    "'storage.cloud_front_key_file'"
                )
            crypto_pk = None
        else:
            crypto_pk = serialization.load_pem_private_key(
                private_key, password=None, backend=default_backend()
            )
        kwargs["crypto_pk"] = crypto_pk

        return kwargs

    def _rsa_signer(self, message):
        """Generate a RSA signature for a message.

        :param message: Bytes to sign (supplied by ``CloudFrontSigner``).
        :returns: The signature produced by ``self.crypto_pk``.
        """
        # NOTE(review): PKCS#1 v1.5 with SHA-1 appears to be what CloudFront's
        # signed-URL scheme expects -- confirm before changing the hash.
        return self.crypto_pk.sign(message, padding.PKCS1v15(), hashes.SHA1())

    def _generate_url(self, package):
        """Get the fully-qualified CloudFront path for a package.

        :param package: The package to link to.
        :returns: ``<domain>/<quoted path>``, signed with an expiry of
            ``self.expire_after`` seconds from now when a signer is
            configured.
        """
        path = self.get_path(package)
        url = self.domain + "/" + quote(path)

        # No key id, no signer, so we don't have to sign the URL
        if self.cf_signer is None:
            return url

        # To sign with a canned policy:
        expires = utcnow() + timedelta(seconds=self.expire_after)
        return self.cf_signer.generate_presigned_url(url, date_less_than=expires)
|