s3_1.py

  1. """ Store packages in S3 """
  2. import logging
  3. import posixpath
  4. from datetime import timedelta
  5. from urllib.parse import quote, urlparse
  6. import boto3
  7. from botocore.config import Config
  8. from botocore.exceptions import ClientError
  9. from botocore.signers import CloudFrontSigner
  10. from cryptography.hazmat.backends import default_backend
  11. from cryptography.hazmat.primitives import hashes, serialization
  12. from cryptography.hazmat.primitives.asymmetric import padding
  13. from pyramid.settings import asbool, falsey
  14. from pyramid_duh.settings import asdict
  15. from pypicloud.dateutil import utcnow
  16. from pypicloud.models import Package
  17. from pypicloud.util import (
  18. EnvironSettings,
  19. PackageParseError,
  20. normalize_metadata,
  21. parse_filename,
  22. )
  23. from .object_store import ObjectStoreStorage
  24. LOG = logging.getLogger(__name__)


class S3Storage(ObjectStoreStorage):
    """Storage backend that uses S3"""

    test = False

    def __init__(self, request=None, bucket=None, **kwargs):
        super(S3Storage, self).__init__(request=request, **kwargs)
        self.bucket = bucket

    @classmethod
    def _subclass_specific_config(cls, settings, common_config):
        sse = settings.get("storage.server_side_encryption")
        if sse not in [None, "AES256", "aws:kms"]:
            LOG.warning(
                "Unrecognized value %r for 'storage.server_side_encryption'. See "
                "https://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Object.put "
                "for more details",
                sse,
            )
        bucket_name = settings.get("storage.bucket")
        if bucket_name is None:
            raise ValueError("You must specify the 'storage.bucket' setting")
        return {"sse": sse, "bucket": cls.get_bucket(bucket_name, settings)}

    @classmethod
    def get_bucket(
        cls, bucket_name: str, settings: EnvironSettings
    ) -> "boto3.s3.Bucket":
        config_settings = settings.get_as_dict(
            "storage.",
            region_name=str,
            signature_version=str,
            user_agent=str,
            user_agent_extra=str,
            connect_timeout=int,
            read_timeout=int,
            parameter_validation=asbool,
            max_pool_connections=int,
            proxies=asdict,
        )
        config_settings["s3"] = settings.get_as_dict(
            "storage.",
            use_accelerate_endpoint=asbool,
            payload_signing_enabled=asbool,
            addressing_style=str,
            signature_version=str,
        )
        config = Config(**config_settings)

        def verify_value(val):
            """Verify can be a boolean (False) or a string"""
            s = str(val).strip().lower()
            if s in falsey:
                return False
            else:
                return str(val)
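
        # For example (hypothetical values): verify_value("no") returns
        # False, which disables TLS certificate verification, while
        # verify_value("/etc/ssl/my-ca.pem") passes the CA bundle path
        # through to boto3 unchanged.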

        s3conn = boto3.resource(
            "s3",
            config=config,
            **settings.get_as_dict(
                "storage.",
                region_name=str,
                api_version=str,
                use_ssl=asbool,
                verify=verify_value,
                endpoint_url=str,
                aws_access_key_id=str,
                aws_secret_access_key=str,
                aws_session_token=str,
            )
        )
        bucket = s3conn.Bucket(bucket_name)
        try:
            s3conn.meta.client.head_bucket(Bucket=bucket_name)
        except ClientError as e:
            if e.response["Error"]["Code"] == "404":
                LOG.info("Creating S3 bucket %s", bucket_name)
                if config.region_name:
                    location = {"LocationConstraint": config.region_name}
                    bucket.create(CreateBucketConfiguration=location)
                else:
                    bucket.create()
                bucket.wait_until_exists()
            else:
                if e.response["Error"]["Code"] == "301":
                    LOG.error(
                        "Bucket found in different region. Check that "
                        "the S3 bucket specified in 'storage.bucket' is "
                        "in 'storage.region_name'"
                    )
                raise
        return bucket
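
    # Sketch of the behavior above (made-up values): with settings such as
    # storage.bucket = "my-packages" and storage.region_name = "us-west-2",
    # get_bucket() returns a boto3 Bucket resource, creating the bucket in
    # us-west-2 on first use if it does not already exist.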

    @classmethod
    def package_from_object(cls, obj, factory):
        """Create a package from an S3 object"""
        filename = posixpath.basename(obj.key)
        name = obj.metadata.get("name")
        version = obj.metadata.get("version")
        metadata = Package.read_metadata(obj.metadata)
        # We used to not store metadata. This is for backwards
        # compatibility
        if name is None or version is None:
            try:
                name, version = parse_filename(filename)
            except PackageParseError:
                LOG.warning("S3 file %s has no package name", obj.key)
                return None
        return factory(
            name, version, filename, obj.last_modified, path=obj.key, **metadata
        )
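
    # For example, a legacy key such as "packages/flask-1.0.tar.gz"
    # (hypothetical) that was uploaded without name/version metadata is
    # still recovered above by parsing the filename.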

    def list(self, factory=Package):
        keys = self.bucket.objects.filter(Prefix=self.bucket_prefix)
        for summary in keys:
            # ObjectSummary has no metadata, so we have to fetch it.
            obj = summary.Object()
            pkg = self.package_from_object(obj, factory)
            if pkg is not None:
                yield pkg

    def _generate_url(self, package):
        """Generate a signed url to the S3 file"""
        if self.public_url:
            if self.region_name:
                return "https://s3.{0}.amazonaws.com/{1}/{2}".format(
                    self.region_name, self.bucket.name, self.get_path(package)
                )
            else:
                if "." in self.bucket.name:
                    self._log_region_warning()
                return "https://{0}.s3.amazonaws.com/{1}".format(
                    self.bucket.name, self.get_path(package)
                )
        url = self.bucket.meta.client.generate_presigned_url(
            "get_object",
            Params={"Bucket": self.bucket.name, "Key": self.get_path(package)},
            ExpiresIn=self.expire_after,
        )
        # There is a special case if your bucket has a '.' in the name. The
        # generated URL will return a 301 and pip downloads will fail.
        # If you provide a region_name, boto should correctly generate a url
        # in the form of `s3.<region>.amazonaws.com`.
        # See https://github.com/stevearc/pypicloud/issues/145
        if "." in self.bucket.name:
            pieces = urlparse(url)
            if pieces.netloc == "s3.amazonaws.com" and self.region_name is None:
                self._log_region_warning()
        return url
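
    # URL shapes produced above (hypothetical bucket "mybucket" in
    # region "us-west-2"):
    #   public + region:   https://s3.us-west-2.amazonaws.com/mybucket/<key>
    #   public, no region: https://mybucket.s3.amazonaws.com/<key>
    #   otherwise:         a presigned URL with signature query parameters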

    def _log_region_warning(self):
        """Spit out a warning about including region_name"""
        LOG.warning(
            "Your signed S3 urls may not work! "
            "Try adding the bucket region to the config with "
            "'storage.region_name = <region>' or using a bucket "
            "without any dots ('.') in the name."
        )

    def upload(self, package, datastream):
        key = self.bucket.Object(self.get_path(package))
        kwargs = {}
        if self.sse is not None:
            kwargs["ServerSideEncryption"] = self.sse
        if self.object_acl:
            kwargs["ACL"] = self.object_acl
        if self.storage_class is not None:
            kwargs["StorageClass"] = self.storage_class
        metadata = package.get_metadata()
        metadata["name"] = package.name
        metadata["version"] = package.version
        metadata = normalize_metadata(metadata)
        key.put(Metadata=metadata, Body=datastream, **kwargs)
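
    # Note: the fields above are stored as S3 user-defined object metadata,
    # which S3 caps at 2 KB total, so unusually large package metadata
    # would be rejected by S3 at upload time.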

    def delete(self, package):
        self.bucket.delete_objects(
            Delete={"Objects": [{"Key": self.get_path(package)}]}
        )

    def check_health(self):
        try:
            self.bucket.meta.client.head_bucket(Bucket=self.bucket.name)
        except ClientError as e:
            return False, str(e)
        else:
            return True, ""


class CloudFrontS3Storage(S3Storage):
    """Storage backend that uses S3 and CloudFront"""

    def __init__(
        self, request=None, domain=None, crypto_pk=None, key_id=None, **kwargs
    ):
        super(CloudFrontS3Storage, self).__init__(request, **kwargs)
        self.domain = domain
        self.crypto_pk = crypto_pk
        self.key_id = key_id
        self.cf_signer = None
        if key_id is not None:
            self.cf_signer = CloudFrontSigner(self.key_id, self._rsa_signer)
        self.client = boto3.client("cloudfront")

    @classmethod
    def configure(cls, settings):
        kwargs = super(CloudFrontS3Storage, cls).configure(settings)
        kwargs["domain"] = settings["storage.cloud_front_domain"]
        kwargs["key_id"] = settings.get("storage.cloud_front_key_id")
        private_key = settings.get("storage.cloud_front_key_string")
        if private_key is None:
            key_file = settings.get("storage.cloud_front_key_file")
            if key_file:
                with open(key_file, "rb") as ifile:
                    private_key = ifile.read()
        else:
            private_key = private_key.encode("utf-8")
        # Guard against a missing key so unsigned configurations (no key id,
        # no private key) do not crash on load_pem_private_key(None).
        crypto_pk = None
        if private_key is not None:
            crypto_pk = serialization.load_pem_private_key(
                private_key, password=None, backend=default_backend()
            )
        kwargs["crypto_pk"] = crypto_pk
        return kwargs
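
    # Settings read above: storage.cloud_front_domain is required;
    # storage.cloud_front_key_id plus one of storage.cloud_front_key_string
    # or storage.cloud_front_key_file enable signed URLs.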

    def _rsa_signer(self, message):
        """Generate an RSA signature for a message"""
        return self.crypto_pk.sign(message, padding.PKCS1v15(), hashes.SHA1())
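
    # CloudFront URL signing verifies an RSA-SHA1 signature over the policy,
    # which is why the legacy SHA1 hash (rather than SHA256) is used here.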

    def _generate_url(self, package):
        """Get the fully-qualified CloudFront path for a package"""
        path = self.get_path(package)
        url = self.domain + "/" + quote(path)
        # No key id, no signer, so we don't have to sign the URL
        if self.cf_signer is None:
            return url
        # To sign with a canned policy:
        expires = utcnow() + timedelta(seconds=self.expire_after)
        return self.cf_signer.generate_presigned_url(url, date_less_than=expires)
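

# Example pypicloud configuration (a sketch; bucket and domain values are
# made up, and the "cloudfront" storage alias is assumed rather than taken
# from this file):
#
#   pypi.storage = s3
#   storage.bucket = my-packages
#   storage.region_name = us-west-2
#   storage.server_side_encryption = AES256
#
# or, to serve downloads through CloudFront with signed URLs:
#
#   pypi.storage = cloudfront
#   storage.cloud_front_domain = https://d1234example.cloudfront.net
#   storage.cloud_front_key_id = APKAEXAMPLE
#   storage.cloud_front_key_file = /path/to/private-key.pem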