# bucketstore.py (9.7 KB)
  1. """
  2. bucketstore module
  3. """
  4. import io
  5. import os
  6. import os.path
  7. import boto3
  8. import botocore
  9. from typing import BinaryIO, Callable, List, Union
# Fallback AWS region, used when no region is passed explicitly and the
# AWS_DEFAULT_REGION environment variable is not set.
AWS_DEFAULT_REGION = "us-east-1"
# Package version string. NOTE(review): "VERSION" looks like a placeholder
# that is substituted at build/release time — confirm.
__version__ = "VERSION"
  12. class S3Key:
  13. """An Amazon S3 Key"""
  14. def __init__(self, bucket: "S3Bucket", name: str) -> None:
  15. """constructor"""
  16. super().__init__()
  17. self.bucket = bucket
  18. self.name = name
  19. def __repr__(self) -> str:
  20. """str representation of an s3key"""
  21. return f"<S3Key name={self.name} bucket={self.bucket.name}>"
  22. def __len__(self) -> int:
  23. """returns the size of the s3 object of this key in bytes"""
  24. return self.size()
  25. @property
  26. def _boto_object(self): # type: ignore
  27. """the underlying boto3 s3 key object"""
  28. return self.bucket._boto_s3.Object(self.bucket.name, self.name)
  29. def get(self) -> str:
  30. """Gets the value of the key."""
  31. return self._boto_object.get()["Body"].read()
  32. def download(self, file: Union[str, BinaryIO], callback: Callable = None) -> None:
  33. """download the key to the given path or file object"""
  34. if self.name not in self.bucket:
  35. raise Exception("this key does not exist!")
  36. _download = self.bucket._boto_s3.meta.client.download_fileobj
  37. if isinstance(file, str):
  38. with open(file, "wb") as data:
  39. _download(self.bucket.name, self.name, data, Callback=callback)
  40. elif isinstance(file, io.IOBase):
  41. _download(self.bucket.name, self.name, file, Callback=callback)
  42. def upload(self, file: Union[str, BinaryIO], callback: Callable = None) -> None:
  43. """upload the file or file obj at the given path to this key"""
  44. _upload = self.bucket._boto_s3.meta.client.upload_fileobj
  45. if isinstance(file, str):
  46. if not os.path.isfile(file):
  47. raise Exception("file does not exist!")
  48. with open(file, "rb") as data:
  49. _upload(data, self.bucket.name, self.name, Callback=callback)
  50. elif isinstance(file, io.IOBase):
  51. _upload(file, self.bucket.name, self.name, Callback=callback)
  52. def size(self) -> int:
  53. """get the size of this object in s3"""
  54. total = 0
  55. for key in self.bucket._boto_bucket.objects.filter(Prefix=self.name):
  56. total += key.size
  57. return total
  58. def set(self, value: str, metadata: dict = None, content_type: str = "") -> dict:
  59. """Sets the key to the given value."""
  60. if not metadata:
  61. metadata = {}
  62. return self._boto_object.put(
  63. Body=value, Metadata=metadata, ContentType=content_type
  64. )
  65. def rename(self, new_name: str) -> None:
  66. """renames the key to a given new name"""
  67. # copy the item to avoid pulling and pushing
  68. self.bucket._boto_s3.Object(self.bucket.name, new_name).copy_from(
  69. CopySource=f"{self.bucket.name}/{self.name}"
  70. )
  71. # Delete the current key.
  72. self.delete()
  73. # Set the new name.
  74. self.name = new_name
  75. def delete(
  76. self,
  77. ) -> dict:
  78. """Deletes the key."""
  79. return self._boto_object.delete()
  80. @property
  81. def is_public(self) -> bool:
  82. """returns True if the public-read ACL is set for the Key."""
  83. for grant in self._boto_object.Acl().grants:
  84. if "AllUsers" in grant["Grantee"].get("URI", ""):
  85. if grant["Permission"] == "READ":
  86. return True
  87. return False
  88. def make_public(self) -> dict:
  89. """sets the 'public-read' ACL for the key."""
  90. if not self.is_public:
  91. return self._boto_object.Acl().put(ACL="public-read")
  92. return {}
  93. @property
  94. def meta(self) -> dict:
  95. """returns the metadata for the key."""
  96. return self._boto_object.get()["Metadata"]
  97. @meta.setter
  98. def meta(self, value: dict) -> None:
  99. """sets the metadata for the key."""
  100. self.set(self.get(), value)
  101. @property
  102. def url(self) -> str:
  103. """returns the public URL for the given key."""
  104. if self.is_public:
  105. endpoint = self.bucket._boto_s3.meta.client.meta.endpoint_url
  106. return f"{endpoint}/{self.bucket.name}/{self.name}"
  107. raise ValueError(
  108. f"{self.name} does not have the public-read ACL set. "
  109. "Use the make_public() method to allow for "
  110. "public URL sharing."
  111. )
  112. def temp_url(self, duration: int = 120) -> str:
  113. """returns a temporary URL for the given key."""
  114. return self.bucket._boto_s3.meta.client.generate_presigned_url(
  115. "get_object",
  116. Params={"Bucket": self.bucket.name, "Key": self.name},
  117. ExpiresIn=duration,
  118. )
  119. class S3Bucket:
  120. """An Amazon S3 Bucket."""
  121. def __init__(
  122. self,
  123. name: str,
  124. create: bool = False,
  125. region: str = "",
  126. endpoint_url: str = None,
  127. ) -> None:
  128. super().__init__()
  129. self.name = name
  130. self.region = region or os.getenv("AWS_DEFAULT_REGION", AWS_DEFAULT_REGION)
  131. env_endpoint_url = os.getenv("AWS_ENDPOINT_URL", "")
  132. self.endpoint_url = (
  133. endpoint_url or env_endpoint_url if env_endpoint_url else None
  134. )
  135. self._boto_s3 = boto3.resource(
  136. "s3", self.region, endpoint_url=self.endpoint_url
  137. )
  138. self._boto_bucket = self._boto_s3.Bucket(self.name)
  139. # Check if the bucket exists.
  140. if not self._boto_s3.Bucket(self.name) in self._boto_s3.buckets.all():
  141. if create:
  142. # Create the bucket.
  143. self._boto_s3.create_bucket(Bucket=self.name)
  144. else:
  145. raise ValueError(f"The bucket {self.name} doesn't exist!")
  146. def __getitem__(self, key: str) -> str:
  147. """allows for accessing keys with the array syntax"""
  148. return self.get(key)
  149. def __setitem__(self, key: str, value: str) -> dict:
  150. """allows for setting/uploading keys with the array syntax"""
  151. return self.set(key, value)
  152. def __delitem__(self, key: str) -> dict:
  153. """allow for deletion of keys via the del operator"""
  154. return self.delete(key)
  155. def __contains__(self, item: str) -> bool:
  156. """allows for use of the in keyword on the bucket object"""
  157. try:
  158. self._boto_s3.Object(self.name, item).load()
  159. return True
  160. except botocore.exceptions.ClientError as exception:
  161. if exception.response["Error"]["Code"] == "404":
  162. # The object does not exist.
  163. return False
  164. raise # pragma: no cover
  165. def list(self, prefix: str = None, legacy_api: bool = False) -> List:
  166. """returns a list of keys in the bucket."""
  167. if prefix:
  168. if legacy_api:
  169. paginator = self._boto_s3.meta.client.get_paginator("list_objects")
  170. else:
  171. paginator = self._boto_s3.meta.client.get_paginator("list_objects_v2")
  172. objects = []
  173. for page in paginator.paginate(Bucket=self.name, Prefix=prefix):
  174. for obj in page.get("Contents", []):
  175. objects.append(obj["Key"])
  176. return objects
  177. return [k.key for k in self._boto_bucket.objects.all()]
  178. @property
  179. def is_public(self) -> bool:
  180. """returns True if the public-read ACL is set for the bucket."""
  181. for grant in self._boto_bucket.Acl().grants:
  182. if "AllUsers" in grant["Grantee"].get("URI", ""):
  183. if grant["Permission"] == "READ":
  184. return True
  185. return False
  186. def make_public(self) -> dict:
  187. """Makes the bucket public-readable."""
  188. return self._boto_bucket.Acl().put(ACL="public-read")
  189. def key(self, key: str) -> S3Key:
  190. """returns a given key from the bucket."""
  191. return S3Key(self, key)
  192. def all(self) -> List[S3Key]:
  193. """returns all keys in the bucket."""
  194. return [self.key(k) for k in self.list()]
  195. def get(self, key: str) -> str:
  196. """get the contents of the given key"""
  197. selected_key = self.key(key)
  198. return selected_key.get()
  199. def set(
  200. self, key: str, value: str, metadata: dict = None, content_type: str = ""
  201. ) -> dict:
  202. """creates/edits a key in the s3 bucket"""
  203. if not metadata:
  204. metadata = {}
  205. new_key = self.key(key)
  206. return new_key.set(value, metadata, content_type)
  207. def delete(self, key: str = None) -> dict:
  208. """Deletes the given key, or the whole bucket."""
  209. # Delete the whole bucket.
  210. if key is None:
  211. # Delete everything in the bucket.
  212. for each_key in self.all():
  213. each_key.delete()
  214. # Delete the bucket.
  215. return self._boto_bucket.delete()
  216. # If a key was passed, delete they key.
  217. k = self.key(key)
  218. return k.delete()
  219. def __repr__(self) -> str:
  220. """representation of an s3bucket object"""
  221. return f"<S3Bucket name={self.name}>"
  222. def list() -> List[str]: # pylint: disable=redefined-builtin
  223. """lists buckets, by name."""
  224. s3_resource = boto3.resource("s3")
  225. return [bucket.name for bucket in s3_resource.buckets.all()]
  226. def get(bucket_name: str, create: bool = False) -> S3Bucket:
  227. """get an s3bucket object by name"""
  228. return S3Bucket(bucket_name, create=create)
  229. def login(
  230. access_key_id: str,
  231. secret_access_key: str,
  232. region: str = AWS_DEFAULT_REGION,
  233. endpoint_url: str = "",
  234. ) -> None:
  235. """sets environment variables for boto3."""
  236. os.environ["AWS_ACCESS_KEY_ID"] = access_key_id
  237. os.environ["AWS_SECRET_ACCESS_KEY"] = secret_access_key
  238. os.environ["AWS_DEFAULT_REGION"] = region
  239. os.environ["AWS_ENDPOINT_URL"] = endpoint_url