azure_upload.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. #!/usr/bin/env python
  2. from azure_storage.methods import client_prep, create_blob_client, create_parent_parser, setup_arguments
  3. from argparse import ArgumentParser, RawTextHelpFormatter
  4. import coloredlogs
  5. import logging
  6. import azure
  7. import sys
  8. import os
  9. class AzureUpload(object):
  10. def main(self):
  11. self.container_name, self.connect_str, self.blob_service_client, self.container_client = \
  12. client_prep(container_name=self.container_name,
  13. passphrase=self.passphrase,
  14. account_name=self.account_name)
  15. # Hide the INFO-level messages sent to the logger from Azure by increasing the logging level to WARNING
  16. logging.getLogger().setLevel(logging.WARNING)
  17. # Run the proper method depending on whether a file or a folder is requested
  18. if self.category == 'file':
  19. # If the container doesn't exist, run the container creation method, and re-run the upload
  20. self.upload_file(object_name=self.object_name,
  21. blob_service_client=self.blob_service_client,
  22. container_name=self.container_name,
  23. account_name=self.account_name,
  24. path=self.path,
  25. storage_tier=self.storage_tier)
  26. elif self.category == 'folder':
  27. self.upload_folder(object_name=self.object_name,
  28. blob_service_client=self.blob_service_client,
  29. container_name=self.container_name,
  30. account_name=self.account_name,
  31. path=self.path,
  32. storage_tier=self.storage_tier)
  33. @staticmethod
  34. def upload_file(object_name, blob_service_client, container_name, account_name, path, storage_tier):
  35. """
  36. Upload a single file to Azure storage
  37. :param object_name: type str: Name and path of file/folder to download from Azure storage
  38. :param blob_service_client: type: azure.storage.blob.BlobServiceClient
  39. :param container_name: type str: Name of the container of interest
  40. :param account_name: type str: Name of the Azure storage account
  41. :param path: type str: Path of folders in which the files are to be placed
  42. :param storage_tier: type str: Storage tier to use for the file
  43. """
  44. # Extract the name of the file from the provided name, as it may include the path
  45. file_name = os.path.basename(object_name)
  46. if path is not None:
  47. file_name = os.path.join(path, file_name)
  48. # Create a blob client for this file in the container in which it will be stored
  49. blob_client = create_blob_client(blob_service_client=blob_service_client,
  50. container_name=container_name,
  51. blob_file=file_name)
  52. # Attempt to upload the file to the specified container.
  53. try:
  54. # Read in the file data as binary
  55. with open(object_name, "rb") as data:
  56. # Upload the file data to the blob
  57. blob_client.upload_blob(data)
  58. # Set the storage tier
  59. blob_client.set_standard_blob_tier(standard_blob_tier=storage_tier)
  60. # If a file with that name already exists in that container, warn the user
  61. except azure.core.exceptions.ResourceExistsError:
  62. logging.warning(f'The file {file_name} already exists in container {container_name} in '
  63. f'storage account {account_name}')
  64. raise SystemExit
  65. # Despite the attempt to correct the container name, it may still be invalid
  66. except azure.core.exceptions.HttpResponseError as e:
  67. if 'ContainerNotFound' in str(e):
  68. logging.warning(f'Could not create container {container_name}')
  69. raise SystemExit
  70. except FileNotFoundError:
  71. logging.error(f'Could not find the specified file {object_name} to upload. Please ensure that the '
  72. f'supplied name and path are correct.')
  73. raise SystemExit
  74. @staticmethod
  75. def upload_folder(object_name, blob_service_client, container_name, account_name, path, storage_tier):
  76. """
  77. Upload all the files (and sub-folders as applicable) in the specified folder to Azure storage
  78. :param object_name: type str: Name and path of file/folder to download from Azure storage
  79. :param blob_service_client: type: azure.storage.blob.BlobServiceClient
  80. :param container_name: type str: Name of the container of interest
  81. :param account_name: type str: Name of the Azure storage account
  82. :param path: type str: Path of folders in which the files are to be placed
  83. :param storage_tier: type str: Storage tier to use for the folder
  84. """
  85. # Use os.walk to find all the files and folders in the supplied directory
  86. for root, dirs, files in os.walk(object_name):
  87. # Determine the relative path for the current sub-folder to the supplied root folder by creating a list
  88. # from the splitting of the root path using the OS-appropriate separator (os.sep) and slicing the list to
  89. # remove the first entry (the root) e.g. outputs/files/reports, where 'outputs/files' is the supplied
  90. # directory, would return 'files/reports'
  91. rel_path = os.path.join(os.sep.join(root.split(os.sep)[1:]))
  92. # Using the same logic as above, extract the root directory e.g. outputs/files/reports, where
  93. # 'outputs/files' is the supplied directory, would return 'outputs'
  94. # root_path = root.split(os.sep)[0]
  95. # Ensure that the root path starts with the appropriate separator when an absolute path is provided
  96. # if object_name.startswith(os.sep):
  97. # root_path = os.sep + root_path
  98. for file_name in files:
  99. # If the path is supplied, the folders of interest must be extract in order to keep the original
  100. # folder structure
  101. if path is not None:
  102. # Set the target folder as the relative path between the root and the supplied folder name
  103. # e.g. /home/users/files/folder/nested_folder and /home/users/files/folder would return
  104. # nested_folder, while identical root and folder would return a dot (.)
  105. target_folder = os.path.relpath(root, start=object_name.rstrip(os.sep))
  106. # If the target_folder is a dot, treat it as empty
  107. target_folder = target_folder if target_folder != '.' else ''
  108. # Create the target file in the container by joining the desired path, the target folder and the
  109. # name of the file
  110. target_file = os.path.join(path, target_folder, file_name)
  111. # Add the file name to the calculated relative path to set the name of the blob in Azure storage e.g.
  112. # files/reports/summary.tsv
  113. else:
  114. target_file = os.path.join(rel_path, file_name)
  115. # Create a blob client for this file using the supplied container name
  116. blob_client = create_blob_client(blob_service_client=blob_service_client,
  117. container_name=container_name,
  118. blob_file=target_file)
  119. # Set the local name and path of the file, so it can be opened
  120. local_file = os.path.join(root, file_name)
  121. # Attempt to upload the file to the specified container
  122. try:
  123. # Re-add the root path to find the file on the local system
  124. with open(os.path.join(local_file), "rb") as data:
  125. # Upload the file to Azure storage
  126. blob_client.upload_blob(data)
  127. # Set the storage tier
  128. blob_client.set_standard_blob_tier(standard_blob_tier=storage_tier)
  129. # Print a warning if a file with that name already exists in the specified container
  130. except azure.core.exceptions.ResourceExistsError:
  131. logging.warning(f'The file {local_file} already exists in container {container_name} '
  132. f'in storage account {account_name} as {target_file}')
  133. def __init__(self, object_name, container_name, account_name, passphrase, path, storage_tier, category):
  134. # Set the name of the file/folder to upload
  135. self.object_name = object_name
  136. if category == 'file':
  137. try:
  138. assert os.path.isfile(self.object_name)
  139. except AssertionError:
  140. logging.error(f'Cannot locate the specified file to upload: {self.object_name}')
  141. raise SystemExit
  142. elif category == 'folder':
  143. try:
  144. assert os.path.isdir(self.object_name)
  145. except AssertionError:
  146. logging.error(f'Cannot located the specified folder to upload: {self.object_name}')
  147. raise SystemExit
  148. else:
  149. logging.error(f'Something is wrong. There is no {category} option available')
  150. raise SystemExit
  151. # Initialise necessary class variables
  152. self.passphrase = passphrase
  153. self.account_name = account_name
  154. self.container_name = container_name
  155. self.path = path
  156. self.storage_tier = storage_tier
  157. self.category = category
  158. self.connect_str = str()
  159. self.blob_service_client = None
  160. self.container_client = None
  161. self.retry = False
  162. def file_upload(args):
  163. """
  164. Run the AzureUpload class for a file
  165. :param args: type ArgumentParser arguments
  166. """
  167. logging.info(f'Uploading {args.file} to container {args.container_name} in Azure storage account '
  168. f'{args.account_name}')
  169. # Create the file_upload object
  170. file_uploader = AzureUpload(object_name=args.file,
  171. account_name=args.account_name,
  172. container_name=args.container_name,
  173. passphrase=args.passphrase,
  174. path=args.reset_path,
  175. storage_tier=args.storage_tier,
  176. category='file')
  177. file_uploader.main()
  178. def folder_upload(args):
  179. """
  180. Run the AzureUpload class for a folder
  181. :param args: type ArgumentParser arguments
  182. """
  183. logging.info(f'Uploading folder (and its contents) {args.folder} to container {args.container_name} in Azure '
  184. f'storage account {args.account_name}')
  185. folder_uploader = AzureUpload(object_name=args.folder,
  186. account_name=args.account_name,
  187. container_name=args.container_name,
  188. passphrase=args.passphrase,
  189. path=args.reset_path,
  190. storage_tier=args.storage_tier,
  191. category='folder')
  192. folder_uploader.main()
  193. def cli():
  194. parser = ArgumentParser(description='Upload files or folders to Azure storage')
  195. # Create the parental parser, and the subparser
  196. subparsers, parent_parser = create_parent_parser(parser=parser)
  197. parent_parser.add_argument('-r', '--reset_path',
  198. type=str,
  199. help='Set the path of the file/folder within a folder in the target container '
  200. 'e.g. sequence_data/220202-m05722. If you want to place it directly in the '
  201. 'container without any nesting, use or \'\'')
  202. parent_parser.add_argument('-s', '--storage_tier',
  203. type=str,
  204. default='Hot',
  205. choices=['Hot', 'Cool', 'Archive'],
  206. metavar='STORAGE_TIER',
  207. help='Set the storage tier for the file/folder to be uploaded. Options are "Hot", '
  208. '"Cool", and "Archive". Default is Hot')
  209. # File upload subparser
  210. file_subparser = subparsers.add_parser(parents=[parent_parser],
  211. name='file',
  212. description='Upload a file to Azure storage',
  213. formatter_class=RawTextHelpFormatter,
  214. help='Upload a file to Azure storage')
  215. file_subparser.add_argument('-f', '--file',
  216. type=str,
  217. required=True,
  218. help='Name and path of the file to upload to Azure storage.'
  219. 'e.g. /mnt/sequences/220202_M05722/2022-SEQ-0001_S1_L001_R1_001.fastq.gz')
  220. file_subparser.set_defaults(func=file_upload)
  221. # Folder upload subparser
  222. folder_subparser = subparsers.add_parser(parents=[parent_parser],
  223. name='folder',
  224. description='Upload a folder to Azure storage',
  225. formatter_class=RawTextHelpFormatter,
  226. help='Upload a folder to Azure storage')
  227. folder_subparser.add_argument('-f', '--folder',
  228. type=str,
  229. required=True,
  230. help='Name and path of the folder to upload to Azure storage.'
  231. 'e.g. /mnt/sequences/220202_M05722/')
  232. folder_subparser.set_defaults(func=folder_upload)
  233. # Set up the arguments, and run the appropriate subparser
  234. arguments = setup_arguments(parser=parser)
  235. # Return to the requested logging level, as it has been increased to WARNING to suppress the log being filled with
  236. # information from azure.core.pipeline.policies.http_logging_policy
  237. coloredlogs.install(level=arguments.verbosity.upper())
  238. logging.info('Upload complete')
  239. # Prevent the arguments being printed to the console (they are returned in order for the tests to work)
  240. sys.stderr = open(os.devnull, 'w')
  241. return arguments
  242. if __name__ == '__main__':
  243. cli()