azure_download.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. #!/usr/bin/env python
  2. from azure_storage.methods import client_prep, create_blob_client, create_parent_parser, setup_arguments
  3. from argparse import ArgumentParser, RawTextHelpFormatter
  4. import coloredlogs
  5. import logging
  6. import azure
  7. import sys
  8. import os
  9. class AzureContainerDownload(object):
  10. def main(self):
  11. self.container_name, self.connect_str, self.blob_service_client, self.container_client = \
  12. client_prep(container_name=self.container_name,
  13. passphrase=self.passphrase,
  14. account_name=self.account_name,
  15. create=False)
  16. self.download_container(container_client=self.container_client,
  17. blob_service_client=self.blob_service_client,
  18. container_name=self.container_name,
  19. output_path=self.output_path)
  20. @staticmethod
  21. def download_container(container_client, blob_service_client, container_name, output_path):
  22. """
  23. Download the container from Azure storage
  24. :param container_client: type azure.storage.blob.BlobServiceClient.ContainerClient
  25. :param blob_service_client: type: azure.storage.blob.BlobServiceClient
  26. :param container_name: type str: Name of the container of interest
  27. :param output_path: type str: Name and path of the folder into which the container is to be downloaded
  28. """
  29. # Create a generator containing all the blobs in the container
  30. generator = container_client.list_blobs()
  31. try:
  32. # Hide the INFO-level messages sent to the logger from Azure by increasing the logging level to WARNING
  33. logging.getLogger().setLevel(logging.WARNING)
  34. for blob_file in generator:
  35. # Create the blob client
  36. blob_client = create_blob_client(blob_service_client=blob_service_client,
  37. container_name=container_name,
  38. blob_file=blob_file)
  39. # Extract the folder structure of the blob e.g. 220202-m05722/InterOp
  40. folder_structure = os.path.split(os.path.dirname(blob_file.name))
  41. # Determine the path to output the file. Join the supplied path, the name of the container and
  42. # the joined (splatted) folder structure. Logic: https://stackoverflow.com/a/14826889
  43. download_path = os.path.join(output_path, container_name, os.path.join(*folder_structure))
  44. # Create the path if required
  45. os.makedirs(download_path, exist_ok=True)
  46. # Set the name of file by removing any path information
  47. file_name = os.path.basename(blob_file.name)
  48. # Finally, set the name and the path of the output file
  49. download_file = os.path.join(download_path, file_name)
  50. # Open the target output file as binary
  51. with open(download_file, 'wb') as downloaded_file:
  52. # Write the data from the blob client to the local file
  53. downloaded_file.write(blob_client.download_blob().readall())
  54. except azure.core.exceptions.ResourceNotFoundError:
  55. logging.error(f' The specified container, {container_name}, does not exist.')
  56. raise SystemExit
  57. def __init__(self, container_name, output_path, account_name, passphrase):
  58. # Set the container name variable
  59. self.container_name = container_name
  60. # Output path
  61. if output_path.startswith('~'):
  62. self.output_path = os.path.abspath(os.path.expanduser(os.path.join(output_path)))
  63. else:
  64. self.output_path = os.path.abspath(os.path.join(output_path))
  65. # Create the output path
  66. try:
  67. os.makedirs(self.output_path, exist_ok=True)
  68. except PermissionError:
  69. logging.error(f'Could not use the supplied output path: {self.output_path}')
  70. raise SystemExit
  71. # Initialise necessary class variables
  72. self.passphrase = passphrase
  73. self.account_name = account_name
  74. self.connect_str = str()
  75. self.blob_service_client = None
  76. self.container_client = None
  77. class AzureDownload(object):
  78. def main(self):
  79. self.container_name, self.connect_str, self.blob_service_client, self.container_client = \
  80. client_prep(container_name=self.container_name,
  81. passphrase=self.passphrase,
  82. account_name=self.account_name,
  83. create=False)
  84. # Run the proper method depending on whether a file or a folder is requested
  85. if self.category == 'file':
  86. self.download_file(container_client=self.container_client,
  87. blob_service_client=self.blob_service_client,
  88. container_name=self.container_name,
  89. object_name=self.object_name,
  90. output_path=self.output_path)
  91. elif self.category == 'folder':
  92. self.download_folder(container_client=self.container_client,
  93. blob_service_client=self.blob_service_client,
  94. container_name=self.container_name,
  95. object_name=self.object_name,
  96. output_path=self.output_path)
  97. else:
  98. logging.error(f'Something is wrong. There is no {self.category} option available')
  99. raise SystemExit
  100. @staticmethod
  101. def download_file(container_client, blob_service_client, container_name, object_name, output_path):
  102. """
  103. Download the specified file from Azure storage
  104. :param container_client: type azure.storage.blob.BlobServiceClient.ContainerClient
  105. :param blob_service_client: type: azure.storage.blob.BlobServiceClient
  106. :param container_name: type str: Name of the container of interest
  107. :param object_name: type str: Name and path of file to download from Azure storage
  108. :param output_path: type str: Name and path of the folder into which the file is to be downloaded
  109. """
  110. # Create a generator containing all the blobs in the container
  111. generator = container_client.list_blobs()
  112. # Create a boolean to determine if the file has been located
  113. present = False
  114. # Hide the INFO-level messages sent to the logger from Azure by increasing the logging level to WARNING
  115. logging.getLogger().setLevel(logging.WARNING)
  116. try:
  117. for blob_file in generator:
  118. # Filter for the blob name
  119. if blob_file.name == object_name:
  120. # Update the file presence variable
  121. present = True
  122. # Create the blob client
  123. blob_client = create_blob_client(blob_service_client=blob_service_client,
  124. container_name=container_name,
  125. blob_file=blob_file)
  126. # Set the name of file by removing any path information
  127. file_name = os.path.basename(blob_file.name)
  128. # Finally, set the name and the path of the output file
  129. download_file = os.path.join(output_path, file_name)
  130. # Open the target output file as binary
  131. with open(download_file, 'wb') as downloaded_file:
  132. # Write the data from the blob client to the local file
  133. downloaded_file.write(blob_client.download_blob().readall())
  134. # Send an error to the user that the file could not be found
  135. if not present:
  136. logging.error(f'Could not locate the desired file {object_name} in {container_name}')
  137. raise SystemExit
  138. except azure.core.exceptions.ResourceNotFoundError:
  139. logging.error(f' The specified container, {container_name}, does not exist.')
  140. raise SystemExit
  141. @staticmethod
  142. def download_folder(container_client, blob_service_client, container_name, object_name, output_path):
  143. """
  144. Download the specified folder from Azure storage
  145. :param container_client: type azure.storage.blob.BlobServiceClient.ContainerClient
  146. :param blob_service_client: type: azure.storage.blob.BlobServiceClient
  147. :param container_name: type str: Name of the container of interest
  148. :param object_name: type str: Name and path of folder to download from Azure storage
  149. :param output_path: type str: Name and path of the folder into which the folder is to be downloaded
  150. """
  151. # Create a generator containing all the blobs in the container
  152. generator = container_client.list_blobs()
  153. # Boolean to track whether the folder was located
  154. present = False
  155. # Hide the INFO-level messages sent to the logger from Azure by increasing the logging level to WARNING
  156. logging.getLogger().setLevel(logging.WARNING)
  157. try:
  158. for blob_file in generator:
  159. # Create the path of the file by adding the container name to the path of the file
  160. blob_path = os.path.join(container_name, os.path.split(blob_file.name)[0])
  161. # Ensure that the supplied folder path is present in the blob path
  162. if os.path.normpath(object_name) in os.path.normpath(blob_path):
  163. # Update the folder presence boolean
  164. present = True
  165. # Create the blob client
  166. blob_client = create_blob_client(blob_service_client=blob_service_client,
  167. container_name=container_name,
  168. blob_file=blob_file)
  169. # Determine the path to output the file. Join the supplied path and the path of the blob
  170. download_path = os.path.join(output_path, os.path.join(os.path.dirname(blob_file.name)))
  171. # Create the path if required
  172. os.makedirs(download_path, exist_ok=True)
  173. # Set the name of file by removing any path information
  174. file_name = os.path.basename(blob_file.name)
  175. # Finally, set the name and the path of the output file
  176. download_file = os.path.join(download_path, file_name)
  177. # Open the target output file as binary
  178. with open(download_file, 'wb') as downloaded_file:
  179. # Write the data from the blob client to the local file
  180. downloaded_file.write(blob_client.download_blob().readall())
  181. # Send an error to the user that the folder could not be found
  182. if not present:
  183. logging.error(f'Could not locate the desired folder {object_name} in container {container_name}')
  184. raise SystemExit
  185. except azure.core.exceptions.ResourceNotFoundError:
  186. logging.error(f' The specified container, {container_name}, does not exist.')
  187. raise SystemExit
  188. def __init__(self, object_name, container_name, output_path, account_name, passphrase, category):
  189. # Set the name of the file/folder to download
  190. self.object_name = object_name
  191. # Set the container name variable
  192. self.container_name = container_name
  193. # Output path
  194. if output_path.startswith('~'):
  195. self.output_path = os.path.abspath(os.path.expanduser(os.path.join(output_path)))
  196. else:
  197. self.output_path = os.path.abspath(os.path.join(output_path))
  198. # Create the output path
  199. try:
  200. os.makedirs(self.output_path, exist_ok=True)
  201. except PermissionError:
  202. logging.error(f'Could not use the supplied output path: {self.output_path}')
  203. raise SystemExit
  204. # Initialise necessary class variables
  205. self.passphrase = passphrase
  206. self.account_name = account_name
  207. self.category = category
  208. self.connect_str = str()
  209. self.blob_service_client = None
  210. self.container_client = None
  211. def container_download(args):
  212. """
  213. Run the AzureContainerDownload method
  214. :param args: type ArgumentParser arguments
  215. """
  216. logging.info(f'Downloading Azure container {args.container_name}')
  217. # Create the container download object
  218. container_downloader = AzureContainerDownload(container_name=args.container_name,
  219. output_path=args.output_path,
  220. account_name=args.account_name,
  221. passphrase=args.passphrase)
  222. container_downloader.main()
  223. def file_download(args):
  224. """
  225. Run the AzureDownload class for a file
  226. :param args: type ArgumentParser arguments
  227. """
  228. logging.info(f'Downloading {args.file} from Azure storage')
  229. # Create the file download object
  230. file_downloader = AzureDownload(object_name=args.file,
  231. container_name=args.container_name,
  232. output_path=args.output_path,
  233. account_name=args.account_name,
  234. passphrase=args.passphrase,
  235. category='file')
  236. file_downloader.main()
  237. def folder_download(args):
  238. """
  239. Run the AzureDownload class for a folder
  240. :param args: type ArgumentParser arguments
  241. """
  242. logging.info(f'Downloading contents of folder {args.folder} from Azure storage')
  243. folder_downloader = AzureDownload(object_name=args.folder,
  244. container_name=args.container_name,
  245. output_path=args.output_path,
  246. account_name=args.account_name,
  247. passphrase=args.passphrase,
  248. category='folder')
  249. folder_downloader.main()
  250. def cli():
  251. parser = ArgumentParser(description='Download containers/files/folders from Azure storage')
  252. # Create the parental parser, and the subparser
  253. subparsers, parent_parser = create_parent_parser(parser=parser)
  254. parent_parser.add_argument('-o', '--output_path',
  255. default=os.getcwd(),
  256. help='Name and path of directory in which the outputs are to be saved. Default is '
  257. 'your $CWD')
  258. # Container downloading parser
  259. container_subparser = subparsers.add_parser(parents=[parent_parser],
  260. name='container',
  261. description='Download a container from Azure storage',
  262. formatter_class=RawTextHelpFormatter,
  263. help='Download a container from Azure storage')
  264. container_subparser.set_defaults(func=container_download)
  265. # Blob (file) downloading subparser
  266. file_subparser = subparsers.add_parser(parents=[parent_parser],
  267. name='file',
  268. description='Download a file from Azure storage',
  269. formatter_class=RawTextHelpFormatter,
  270. help='Download a file from Azure storage')
  271. file_subparser.add_argument('-f', '--file',
  272. type=str,
  273. required=True,
  274. help='Name of file to download from Azure storage.'
  275. 'e.g. 2022-SEQ-0001_S1_L001_R1_001.fastq.gz')
  276. file_subparser.set_defaults(func=file_download)
  277. # Folder downloading subparser
  278. folder_subparser = subparsers.add_parser(parents=[parent_parser],
  279. name='folder',
  280. description='Download a folder from Azure storage',
  281. formatter_class=RawTextHelpFormatter,
  282. help='Download a folder from Azure storage')
  283. folder_subparser.add_argument('-f', '--folder',
  284. type=str,
  285. required=True,
  286. help='Name of the folder to download from Azure storage e.g. InterOp')
  287. folder_subparser.set_defaults(func=folder_download)
  288. # Set up the arguments, and run the appropriate subparser
  289. arguments = setup_arguments(parser=parser)
  290. # Return to the requested logging level, as it has been increased to WARNING to suppress the log being filled with
  291. # information from azure.core.pipeline.policies.http_logging_policy
  292. coloredlogs.install(level=arguments.verbosity.upper())
  293. logging.info('Download complete')
  294. # Prevent the arguments being printed to the console (they are returned in order for the tests to work)
  295. sys.stderr = open(os.devnull, 'w')
  296. return arguments
  297. if __name__ == '__main__':
  298. cli()