download.py 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790
  1. # Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"). You
  4. # may not use this file except in compliance with the License. A copy of
  5. # the License is located at
  6. #
  7. # http://aws.amazon.com/apache2.0/
  8. #
  9. # or in the "license" file accompanying this file. This file is
  10. # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
  11. # ANY KIND, either express or implied. See the License for the specific
  12. # language governing permissions and limitations under the License.
  13. import heapq
  14. import logging
  15. import threading
  16. from s3transfer.compat import seekable
  17. from s3transfer.exceptions import RetriesExceededError
  18. from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG
  19. from s3transfer.tasks import SubmissionTask, Task
  20. from s3transfer.utils import (
  21. S3_RETRYABLE_DOWNLOAD_ERRORS,
  22. CountCallbackInvoker,
  23. DeferredOpenFile,
  24. FunctionContainer,
  25. StreamReaderProgress,
  26. calculate_num_parts,
  27. calculate_range_parameter,
  28. get_callbacks,
  29. invoke_progress_callbacks,
  30. )
  31. logger = logging.getLogger(__name__)
  32. class DownloadOutputManager:
  33. """Base manager class for handling various types of files for downloads
  34. This class is typically used for the DownloadSubmissionTask class to help
  35. determine the following:
  36. * Provides the fileobj to write to downloads to
  37. * Get a task to complete once everything downloaded has been written
  38. The answers/implementations differ for the various types of file outputs
  39. that may be accepted. All implementations must subclass and override
  40. public methods from this class.
  41. """
  42. def __init__(self, osutil, transfer_coordinator, io_executor):
  43. self._osutil = osutil
  44. self._transfer_coordinator = transfer_coordinator
  45. self._io_executor = io_executor
  46. @classmethod
  47. def is_compatible(cls, download_target, osutil):
  48. """Determines if the target for the download is compatible with manager
  49. :param download_target: The target for which the upload will write
  50. data to.
  51. :param osutil: The os utility to be used for the transfer
  52. :returns: True if the manager can handle the type of target specified
  53. otherwise returns False.
  54. """
  55. raise NotImplementedError('must implement is_compatible()')
  56. def get_download_task_tag(self):
  57. """Get the tag (if any) to associate all GetObjectTasks
  58. :rtype: s3transfer.futures.TaskTag
  59. :returns: The tag to associate all GetObjectTasks with
  60. """
  61. return None
  62. def get_fileobj_for_io_writes(self, transfer_future):
  63. """Get file-like object to use for io writes in the io executor
  64. :type transfer_future: s3transfer.futures.TransferFuture
  65. :param transfer_future: The future associated with upload request
  66. returns: A file-like object to write to
  67. """
  68. raise NotImplementedError('must implement get_fileobj_for_io_writes()')
  69. def queue_file_io_task(self, fileobj, data, offset):
  70. """Queue IO write for submission to the IO executor.
  71. This method accepts an IO executor and information about the
  72. downloaded data, and handles submitting this to the IO executor.
  73. This method may defer submission to the IO executor if necessary.
  74. """
  75. self._transfer_coordinator.submit(
  76. self._io_executor, self.get_io_write_task(fileobj, data, offset)
  77. )
  78. def get_io_write_task(self, fileobj, data, offset):
  79. """Get an IO write task for the requested set of data
  80. This task can be ran immediately or be submitted to the IO executor
  81. for it to run.
  82. :type fileobj: file-like object
  83. :param fileobj: The file-like object to write to
  84. :type data: bytes
  85. :param data: The data to write out
  86. :type offset: integer
  87. :param offset: The offset to write the data to in the file-like object
  88. :returns: An IO task to be used to write data to a file-like object
  89. """
  90. return IOWriteTask(
  91. self._transfer_coordinator,
  92. main_kwargs={
  93. 'fileobj': fileobj,
  94. 'data': data,
  95. 'offset': offset,
  96. },
  97. )
  98. def get_final_io_task(self):
  99. """Get the final io task to complete the download
  100. This is needed because based on the architecture of the TransferManager
  101. the final tasks will be sent to the IO executor, but the executor
  102. needs a final task for it to signal that the transfer is done and
  103. all done callbacks can be run.
  104. :rtype: s3transfer.tasks.Task
  105. :returns: A final task to completed in the io executor
  106. """
  107. raise NotImplementedError('must implement get_final_io_task()')
  108. def _get_fileobj_from_filename(self, filename):
  109. f = DeferredOpenFile(
  110. filename, mode='wb', open_function=self._osutil.open
  111. )
  112. # Make sure the file gets closed and we remove the temporary file
  113. # if anything goes wrong during the process.
  114. self._transfer_coordinator.add_failure_cleanup(f.close)
  115. return f
  116. class DownloadFilenameOutputManager(DownloadOutputManager):
  117. def __init__(self, osutil, transfer_coordinator, io_executor):
  118. super().__init__(osutil, transfer_coordinator, io_executor)
  119. self._final_filename = None
  120. self._temp_filename = None
  121. self._temp_fileobj = None
  122. @classmethod
  123. def is_compatible(cls, download_target, osutil):
  124. return isinstance(download_target, str)
  125. def get_fileobj_for_io_writes(self, transfer_future):
  126. fileobj = transfer_future.meta.call_args.fileobj
  127. self._final_filename = fileobj
  128. self._temp_filename = self._osutil.get_temp_filename(fileobj)
  129. self._temp_fileobj = self._get_temp_fileobj()
  130. return self._temp_fileobj
  131. def get_final_io_task(self):
  132. # A task to rename the file from the temporary file to its final
  133. # location is needed. This should be the last task needed to complete
  134. # the download.
  135. return IORenameFileTask(
  136. transfer_coordinator=self._transfer_coordinator,
  137. main_kwargs={
  138. 'fileobj': self._temp_fileobj,
  139. 'final_filename': self._final_filename,
  140. 'osutil': self._osutil,
  141. },
  142. is_final=True,
  143. )
  144. def _get_temp_fileobj(self):
  145. f = self._get_fileobj_from_filename(self._temp_filename)
  146. self._transfer_coordinator.add_failure_cleanup(
  147. self._osutil.remove_file, self._temp_filename
  148. )
  149. return f
  150. class DownloadSeekableOutputManager(DownloadOutputManager):
  151. @classmethod
  152. def is_compatible(cls, download_target, osutil):
  153. return seekable(download_target)
  154. def get_fileobj_for_io_writes(self, transfer_future):
  155. # Return the fileobj provided to the future.
  156. return transfer_future.meta.call_args.fileobj
  157. def get_final_io_task(self):
  158. # This task will serve the purpose of signaling when all of the io
  159. # writes have finished so done callbacks can be called.
  160. return CompleteDownloadNOOPTask(
  161. transfer_coordinator=self._transfer_coordinator
  162. )
  163. class DownloadNonSeekableOutputManager(DownloadOutputManager):
  164. def __init__(
  165. self, osutil, transfer_coordinator, io_executor, defer_queue=None
  166. ):
  167. super().__init__(osutil, transfer_coordinator, io_executor)
  168. if defer_queue is None:
  169. defer_queue = DeferQueue()
  170. self._defer_queue = defer_queue
  171. self._io_submit_lock = threading.Lock()
  172. @classmethod
  173. def is_compatible(cls, download_target, osutil):
  174. return hasattr(download_target, 'write')
  175. def get_download_task_tag(self):
  176. return IN_MEMORY_DOWNLOAD_TAG
  177. def get_fileobj_for_io_writes(self, transfer_future):
  178. return transfer_future.meta.call_args.fileobj
  179. def get_final_io_task(self):
  180. return CompleteDownloadNOOPTask(
  181. transfer_coordinator=self._transfer_coordinator
  182. )
  183. def queue_file_io_task(self, fileobj, data, offset):
  184. with self._io_submit_lock:
  185. writes = self._defer_queue.request_writes(offset, data)
  186. for write in writes:
  187. data = write['data']
  188. logger.debug(
  189. "Queueing IO offset %s for fileobj: %s",
  190. write['offset'],
  191. fileobj,
  192. )
  193. super().queue_file_io_task(fileobj, data, offset)
  194. def get_io_write_task(self, fileobj, data, offset):
  195. return IOStreamingWriteTask(
  196. self._transfer_coordinator,
  197. main_kwargs={
  198. 'fileobj': fileobj,
  199. 'data': data,
  200. },
  201. )
  202. class DownloadSpecialFilenameOutputManager(DownloadNonSeekableOutputManager):
  203. def __init__(
  204. self, osutil, transfer_coordinator, io_executor, defer_queue=None
  205. ):
  206. super().__init__(
  207. osutil, transfer_coordinator, io_executor, defer_queue
  208. )
  209. self._fileobj = None
  210. @classmethod
  211. def is_compatible(cls, download_target, osutil):
  212. return isinstance(download_target, str) and osutil.is_special_file(
  213. download_target
  214. )
  215. def get_fileobj_for_io_writes(self, transfer_future):
  216. filename = transfer_future.meta.call_args.fileobj
  217. self._fileobj = self._get_fileobj_from_filename(filename)
  218. return self._fileobj
  219. def get_final_io_task(self):
  220. # Make sure the file gets closed once the transfer is done.
  221. return IOCloseTask(
  222. transfer_coordinator=self._transfer_coordinator,
  223. is_final=True,
  224. main_kwargs={'fileobj': self._fileobj},
  225. )
  226. class DownloadSubmissionTask(SubmissionTask):
  227. """Task for submitting tasks to execute a download"""
  228. def _get_download_output_manager_cls(self, transfer_future, osutil):
  229. """Retrieves a class for managing output for a download
  230. :type transfer_future: s3transfer.futures.TransferFuture
  231. :param transfer_future: The transfer future for the request
  232. :type osutil: s3transfer.utils.OSUtils
  233. :param osutil: The os utility associated to the transfer
  234. :rtype: class of DownloadOutputManager
  235. :returns: The appropriate class to use for managing a specific type of
  236. input for downloads.
  237. """
  238. download_manager_resolver_chain = [
  239. DownloadSpecialFilenameOutputManager,
  240. DownloadFilenameOutputManager,
  241. DownloadSeekableOutputManager,
  242. DownloadNonSeekableOutputManager,
  243. ]
  244. fileobj = transfer_future.meta.call_args.fileobj
  245. for download_manager_cls in download_manager_resolver_chain:
  246. if download_manager_cls.is_compatible(fileobj, osutil):
  247. return download_manager_cls
  248. raise RuntimeError(
  249. 'Output {} of type: {} is not supported.'.format(
  250. fileobj, type(fileobj)
  251. )
  252. )
  253. def _submit(
  254. self,
  255. client,
  256. config,
  257. osutil,
  258. request_executor,
  259. io_executor,
  260. transfer_future,
  261. bandwidth_limiter=None,
  262. ):
  263. """
  264. :param client: The client associated with the transfer manager
  265. :type config: s3transfer.manager.TransferConfig
  266. :param config: The transfer config associated with the transfer
  267. manager
  268. :type osutil: s3transfer.utils.OSUtil
  269. :param osutil: The os utility associated to the transfer manager
  270. :type request_executor: s3transfer.futures.BoundedExecutor
  271. :param request_executor: The request executor associated with the
  272. transfer manager
  273. :type io_executor: s3transfer.futures.BoundedExecutor
  274. :param io_executor: The io executor associated with the
  275. transfer manager
  276. :type transfer_future: s3transfer.futures.TransferFuture
  277. :param transfer_future: The transfer future associated with the
  278. transfer request that tasks are being submitted for
  279. :type bandwidth_limiter: s3transfer.bandwidth.BandwidthLimiter
  280. :param bandwidth_limiter: The bandwidth limiter to use when
  281. downloading streams
  282. """
  283. if transfer_future.meta.size is None:
  284. # If a size was not provided figure out the size for the
  285. # user.
  286. response = client.head_object(
  287. Bucket=transfer_future.meta.call_args.bucket,
  288. Key=transfer_future.meta.call_args.key,
  289. **transfer_future.meta.call_args.extra_args,
  290. )
  291. transfer_future.meta.provide_transfer_size(
  292. response['ContentLength']
  293. )
  294. download_output_manager = self._get_download_output_manager_cls(
  295. transfer_future, osutil
  296. )(osutil, self._transfer_coordinator, io_executor)
  297. # If it is greater than threshold do a ranged download, otherwise
  298. # do a regular GetObject download.
  299. if transfer_future.meta.size < config.multipart_threshold:
  300. self._submit_download_request(
  301. client,
  302. config,
  303. osutil,
  304. request_executor,
  305. io_executor,
  306. download_output_manager,
  307. transfer_future,
  308. bandwidth_limiter,
  309. )
  310. else:
  311. self._submit_ranged_download_request(
  312. client,
  313. config,
  314. osutil,
  315. request_executor,
  316. io_executor,
  317. download_output_manager,
  318. transfer_future,
  319. bandwidth_limiter,
  320. )
  321. def _submit_download_request(
  322. self,
  323. client,
  324. config,
  325. osutil,
  326. request_executor,
  327. io_executor,
  328. download_output_manager,
  329. transfer_future,
  330. bandwidth_limiter,
  331. ):
  332. call_args = transfer_future.meta.call_args
  333. # Get a handle to the file that will be used for writing downloaded
  334. # contents
  335. fileobj = download_output_manager.get_fileobj_for_io_writes(
  336. transfer_future
  337. )
  338. # Get the needed callbacks for the task
  339. progress_callbacks = get_callbacks(transfer_future, 'progress')
  340. # Get any associated tags for the get object task.
  341. get_object_tag = download_output_manager.get_download_task_tag()
  342. # Get the final io task to run once the download is complete.
  343. final_task = download_output_manager.get_final_io_task()
  344. # Submit the task to download the object.
  345. self._transfer_coordinator.submit(
  346. request_executor,
  347. ImmediatelyWriteIOGetObjectTask(
  348. transfer_coordinator=self._transfer_coordinator,
  349. main_kwargs={
  350. 'client': client,
  351. 'bucket': call_args.bucket,
  352. 'key': call_args.key,
  353. 'fileobj': fileobj,
  354. 'extra_args': call_args.extra_args,
  355. 'callbacks': progress_callbacks,
  356. 'max_attempts': config.num_download_attempts,
  357. 'download_output_manager': download_output_manager,
  358. 'io_chunksize': config.io_chunksize,
  359. 'bandwidth_limiter': bandwidth_limiter,
  360. },
  361. done_callbacks=[final_task],
  362. ),
  363. tag=get_object_tag,
  364. )
  365. def _submit_ranged_download_request(
  366. self,
  367. client,
  368. config,
  369. osutil,
  370. request_executor,
  371. io_executor,
  372. download_output_manager,
  373. transfer_future,
  374. bandwidth_limiter,
  375. ):
  376. call_args = transfer_future.meta.call_args
  377. # Get the needed progress callbacks for the task
  378. progress_callbacks = get_callbacks(transfer_future, 'progress')
  379. # Get a handle to the file that will be used for writing downloaded
  380. # contents
  381. fileobj = download_output_manager.get_fileobj_for_io_writes(
  382. transfer_future
  383. )
  384. # Determine the number of parts
  385. part_size = config.multipart_chunksize
  386. num_parts = calculate_num_parts(transfer_future.meta.size, part_size)
  387. # Get any associated tags for the get object task.
  388. get_object_tag = download_output_manager.get_download_task_tag()
  389. # Callback invoker to submit the final io task once all downloads
  390. # are complete.
  391. finalize_download_invoker = CountCallbackInvoker(
  392. self._get_final_io_task_submission_callback(
  393. download_output_manager, io_executor
  394. )
  395. )
  396. for i in range(num_parts):
  397. # Calculate the range parameter
  398. range_parameter = calculate_range_parameter(
  399. part_size, i, num_parts
  400. )
  401. # Inject the Range parameter to the parameters to be passed in
  402. # as extra args
  403. extra_args = {'Range': range_parameter}
  404. extra_args.update(call_args.extra_args)
  405. finalize_download_invoker.increment()
  406. # Submit the ranged downloads
  407. self._transfer_coordinator.submit(
  408. request_executor,
  409. GetObjectTask(
  410. transfer_coordinator=self._transfer_coordinator,
  411. main_kwargs={
  412. 'client': client,
  413. 'bucket': call_args.bucket,
  414. 'key': call_args.key,
  415. 'fileobj': fileobj,
  416. 'extra_args': extra_args,
  417. 'callbacks': progress_callbacks,
  418. 'max_attempts': config.num_download_attempts,
  419. 'start_index': i * part_size,
  420. 'download_output_manager': download_output_manager,
  421. 'io_chunksize': config.io_chunksize,
  422. 'bandwidth_limiter': bandwidth_limiter,
  423. },
  424. done_callbacks=[finalize_download_invoker.decrement],
  425. ),
  426. tag=get_object_tag,
  427. )
  428. finalize_download_invoker.finalize()
  429. def _get_final_io_task_submission_callback(
  430. self, download_manager, io_executor
  431. ):
  432. final_task = download_manager.get_final_io_task()
  433. return FunctionContainer(
  434. self._transfer_coordinator.submit, io_executor, final_task
  435. )
  436. def _calculate_range_param(self, part_size, part_index, num_parts):
  437. # Used to calculate the Range parameter
  438. start_range = part_index * part_size
  439. if part_index == num_parts - 1:
  440. end_range = ''
  441. else:
  442. end_range = start_range + part_size - 1
  443. range_param = f'bytes={start_range}-{end_range}'
  444. return range_param
  445. class GetObjectTask(Task):
  446. def _main(
  447. self,
  448. client,
  449. bucket,
  450. key,
  451. fileobj,
  452. extra_args,
  453. callbacks,
  454. max_attempts,
  455. download_output_manager,
  456. io_chunksize,
  457. start_index=0,
  458. bandwidth_limiter=None,
  459. ):
  460. """Downloads an object and places content into io queue
  461. :param client: The client to use when calling GetObject
  462. :param bucket: The bucket to download from
  463. :param key: The key to download from
  464. :param fileobj: The file handle to write content to
  465. :param exta_args: Any extra arguments to include in GetObject request
  466. :param callbacks: List of progress callbacks to invoke on download
  467. :param max_attempts: The number of retries to do when downloading
  468. :param download_output_manager: The download output manager associated
  469. with the current download.
  470. :param io_chunksize: The size of each io chunk to read from the
  471. download stream and queue in the io queue.
  472. :param start_index: The location in the file to start writing the
  473. content of the key to.
  474. :param bandwidth_limiter: The bandwidth limiter to use when throttling
  475. the downloading of data in streams.
  476. """
  477. last_exception = None
  478. for i in range(max_attempts):
  479. try:
  480. current_index = start_index
  481. response = client.get_object(
  482. Bucket=bucket, Key=key, **extra_args
  483. )
  484. streaming_body = StreamReaderProgress(
  485. response['Body'], callbacks
  486. )
  487. if bandwidth_limiter:
  488. streaming_body = (
  489. bandwidth_limiter.get_bandwith_limited_stream(
  490. streaming_body, self._transfer_coordinator
  491. )
  492. )
  493. chunks = DownloadChunkIterator(streaming_body, io_chunksize)
  494. for chunk in chunks:
  495. # If the transfer is done because of a cancellation
  496. # or error somewhere else, stop trying to submit more
  497. # data to be written and break out of the download.
  498. if not self._transfer_coordinator.done():
  499. self._handle_io(
  500. download_output_manager,
  501. fileobj,
  502. chunk,
  503. current_index,
  504. )
  505. current_index += len(chunk)
  506. else:
  507. return
  508. return
  509. except S3_RETRYABLE_DOWNLOAD_ERRORS as e:
  510. logger.debug(
  511. "Retrying exception caught (%s), "
  512. "retrying request, (attempt %s / %s)",
  513. e,
  514. i,
  515. max_attempts,
  516. exc_info=True,
  517. )
  518. last_exception = e
  519. # Also invoke the progress callbacks to indicate that we
  520. # are trying to download the stream again and all progress
  521. # for this GetObject has been lost.
  522. invoke_progress_callbacks(
  523. callbacks, start_index - current_index
  524. )
  525. continue
  526. raise RetriesExceededError(last_exception)
  527. def _handle_io(self, download_output_manager, fileobj, chunk, index):
  528. download_output_manager.queue_file_io_task(fileobj, chunk, index)
  529. class ImmediatelyWriteIOGetObjectTask(GetObjectTask):
  530. """GetObjectTask that immediately writes to the provided file object
  531. This is useful for downloads where it is known only one thread is
  532. downloading the object so there is no reason to go through the
  533. overhead of using an IO queue and executor.
  534. """
  535. def _handle_io(self, download_output_manager, fileobj, chunk, index):
  536. task = download_output_manager.get_io_write_task(fileobj, chunk, index)
  537. task()
  538. class IOWriteTask(Task):
  539. def _main(self, fileobj, data, offset):
  540. """Pulls off an io queue to write contents to a file
  541. :param fileobj: The file handle to write content to
  542. :param data: The data to write
  543. :param offset: The offset to write the data to.
  544. """
  545. fileobj.seek(offset)
  546. fileobj.write(data)
  547. class IOStreamingWriteTask(Task):
  548. """Task for writing data to a non-seekable stream."""
  549. def _main(self, fileobj, data):
  550. """Write data to a fileobj.
  551. Data will be written directly to the fileobj without
  552. any prior seeking.
  553. :param fileobj: The fileobj to write content to
  554. :param data: The data to write
  555. """
  556. fileobj.write(data)
  557. class IORenameFileTask(Task):
  558. """A task to rename a temporary file to its final filename
  559. :param fileobj: The file handle that content was written to.
  560. :param final_filename: The final name of the file to rename to
  561. upon completion of writing the contents.
  562. :param osutil: OS utility
  563. """
  564. def _main(self, fileobj, final_filename, osutil):
  565. fileobj.close()
  566. osutil.rename_file(fileobj.name, final_filename)
  567. class IOCloseTask(Task):
  568. """A task to close out a file once the download is complete.
  569. :param fileobj: The fileobj to close.
  570. """
  571. def _main(self, fileobj):
  572. fileobj.close()
  573. class CompleteDownloadNOOPTask(Task):
  574. """A NOOP task to serve as an indicator that the download is complete
  575. Note that the default for is_final is set to True because this should
  576. always be the last task.
  577. """
  578. def __init__(
  579. self,
  580. transfer_coordinator,
  581. main_kwargs=None,
  582. pending_main_kwargs=None,
  583. done_callbacks=None,
  584. is_final=True,
  585. ):
  586. super().__init__(
  587. transfer_coordinator=transfer_coordinator,
  588. main_kwargs=main_kwargs,
  589. pending_main_kwargs=pending_main_kwargs,
  590. done_callbacks=done_callbacks,
  591. is_final=is_final,
  592. )
  593. def _main(self):
  594. pass
  595. class DownloadChunkIterator:
  596. def __init__(self, body, chunksize):
  597. """Iterator to chunk out a downloaded S3 stream
  598. :param body: A readable file-like object
  599. :param chunksize: The amount to read each time
  600. """
  601. self._body = body
  602. self._chunksize = chunksize
  603. self._num_reads = 0
  604. def __iter__(self):
  605. return self
  606. def __next__(self):
  607. chunk = self._body.read(self._chunksize)
  608. self._num_reads += 1
  609. if chunk:
  610. return chunk
  611. elif self._num_reads == 1:
  612. # Even though the response may have not had any
  613. # content, we still want to account for an empty object's
  614. # existence so return the empty chunk for that initial
  615. # read.
  616. return chunk
  617. raise StopIteration()
  618. next = __next__
  619. class DeferQueue:
  620. """IO queue that defers write requests until they are queued sequentially.
  621. This class is used to track IO data for a *single* fileobj.
  622. You can send data to this queue, and it will defer any IO write requests
  623. until it has the next contiguous block available (starting at 0).
  624. """
  625. def __init__(self):
  626. self._writes = []
  627. self._pending_offsets = set()
  628. self._next_offset = 0
  629. def request_writes(self, offset, data):
  630. """Request any available writes given new incoming data.
  631. You call this method by providing new data along with the
  632. offset associated with the data. If that new data unlocks
  633. any contiguous writes that can now be submitted, this
  634. method will return all applicable writes.
  635. This is done with 1 method call so you don't have to
  636. make two method calls (put(), get()) which acquires a lock
  637. each method call.
  638. """
  639. if offset < self._next_offset:
  640. # This is a request for a write that we've already
  641. # seen. This can happen in the event of a retry
  642. # where if we retry at at offset N/2, we'll requeue
  643. # offsets 0-N/2 again.
  644. return []
  645. writes = []
  646. if offset in self._pending_offsets:
  647. # We've already queued this offset so this request is
  648. # a duplicate. In this case we should ignore
  649. # this request and prefer what's already queued.
  650. return []
  651. heapq.heappush(self._writes, (offset, data))
  652. self._pending_offsets.add(offset)
  653. while self._writes and self._writes[0][0] == self._next_offset:
  654. next_write = heapq.heappop(self._writes)
  655. writes.append({'offset': next_write[0], 'data': next_write[1]})
  656. self._pending_offsets.remove(next_write[0])
  657. self._next_offset += len(next_write[1])
  658. return writes