# session.py
  1. from requests import Session as _Session
  2. from requests.exceptions import ConnectionError, ChunkedEncodingError, Timeout, HTTPError
  3. from requests.adapters import HTTPAdapter
  4. import logging
  5. import time
  6. from .cookiejar import ClientCookieJar
  7. try:
  8. from fake_useragent import UserAgent
  9. except ImportError:
  10. UserAgent = None
  11. ua = None
  12. ua_str = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36'
  13. else:
  14. ua = UserAgent(fallback='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36')
  15. ua_str = ua.chrome
  16. session_logger = logging.getLogger('showroom.session')
  17. class ClientSession(_Session):
  18. """
  19. Wrapper for requests.Session.
  20. Mainly used to catch temporary errors and set a Timeout
  21. Overrides requests.Session.get() and increases max pool size
  22. Raises:
  23. May raise TimeoutError, ConnectionError, HTTPError, or ChunkedEncodingError
  24. if retries are exceeded.
  25. """
  26. # TODO: set pool_maxsize based on config
  27. def __init__(self, pool_maxsize=100):
  28. super().__init__()
  29. self.cookies = ClientCookieJar()
  30. https_adapter = HTTPAdapter(pool_maxsize=pool_maxsize)
  31. self.mount('https://www.showroom-live.com', https_adapter)
  32. self.headers = {"User-Agent": ua_str}
  33. # TODO: post
  34. def get(self, url, params=None, max_delay=30.0, max_retries=20, **kwargs):
  35. error_count = 0
  36. wait = 0
  37. timeouts = 0
  38. while True:
  39. try:
  40. r = super().get(url, params=params, timeout=(3.0, 15.0), **kwargs)
  41. r.raise_for_status()
  42. except Timeout as e:
  43. session_logger.debug('Timeout while fetching {}: {}'.format(url, e))
  44. timeouts += 1
  45. wait = min(2 * 1.5 ** timeouts, max_delay*4)
  46. if timeouts > max_retries:
  47. session_logger.error('Max timeouts exceeded while fetching {}: {}'.format(url, e))
  48. # raise
  49. elif timeouts > max_retries // 2:
  50. session_logger.warning('{} timeouts while fetching {}: {}'.format(timeouts, url, e))
  51. except ChunkedEncodingError as e:
  52. session_logger.debug('Chunked encoding error while fetching {}: {}'.format(url, e))
  53. error_count += 1
  54. wait = min(wait + error_count, max_delay)
  55. if error_count > max_retries:
  56. session_logger.warning('Max retries exceeded while fetching {}: {}'.format(url, e))
  57. raise
  58. except HTTPError as e:
  59. status_code = e.response.status_code
  60. session_logger.debug('{} while fetching {}: {}'.format(status_code, url, e))
  61. error_count += 1
  62. wait = min(wait + 2 + error_count, max_delay)
  63. # Some of these aren't recoverable
  64. if status_code == 404:
  65. session_logger.error('Getting {} failed permanently: 404 page not found'.format(url))
  66. raise # PageNotFoundError(e) # ?
  67. elif status_code == 403:
  68. session_logger.error('Getting {} failed permanently: 403 permission denied'.format(url))
  69. raise # specific error?
  70. elif status_code == 402:
  71. session_logger.error('Getting {} failed permanently: '
  72. '401 auth required (not implemented)'.format(url))
  73. raise
  74. elif status_code == 429:
  75. session_logger.error('Too many requests while getting {}: {}'.format(url, e))
  76. wait += 5 * 60.0
  77. elif 400 <= status_code < 500:
  78. session_logger.error('Getting {} failed permanently: {}'.format(url, e))
  79. raise
  80. if error_count > max_retries:
  81. session_logger.warning('Max retries exceeded while fetching {}: {}'.format(url, e))
  82. raise
  83. except ConnectionError as e:
  84. session_logger.debug('ConnectionError while accessing {}: {}'.format(url, e))
  85. error_count += 1
  86. wait = min(wait + 2 * error_count, max_delay)
  87. # ConnectionErrors are assumed to be always recoverable
  88. # if error_count > max_retries:
  89. # session_logger.warning('Max retries exceeded while fetching {}: {}'.format(url, e))
  90. # raise
  91. else:
  92. return r
  93. session_logger.debug('Retrying in {} seconds...'.format(wait))
  94. time.sleep(wait)