from requests import Session as _Session
from requests.exceptions import ConnectionError, ChunkedEncodingError, Timeout, HTTPError
from requests.adapters import HTTPAdapter
import logging
import time

from .cookiejar import ClientCookieJar

# Prefer a fresh Chrome user agent from fake_useragent, falling back to a
# pinned Chrome 80 string when the package is not installed.
try:
    from fake_useragent import UserAgent
except ImportError:
    UserAgent = None
    ua = None
    ua_str = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36'
else:
    ua = UserAgent(fallback='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36')
    ua_str = ua.chrome

session_logger = logging.getLogger('showroom.session')
class ClientSession(_Session):
    """
    Wrapper for requests.Session.

    Mainly used to catch temporary errors and apply a default timeout.
    Overrides requests.Session.get() and increases the max pool size.

    Raises:
        HTTPError if the status code is non-recoverable (e.g. 404) or
        retries are exceeded, and ChunkedEncodingError if retries are
        exceeded. Timeouts and ConnectionErrors are retried indefinitely.
    """
    # TODO: set pool_maxsize based on config
    def __init__(self, pool_maxsize=100):
        super().__init__()
        self.cookies = ClientCookieJar()
        https_adapter = HTTPAdapter(pool_maxsize=pool_maxsize)
        self.mount('https://www.showroom-live.com', https_adapter)
        # update() rather than assignment keeps requests' default headers
        # (Accept-Encoding, Connection, etc.) alongside the custom User-Agent
        self.headers.update({"User-Agent": ua_str})
    # TODO: post
    def get(self, url, params=None, max_delay=30.0, max_retries=20, **kwargs):
        error_count = 0
        wait = 0  # seconds to sleep before the next attempt
        timeouts = 0
        while True:
            try:
                r = super().get(url, params=params, timeout=(3.0, 15.0), **kwargs)
                r.raise_for_status()
            except Timeout as e:
                session_logger.debug('Timeout while fetching {}: {}'.format(url, e))
                timeouts += 1
                # exponential backoff, capped at 4x max_delay
                wait = min(2 * 1.5 ** timeouts, max_delay * 4)
                if timeouts > max_retries:
                    session_logger.error('Max timeouts exceeded while fetching {}: {}'.format(url, e))
                    # raise
                elif timeouts > max_retries // 2:
                    session_logger.warning('{} timeouts while fetching {}: {}'.format(timeouts, url, e))
            except ChunkedEncodingError as e:
                session_logger.debug('Chunked encoding error while fetching {}: {}'.format(url, e))
                error_count += 1
                wait = min(wait + error_count, max_delay)
                if error_count > max_retries:
                    session_logger.warning('Max retries exceeded while fetching {}: {}'.format(url, e))
                    raise
            except HTTPError as e:
                status_code = e.response.status_code
                session_logger.debug('{} while fetching {}: {}'.format(status_code, url, e))
                error_count += 1
                wait = min(wait + 2 + error_count, max_delay)
                # Some of these aren't recoverable
                if status_code == 404:
                    session_logger.error('Getting {} failed permanently: 404 page not found'.format(url))
                    raise  # PageNotFoundError(e) # ?
                elif status_code == 403:
                    session_logger.error('Getting {} failed permanently: 403 permission denied'.format(url))
                    raise  # specific error?
                elif status_code == 401:
                    session_logger.error('Getting {} failed permanently: '
                                         '401 auth required (not implemented)'.format(url))
                    raise
                elif status_code == 429:
                    session_logger.error('Too many requests while getting {}: {}'.format(url, e))
                    wait += 5 * 60.0
                elif 400 <= status_code < 500:
                    session_logger.error('Getting {} failed permanently: {}'.format(url, e))
                    raise
                if error_count > max_retries:
                    session_logger.warning('Max retries exceeded while fetching {}: {}'.format(url, e))
                    raise
            except ConnectionError as e:
                session_logger.debug('ConnectionError while accessing {}: {}'.format(url, e))
                error_count += 1
                wait = min(wait + 2 * error_count, max_delay)
                # ConnectionErrors are assumed to be always recoverable
                # if error_count > max_retries:
                #     session_logger.warning('Max retries exceeded while fetching {}: {}'.format(url, e))
                #     raise
            else:
                return r
            session_logger.debug('Retrying in {} seconds...'.format(wait))
            time.sleep(wait)
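
    # --- Hypothetical sketch, not part of the original module ---
    # The author left post() as a TODO above. A minimal version following the
    # same pattern as get() might look like the method below: retry transient
    # errors with a linear backoff and give up after max_retries. The names
    # and defaults here are assumptions; unlike get(), this simplified sketch
    # does not distinguish permanent HTTP failures (404, 403, ...) and retries
    # them like any other error.
    def post(self, url, data=None, max_delay=30.0, max_retries=20, **kwargs):
        error_count = 0
        while True:
            try:
                r = super().post(url, data=data, timeout=(3.0, 15.0), **kwargs)
                r.raise_for_status()
            except (Timeout, ConnectionError, ChunkedEncodingError, HTTPError) as e:
                error_count += 1
                if error_count > max_retries:
                    session_logger.warning('Max retries exceeded while posting to {}: {}'.format(url, e))
                    raise
                wait = min(2 * error_count, max_delay)
                session_logger.debug('Retrying POST to {} in {} seconds...'.format(url, wait))
                time.sleep(wait)
            else:
                return r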
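
# --- Usage sketch, not part of the original module ---
# Example of how ClientSession is typically driven. Because of the relative
# import of ClientCookieJar above, this only runs as part of the package
# (e.g. via python -m), not as a standalone script; the root URL is used here
# purely as an illustration.
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    session = ClientSession(pool_maxsize=10)
    try:
        r = session.get('https://www.showroom-live.com/')
    except HTTPError as e:
        session_logger.error('Request failed permanently: {}'.format(e))
    else:
        print(r.status_code, len(r.content))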