123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392 |
- # Copyright (c) 2017 crocoite contributors
- #
- # Permission is hereby granted, free of charge, to any person obtaining a copy
- # of this software and associated documentation files (the "Software"), to deal
- # in the Software without restriction, including without limitation the rights
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- # copies of the Software, and to permit persons to whom the Software is
- # furnished to do so, subject to the following conditions:
- #
- # The above copyright notice and this permission notice shall be included in
- # all copies or substantial portions of the Software.
- #
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- # THE SOFTWARE.
- """
- Communication with Google Chrome through its DevTools protocol.
- """
- import json, asyncio, logging, os
- from tempfile import mkdtemp
- import shutil
- from http.cookies import Morsel
- import aiohttp, websockets
- from yarl import URL
- from .util import StrJsonEncoder
- logger = logging.getLogger (__name__)
class Browser:
    """
    Communicate with Google Chrome through its DevTools protocol.

    Asynchronous context manager that creates a new Tab when entering.
    Destroyed upon exit.

    Iterating over an instance (``async for tab in browser``) lists the
    browser’s tabs of type ``page`` as returned by ``/json/list``.
    """

    __slots__ = ('session', 'url', 'tab')

    def __init__ (self, url):
        """
        :param url: Base URL of the browser’s DevTools HTTP endpoint
        """
        self.url = URL (url)
        # both are only set while the context manager is entered
        self.session = None
        self.tab = None

    async def __aiter__ (self):
        """ List all tabs """
        async with aiohttp.ClientSession () as session:
            async with session.get (self.url.with_path ('/json/list')) as r:
                resp = await r.json ()
        # Yield only after the HTTP session has been released, so an
        # abandoned iteration does not keep a connection open.
        for tab in resp:
            if tab['type'] == 'page':
                yield tab

    async def __aenter__ (self):
        """
        Create tab

        :return: The newly created Tab
        """
        assert self.tab is None
        assert self.session is None
        self.session = aiohttp.ClientSession ()
        try:
            async with self.session.get (self.url.with_path ('/json/new')) as r:
                resp = await r.json ()
            self.tab = await Tab.create (**resp)
        except BaseException:
            # __aexit__ is not called if __aenter__ fails, so the session
            # must be released here or it would leak and block re-entering.
            session, self.session = self.session, None
            await session.close ()
            raise
        return self.tab

    async def __aexit__ (self, excType, excValue, traceback):
        """
        Close the tab and the HTTP session.

        Never suppresses the exception raised inside the ``async with``
        body (always returns False).
        """
        assert self.tab is not None
        assert self.session is not None

        try:
            await self.tab.close ()

            try:
                async with self.session.get (self.url.with_path (f'/json/close/{self.tab.id}')) as r:
                    resp = await r.text ()
                    assert resp == 'Target is closing'
            except aiohttp.client_exceptions.ClientConnectorError:
                # oh boy, the whole browser crashed instead
                if excType is not Crashed:
                    # this one is more important
                    raise
                # otherwise the Crashed exception is reraised by
                # `return False`
        finally:
            # always release resources and reset state, even if closing
            # the tab failed
            session, self.session = self.session, None
            self.tab = None
            await session.close ()

        return False
class TabFunction:
    """
    Helper class for infinite-depth tab functions.

    A method usually consists of namespace (Page, Network, …) and function name
    (getFoobar) separated by a dot. This class creates these function names
    while providing an intuitive Python interface (tab.Network.getFoobar).

    This was inspired by pychrome.
    """

    __slots__ = ('name', 'tab')

    def __init__ (self, name, tab):
        """
        :param name: Dotted method name built so far, e.g. ``Page.enable``
        :param tab: Awaitable callable ``tab (name, **kwargs)`` that performs
            the actual call
        """
        self.name = name
        self.tab = tab

    def __eq__ (self, b):
        """ Two functions are equal if their names match, the tab is ignored """
        if not isinstance (b, TabFunction):
            # let Python try the reflected comparison/fall back to identity
            # instead of blowing up when compared to a foreign object
            return NotImplemented
        return self.name == b.name

    def __hash__ (self):
        # consistent with __eq__: only the name matters
        return hash (self.name)

    def __getattr__ (self, k):
        """ Descend one level, i.e. append `k` to the dotted name """
        return TabFunction (f'{self.name}.{k}', self.tab)

    async def __call__ (self, **kwargs):
        """ Invoke the method on the tab, passing kwargs as its parameters """
        return await self.tab (self.name, **kwargs)

    def __repr__ (self):
        return f'<TabFunction {self.name}>'
class TabException (Exception):
    """ Base class of all tab-related errors in this module """

class Crashed (TabException):
    """ The tab (or its connection) is gone, requests cannot be served """

class MethodNotFound (TabException):
    """ Error response with code -32601 """

class InvalidParameter (TabException):
    """ Error response with code -32602 """

# map error codes to native exceptions
errorMap = {
    -32601: MethodNotFound,
    -32602: InvalidParameter,
    }
class Tab:
    """
    Communicate with a single Google Chrome browser tab.

    Attribute access on a tab (``tab.Page.enable``) yields a TabFunction,
    awaiting a call to it sends the request over the websocket and returns
    the browser’s result. Events sent by the browser are queued and can be
    retrieved with get().
    """

    __slots__ = ('id', 'wsUrl', 'ws', 'msgid', 'transactions', 'queue', '_recvHandle', 'crashed')

    def __init__ (self, tabid, ws):
        """ Do not use this method, use Browser context manager. """
        self.id = tabid
        self.ws = ws
        # id of the next request
        self.msgid = 1
        self.crashed = False
        # msgid → {'event': asyncio.Event, 'result': …} of pending requests
        self.transactions = {}
        # events received from the browser (or a Crashed exception)
        self.queue = asyncio.Queue ()
        # Must be initialised: an unset slot is resolved through
        # __getattr__ and would silently turn into a TabFunction.
        self._recvHandle = None

    def __getattr__ (self, k):
        # only called for names that are not regular attributes
        return TabFunction (k, self)

    async def __call__ (self, method, **kwargs):
        """
        Actually call browser method with kwargs

        :param method: Dotted method name, e.g. ``Page.enable``
        :return: The ``result`` member of the browser’s response
        :raises Crashed: If the tab crashed or the receiver is not running
        :raises TabException: If the browser responded with an error
        """

        if self.crashed or self._recvHandle is None or self._recvHandle.done ():
            raise Crashed ()

        msgid = self.msgid
        self.msgid += 1
        message = {'method': method, 'params': kwargs, 'id': msgid}
        t = self.transactions[msgid] = {'event': asyncio.Event (), 'result': None}
        try:
            # lazy formatting, messages can be huge
            logger.debug ('← %s', message)
            await self.ws.send (json.dumps (message, cls=StrJsonEncoder))
            await t['event'].wait ()
        finally:
            # drop the transaction even if sending failed or we were
            # cancelled, otherwise it would stay registered forever
            self.transactions.pop (msgid, None)
        ret = t['result']
        if isinstance (ret, Exception):
            raise ret
        return ret

    async def _recvProcess (self):
        """
        Receive process that dispatches received websocket frames

        These are either events which will be put into a queue or request
        responses which unblock a __call__.
        """

        async def markCrashed (reason):
            # all pending requests can be considered failed since the
            # browser state is lost
            for v in self.transactions.values ():
                v['result'] = Crashed (reason)
                v['event'].set ()
            # and all future requests will fail as well until reloaded
            self.crashed = True
            await self.queue.put (Crashed (reason))

        while True:
            try:
                msg = await self.ws.recv ()
                msg = json.loads (msg)
            except Exception as e:
                # right now we cannot recover from this
                await markCrashed (e)
                break
            # lazy formatting, messages can be huge
            logger.debug ('→ %s', msg)
            if 'id' in msg:
                # response to a request
                msgid = msg['id']
                t = self.transactions.get (msgid, None)
                if t is not None:
                    if 'error' in msg:
                        e = msg['error']
                        t['result'] = errorMap.get (e['code'], TabException) (e['code'], e['message'])
                    else:
                        t['result'] = msg['result']
                    t['event'].set ()
                else:
                    # ignore stale result
                    pass # pragma: no cover
            elif 'method' in msg:
                # special treatment
                if msg['method'] == 'Inspector.targetCrashed':
                    await markCrashed ('target')
                else:
                    await self.queue.put (msg)
            else:
                assert False # pragma: no cover

    async def run (self):
        """ Start the receive process in the background """
        self._recvHandle = asyncio.ensure_future (self._recvProcess ())

    async def close (self):
        """ Stop the receive process and close the websocket """
        if self._recvHandle is not None:
            self._recvHandle.cancel ()
        await self.ws.close ()
        # no join, throw away the queue. There will be nobody listening on the
        # other end.
        #await self.queue.join ()

    @property
    def pending (self):
        """ Number of queued events, which have not been retrieved yet """
        return self.queue.qsize ()

    async def get (self):
        """
        Wait for the next event

        :return: Tuple of (TabFunction naming the event, event parameters)
        :raises Crashed: If the tab crashed
        """
        if self.crashed:
            raise Crashed ()

        ret = await self.queue.get ()
        if isinstance (ret, Exception):
            raise ret
        # resolve the dotted event name one attribute at a time
        func = self
        for part in ret['method'].split ('.'):
            func = getattr (func, part)
        return func, ret['params']

    @classmethod
    async def create (cls, **kwargs):
        """
        Async init

        :param kwargs: Tab description, must contain the keys
            ``webSocketDebuggerUrl`` and ``id``
        :return: Connected Tab with its receive process running
        """
        # increase size limit of a single frame to something ridiculously high,
        # so we can safely grab screenshots
        maxSize = 100*1024*1024 # 100 MB
        # chrome does not like pings and kills the connection, disable them
        ws = await websockets.connect(kwargs['webSocketDebuggerUrl'],
                max_size=maxSize, ping_interval=None)
        ret = cls (kwargs['id'], ws)
        await ret.run ()
        return ret
class Process:
    """
    Start Google Chrome listening on a random port

    Asynchronous context manager: entering starts the browser with a fresh
    temporary user data directory and returns the URL of its DevTools
    endpoint, exiting terminates the browser and removes the directory.
    """

    __slots__ = ('binary', 'windowSize', 'p', 'userDataDir')

    def __init__ (self, binary='google-chrome-stable', windowSize=(1920, 1080)):
        """
        :param binary: Name of or path to the Chrome executable
        :param windowSize: Browser window size as (width, height)
        """
        self.binary = binary
        self.windowSize = windowSize
        # subprocess handle, None while the browser is not running
        self.p = None
        # set by __aenter__
        self.userDataDir = None

    def _buildArgs (self):
        """ Assemble the browser’s command line """
        # see https://github.com/GoogleChrome/chrome-launcher/blob/master/docs/chrome-flags-for-tools.md
        return [self.binary,
                '--window-size={},{}'.format (*self.windowSize),
                f'--user-data-dir={self.userDataDir}', # use temporary user dir
                '--no-default-browser-check',
                '--no-first-run', # don’t show first run screen
                '--disable-breakpad', # no error reports
                '--disable-extensions',
                '--disable-infobars',
                '--disable-notifications', # no libnotify
                '--disable-background-networking', # disable background services (updating, safe browsing, …)
                '--safebrowsing-disable-auto-update',
                '--disable-sync', # no google account syncing
                '--metrics-recording-only', # do not submit metrics
                '--disable-default-apps',
                '--disable-background-timer-throttling',
                '--disable-client-side-phishing-detection',
                '--disable-popup-blocking',
                '--disable-prompt-on-repost',
                '--enable-automation', # enable various automation-related things
                '--password-store=basic',
                '--headless',
                '--disable-gpu',
                '--hide-scrollbars', # hide scrollbars on screenshots
                '--mute-audio', # don’t play any audio
                '--remote-debugging-port=0', # pick a port. XXX: we may want to use --remote-debugging-pipe instead
                '--homepage=about:blank',
                'about:blank']

    async def _readPort (self):
        """ Poll for the DevTools port, returns None if it never shows up """
        # chrome writes its current active devtools port to a file. due to the
        # sleep() this is rather ugly, but should work with all versions of the
        # browser.
        portFile = os.path.join (self.userDataDir, 'DevToolsActivePort')
        for _ in range (100):
            try:
                with open (portFile, 'r') as fd:
                    return int (fd.readline ().strip ())
            except (FileNotFoundError, ValueError):
                # ValueError: the file may exist, but not contain the port
                # yet, so retry instead of failing
                await asyncio.sleep (0.2)
        return None

    async def _cleanup (self):
        """ Terminate the browser (if running) and remove its user data dir """
        if self.p is not None:
            try:
                self.p.terminate ()
                await self.p.wait ()
            except ProcessLookupError:
                # ok, fine, dead already
                pass

        # Try to delete the temporary directory multiple times. It looks like
        # Chrome will change files in there even after it exited (i.e. .wait()
        # returned). Very strange.
        if self.userDataDir is not None:
            for _ in range (5):
                try:
                    shutil.rmtree (self.userDataDir)
                    break
                except OSError:
                    # only filesystem errors are worth a retry, do not
                    # swallow cancellation or keyboard interrupts
                    await asyncio.sleep (0.2)

        self.p = None

    async def __aenter__ (self):
        """
        Start the browser

        :return: URL of the browser’s DevTools endpoint on localhost
        :raises Exception: If the browser did not announce its port in time
        """
        assert self.p is None

        self.userDataDir = mkdtemp (prefix=__package__ + '-chrome-userdata-')
        try:
            # start new session, so ^C does not affect subprocess
            self.p = await asyncio.create_subprocess_exec (*self._buildArgs (),
                    stdout=asyncio.subprocess.DEVNULL,
                    stderr=asyncio.subprocess.DEVNULL,
                    stdin=asyncio.subprocess.DEVNULL,
                    start_new_session=True)
            port = await self._readPort ()
            if port is None:
                raise Exception ('Chrome died on us.')
        except BaseException:
            # __aexit__ is not called if __aenter__ fails, do not leave
            # the process or the temporary directory behind
            await self._cleanup ()
            raise

        return URL.build(scheme='http', host='localhost', port=port)

    async def __aexit__ (self, *exc):
        """ Stop the browser, exceptions are never suppressed """
        await self._cleanup ()
        return False
class Passthrough:
    """
    Context manager that starts nothing: entering simply hands back the
    URL it was constructed with, exiting does nothing.
    """

    __slots__ = ('url', )

    def __init__ (self, url):
        """
        :param url: URL handed back when entering the context
        """
        self.url = URL (url)

    async def __aenter__ (self):
        """ Return the stored URL """
        return self.url

    async def __aexit__ (self, *exc):
        """ Nothing to clean up, exceptions are never suppressed """
        return False
def toCookieParam (m):
    """
    Convert Python’s http.cookies.Morsel to Chrome’s CookieParam, see
    https://chromedevtools.github.io/devtools-protocol/1-3/Network#type-CookieParam

    :param m: The Morsel to convert
    :return: Dict with name, value and every set attribute out of expires,
        path, domain, secure and httpOnly
    :raises ValueError: If max-age, comment or version is set
    """

    assert isinstance (m, Morsel)

    # unsupported by chrome, report the first one that is set
    offending = next ((attr for attr in ('max-age', 'comment', 'version') if m[attr]), None)
    if offending is not None:
        raise ValueError (f'Unsupported cookie attribute {offending} set, cannot convert')

    # attributes whose name differs between Morsel and CookieParam
    renamed = {'httponly': 'httpOnly'}
    param = {'name': m.key, 'value': m.value}
    for attr in ('expires', 'path', 'domain', 'secure', 'httponly'):
        attrValue = m[attr]
        if not attrValue:
            # unset attributes are left out entirely
            continue
        param[renamed.get (attr, attr)] = attrValue
    return param
|