devtools.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. # Copyright (c) 2017 crocoite contributors
  2. #
  3. # Permission is hereby granted, free of charge, to any person obtaining a copy
  4. # of this software and associated documentation files (the "Software"), to deal
  5. # in the Software without restriction, including without limitation the rights
  6. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. # copies of the Software, and to permit persons to whom the Software is
  8. # furnished to do so, subject to the following conditions:
  9. #
  10. # The above copyright notice and this permission notice shall be included in
  11. # all copies or substantial portions of the Software.
  12. #
  13. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. # THE SOFTWARE.
  20. """
  21. Communication with Google Chrome through its DevTools protocol.
  22. """
  23. import json, asyncio, logging, os
  24. from tempfile import mkdtemp
  25. import shutil
  26. from http.cookies import Morsel
  27. import aiohttp, websockets
  28. from yarl import URL
  29. from .util import StrJsonEncoder
  30. logger = logging.getLogger (__name__)
  31. class Browser:
  32. """
  33. Communicate with Google Chrome through its DevTools protocol.
  34. Asynchronous context manager that creates a new Tab when entering.
  35. Destroyed upon exit.
  36. """
  37. __slots__ = ('session', 'url', 'tab')
  38. def __init__ (self, url):
  39. self.url = URL (url)
  40. self.session = None
  41. self.tab = None
  42. async def __aiter__ (self):
  43. """ List all tabs """
  44. async with aiohttp.ClientSession () as session:
  45. async with session.get (self.url.with_path ('/json/list')) as r:
  46. resp = await r.json ()
  47. for tab in resp:
  48. if tab['type'] == 'page':
  49. yield tab
  50. async def __aenter__ (self):
  51. """ Create tab """
  52. assert self.tab is None
  53. assert self.session is None
  54. self.session = aiohttp.ClientSession ()
  55. async with self.session.get (self.url.with_path ('/json/new')) as r:
  56. resp = await r.json ()
  57. self.tab = await Tab.create (**resp)
  58. return self.tab
  59. async def __aexit__ (self, excType, excValue, traceback):
  60. assert self.tab is not None
  61. assert self.session is not None
  62. await self.tab.close ()
  63. try:
  64. async with self.session.get (self.url.with_path (f'/json/close/{self.tab.id}')) as r:
  65. resp = await r.text ()
  66. assert resp == 'Target is closing'
  67. except aiohttp.client_exceptions.ClientConnectorError:
  68. # oh boy, the whole browser crashed instead
  69. if excType is Crashed:
  70. # exception is reraised by `return False`
  71. pass
  72. else:
  73. # this one is more important
  74. raise
  75. self.tab = None
  76. await self.session.close ()
  77. self.session = None
  78. return False
  79. class TabFunction:
  80. """
  81. Helper class for infinite-depth tab functions.
  82. A method usually consists of namespace (Page, Network, …) and function name
  83. (getFoobar) separated by a dot. This class creates these function names
  84. while providing an intuitive Python interface (tab.Network.getFoobar).
  85. This was inspired by pychrome.
  86. """
  87. __slots__ = ('name', 'tab')
  88. def __init__ (self, name, tab):
  89. self.name = name
  90. self.tab = tab
  91. def __eq__ (self, b):
  92. assert isinstance (b, TabFunction)
  93. return self.name == b.name
  94. def __hash__ (self):
  95. return hash (self.name)
  96. def __getattr__ (self, k):
  97. return TabFunction (f'{self.name}.{k}', self.tab)
  98. async def __call__ (self, **kwargs):
  99. return await self.tab (self.name, **kwargs)
  100. def __repr__ (self):
  101. return f'<TabFunction {self.name}>'
  102. class TabException (Exception):
  103. pass
  104. class Crashed (TabException):
  105. pass
  106. class MethodNotFound (TabException):
  107. pass
  108. class InvalidParameter (TabException):
  109. pass
  110. # map error codes to native exceptions
  111. errorMap = {-32601: MethodNotFound, -32602: InvalidParameter}
  112. class Tab:
  113. """
  114. Communicate with a single Google Chrome browser tab.
  115. """
  116. __slots__ = ('id', 'wsUrl', 'ws', 'msgid', 'transactions', 'queue', '_recvHandle', 'crashed')
  117. def __init__ (self, tabid, ws):
  118. """ Do not use this method, use Browser context manager. """
  119. self.id = tabid
  120. self.ws = ws
  121. self.msgid = 1
  122. self.crashed = False
  123. self.transactions = {}
  124. self.queue = asyncio.Queue ()
  125. def __getattr__ (self, k):
  126. return TabFunction (k, self)
  127. async def __call__ (self, method, **kwargs):
  128. """
  129. Actually call browser method with kwargs
  130. """
  131. if self.crashed or self._recvHandle.done ():
  132. raise Crashed ()
  133. msgid = self.msgid
  134. self.msgid += 1
  135. message = {'method': method, 'params': kwargs, 'id': msgid}
  136. t = self.transactions[msgid] = {'event': asyncio.Event (), 'result': None}
  137. logger.debug (f'← {message}')
  138. await self.ws.send (json.dumps (message, cls=StrJsonEncoder))
  139. await t['event'].wait ()
  140. ret = t['result']
  141. del self.transactions[msgid]
  142. if isinstance (ret, Exception):
  143. raise ret
  144. return ret
  145. async def _recvProcess (self):
  146. """
  147. Receive process that dispatches received websocket frames
  148. These are either events which will be put into a queue or request
  149. responses which unblock a __call__.
  150. """
  151. async def markCrashed (reason):
  152. # all pending requests can be considered failed since the
  153. # browser state is lost
  154. for v in self.transactions.values ():
  155. v['result'] = Crashed (reason)
  156. v['event'].set ()
  157. # and all future requests will fail as well until reloaded
  158. self.crashed = True
  159. await self.queue.put (Crashed (reason))
  160. while True:
  161. try:
  162. msg = await self.ws.recv ()
  163. msg = json.loads (msg)
  164. except Exception as e:
  165. # right now we cannot recover from this
  166. await markCrashed (e)
  167. break
  168. logger.debug (f'→ {msg}')
  169. if 'id' in msg:
  170. msgid = msg['id']
  171. t = self.transactions.get (msgid, None)
  172. if t is not None:
  173. if 'error' in msg:
  174. e = msg['error']
  175. t['result'] = errorMap.get (e['code'], TabException) (e['code'], e['message'])
  176. else:
  177. t['result'] = msg['result']
  178. t['event'].set ()
  179. else:
  180. # ignore stale result
  181. pass # pragma: no cover
  182. elif 'method' in msg:
  183. # special treatment
  184. if msg['method'] == 'Inspector.targetCrashed':
  185. await markCrashed ('target')
  186. else:
  187. await self.queue.put (msg)
  188. else:
  189. assert False # pragma: no cover
  190. async def run (self):
  191. self._recvHandle = asyncio.ensure_future (self._recvProcess ())
  192. async def close (self):
  193. self._recvHandle.cancel ()
  194. await self.ws.close ()
  195. # no join, throw away the queue. There will be nobody listening on the
  196. # other end.
  197. #await self.queue.join ()
  198. @property
  199. def pending (self):
  200. return self.queue.qsize ()
  201. async def get (self):
  202. def getattrRecursive (obj, name):
  203. if '.' in name:
  204. n, ext = name.split ('.', 1)
  205. return getattrRecursive (getattr (obj, n), ext)
  206. return getattr (obj, name)
  207. if self.crashed:
  208. raise Crashed ()
  209. ret = await self.queue.get ()
  210. if isinstance (ret, Exception):
  211. raise ret
  212. return getattrRecursive (self, ret['method']), ret['params']
  213. @classmethod
  214. async def create (cls, **kwargs):
  215. """ Async init """
  216. # increase size limit of a single frame to something ridiciously high,
  217. # so we can safely grab screenshots
  218. maxSize = 100*1024*1024 # 100 MB
  219. # chrome does not like pings and kills the connection, disable them
  220. ws = await websockets.connect(kwargs['webSocketDebuggerUrl'],
  221. max_size=maxSize, ping_interval=None)
  222. ret = cls (kwargs['id'], ws)
  223. await ret.run ()
  224. return ret
  225. class Process:
  226. """ Start Google Chrome listening on a random port """
  227. __slots__ = ('binary', 'windowSize', 'p', 'userDataDir')
  228. def __init__ (self, binary='google-chrome-stable', windowSize=(1920, 1080)):
  229. self.binary = binary
  230. self.windowSize = windowSize
  231. self.p = None
  232. async def __aenter__ (self):
  233. assert self.p is None
  234. self.userDataDir = mkdtemp (prefix=__package__ + '-chrome-userdata-')
  235. # see https://github.com/GoogleChrome/chrome-launcher/blob/master/docs/chrome-flags-for-tools.md
  236. args = [self.binary,
  237. '--window-size={},{}'.format (*self.windowSize),
  238. f'--user-data-dir={self.userDataDir}', # use temporory user dir
  239. '--no-default-browser-check',
  240. '--no-first-run', # don’t show first run screen
  241. '--disable-breakpad', # no error reports
  242. '--disable-extensions',
  243. '--disable-infobars',
  244. '--disable-notifications', # no libnotify
  245. '--disable-background-networking', # disable background services (updating, safe browsing, …)
  246. '--safebrowsing-disable-auto-update',
  247. '--disable-sync', # no google account syncing
  248. '--metrics-recording-only', # do not submit metrics
  249. '--disable-default-apps',
  250. '--disable-background-timer-throttling',
  251. '--disable-client-side-phishing-detection',
  252. '--disable-popup-blocking',
  253. '--disable-prompt-on-repost',
  254. '--enable-automation', # enable various automation-related things
  255. '--password-store=basic',
  256. '--headless',
  257. '--disable-gpu',
  258. '--hide-scrollbars', # hide scrollbars on screenshots
  259. '--mute-audio', # don’t play any audio
  260. '--remote-debugging-port=0', # pick a port. XXX: we may want to use --remote-debugging-pipe instead
  261. '--homepage=about:blank',
  262. 'about:blank']
  263. # start new session, so ^C does not affect subprocess
  264. self.p = await asyncio.create_subprocess_exec (*args,
  265. stdout=asyncio.subprocess.DEVNULL,
  266. stderr=asyncio.subprocess.DEVNULL,
  267. stdin=asyncio.subprocess.DEVNULL,
  268. start_new_session=True)
  269. port = None
  270. # chrome writes its current active devtools port to a file. due to the
  271. # sleep() this is rather ugly, but should work with all versions of the
  272. # browser.
  273. for i in range (100):
  274. try:
  275. with open (os.path.join (self.userDataDir, 'DevToolsActivePort'), 'r') as fd:
  276. port = int (fd.readline ().strip ())
  277. break
  278. except FileNotFoundError:
  279. await asyncio.sleep (0.2)
  280. if port is None:
  281. raise Exception ('Chrome died on us.')
  282. return URL.build(scheme='http', host='localhost', port=port)
  283. async def __aexit__ (self, *exc):
  284. try:
  285. self.p.terminate ()
  286. await self.p.wait ()
  287. except ProcessLookupError:
  288. # ok, fine, dead already
  289. pass
  290. # Try to delete the temporary directory multiple times. It looks like
  291. # Chrome will change files in there even after it exited (i.e. .wait()
  292. # returned). Very strange.
  293. for i in range (5):
  294. try:
  295. shutil.rmtree (self.userDataDir)
  296. break
  297. except:
  298. await asyncio.sleep (0.2)
  299. self.p = None
  300. return False
  301. class Passthrough:
  302. __slots__ = ('url', )
  303. def __init__ (self, url):
  304. self.url = URL (url)
  305. async def __aenter__ (self):
  306. return self.url
  307. async def __aexit__ (self, *exc):
  308. return False
  309. def toCookieParam (m):
  310. """
  311. Convert Python’s http.cookies.Morsel to Chrome’s CookieParam, see
  312. https://chromedevtools.github.io/devtools-protocol/1-3/Network#type-CookieParam
  313. """
  314. assert isinstance (m, Morsel)
  315. out = {'name': m.key, 'value': m.value}
  316. # unsupported by chrome
  317. for k in ('max-age', 'comment', 'version'):
  318. if m[k]:
  319. raise ValueError (f'Unsupported cookie attribute {k} set, cannot convert')
  320. for mname, cname in [('expires', None), ('path', None), ('domain', None), ('secure', None), ('httponly', 'httpOnly')]:
  321. value = m[mname]
  322. if value:
  323. cname = cname or mname
  324. out[cname] = value
  325. return out