test_browser.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. # Copyright (c) 2017 crocoite contributors
  2. #
  3. # Permission is hereby granted, free of charge, to any person obtaining a copy
  4. # of this software and associated documentation files (the "Software"), to deal
  5. # in the Software without restriction, including without limitation the rights
  6. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. # copies of the Software, and to permit persons to whom the Software is
  8. # furnished to do so, subject to the following conditions:
  9. #
  10. # The above copyright notice and this permission notice shall be included in
  11. # all copies or substantial portions of the Software.
  12. #
  13. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. # THE SOFTWARE.
  20. import asyncio, socket
  21. from operator import itemgetter
  22. from http.server import BaseHTTPRequestHandler
  23. from datetime import datetime
  24. from yarl import URL
  25. from aiohttp import web
  26. from multidict import CIMultiDict
  27. from hypothesis import given
  28. import hypothesis.strategies as st
  29. from hypothesis.provisional import domains
  30. import pytest
  31. from .browser import RequestResponsePair, SiteLoader, Request, \
  32. UnicodeBody, ReferenceTimestamp, Base64Body, UnicodeBody, Request, \
  33. Response, NavigateError, PageIdle, FrameNavigated
  34. from .logger import Logger, Consumer
  35. from .devtools import Crashed, Process
  36. # if you want to know what’s going on:
  37. #import logging
  38. #logging.basicConfig(level=logging.DEBUG)
  39. class AssertConsumer (Consumer):
  40. def __call__ (self, **kwargs):
  41. assert 'uuid' in kwargs
  42. assert 'msg' in kwargs
  43. assert 'context' in kwargs
  44. return kwargs
  45. @pytest.fixture
  46. def logger ():
  47. return Logger (consumer=[AssertConsumer ()])
  48. @pytest.fixture
  49. async def loader (logger):
  50. async with Process () as browser, SiteLoader (browser, logger) as l:
  51. yield l
  52. @pytest.mark.asyncio
  53. async def test_crash (loader):
  54. with pytest.raises (Crashed):
  55. await loader.tab.Page.crash ()
  56. @pytest.mark.asyncio
  57. async def test_invalidurl (loader):
  58. host = 'nonexistent.example'
  59. # make sure the url does *not* resolve (some DNS intercepting ISP’s mess
  60. # with this)
  61. loop = asyncio.get_event_loop ()
  62. try:
  63. resolved = await loop.getaddrinfo (host, None)
  64. except socket.gaierror:
  65. url = URL.build (scheme='http', host=host)
  66. with pytest.raises (NavigateError):
  67. await loader.navigate (url)
  68. else:
  69. pytest.skip (f'host {host} resolved to {resolved}')
  70. timestamp = st.one_of (
  71. st.integers(min_value=0, max_value=2**32-1),
  72. st.floats (min_value=0, max_value=2**32-1),
  73. )
  74. @given(timestamp, timestamp, timestamp)
  75. def test_referencetimestamp (relativeA, absoluteA, relativeB):
  76. ts = ReferenceTimestamp (relativeA, absoluteA)
  77. absoluteA = datetime.utcfromtimestamp (absoluteA)
  78. absoluteB = ts (relativeB)
  79. assert (absoluteA < absoluteB and relativeA < relativeB) or \
  80. (absoluteA >= absoluteB and relativeA >= relativeB)
  81. assert abs ((absoluteB - absoluteA).total_seconds () - (relativeB - relativeA)) < 10e-6
  82. def urls ():
  83. """ Build http/https URL """
  84. scheme = st.sampled_from (['http', 'https'])
  85. # Path must start with a slash
  86. pathSt = st.builds (lambda x: '/' + x, st.text ())
  87. args = st.fixed_dictionaries ({
  88. 'scheme': scheme,
  89. 'host': domains (),
  90. 'port': st.one_of (st.none (), st.integers (min_value=1, max_value=2**16-1)),
  91. 'path': pathSt,
  92. 'query_string': st.text (),
  93. 'fragment': st.text (),
  94. })
  95. return st.builds (lambda x: URL.build (**x), args)
  96. def urlsStr ():
  97. return st.builds (lambda x: str (x), urls ())
  98. asciiText = st.text (st.characters (min_codepoint=32, max_codepoint=126))
  99. def chromeHeaders ():
  100. # token as defined by https://tools.ietf.org/html/rfc7230#section-3.2.6
  101. token = st.sampled_from('abcdefghijklmnopqrstuvwxyz0123456789!#$%&\'*+-.^_`|~')
  102. # XXX: the value should be asciiText without leading/trailing spaces
  103. return st.dictionaries (token, token)
  104. def fixedDicts (fixed, dynamic):
  105. return st.builds (lambda x, y: x.update (y), st.fixed_dictionaries (fixed), st.lists (dynamic))
  106. def chromeRequestWillBeSent (reqid, url):
  107. methodSt = st.sampled_from (['GET', 'POST', 'PUT', 'DELETE'])
  108. return st.fixed_dictionaries ({
  109. 'requestId': reqid,
  110. 'initiator': st.just ('Test'),
  111. 'wallTime': timestamp,
  112. 'timestamp': timestamp,
  113. 'request': st.fixed_dictionaries ({
  114. 'url': url,
  115. 'method': methodSt,
  116. 'headers': chromeHeaders (),
  117. # XXX: postData, hasPostData
  118. })
  119. })
  120. def chromeResponseReceived (reqid, url):
  121. mimeTypeSt = st.one_of (st.none (), st.just ('text/html'))
  122. remoteIpAddressSt = st.one_of (st.none (), st.just ('127.0.0.1'))
  123. protocolSt = st.one_of (st.none (), st.just ('h2'))
  124. statusCodeSt = st.integers (min_value=100, max_value=999)
  125. typeSt = st.sampled_from (['Document', 'Stylesheet', 'Image', 'Media',
  126. 'Font', 'Script', 'TextTrack', 'XHR', 'Fetch', 'EventSource',
  127. 'WebSocket', 'Manifest', 'SignedExchange', 'Ping',
  128. 'CSPViolationReport', 'Other'])
  129. return st.fixed_dictionaries ({
  130. 'requestId': reqid,
  131. 'timestamp': timestamp,
  132. 'type': typeSt,
  133. 'response': st.fixed_dictionaries ({
  134. 'url': url,
  135. 'requestHeaders': chromeHeaders (), # XXX: make this optional
  136. 'headers': chromeHeaders (),
  137. 'status': statusCodeSt,
  138. 'statusText': asciiText,
  139. 'mimeType': mimeTypeSt,
  140. 'remoteIPAddress': remoteIpAddressSt,
  141. 'protocol': protocolSt,
  142. })
  143. })
  144. def chromeReqResp ():
  145. # XXX: will this gnerated the same url for all testcases?
  146. reqid = st.shared (st.text (), 'reqresp')
  147. url = st.shared (urlsStr (), 'reqresp')
  148. return st.tuples (chromeRequestWillBeSent (reqid, url),
  149. chromeResponseReceived (reqid, url))
  150. def requestResponsePair ():
  151. def f (creq, cresp, hasPostData, reqBody, respBody):
  152. i = RequestResponsePair ()
  153. i.fromRequestWillBeSent (creq)
  154. i.request.hasPostData = hasPostData
  155. if hasPostData:
  156. i.request.body = reqBody
  157. if cresp is not None:
  158. i.fromResponseReceived (cresp)
  159. if respBody is not None:
  160. i.response.body = respBody
  161. return i
  162. bodySt = st.one_of (
  163. st.none (),
  164. st.builds (UnicodeBody, st.text ()),
  165. st.builds (Base64Body.fromBytes, st.binary ())
  166. )
  167. return st.builds (lambda reqresp, hasPostData, reqBody, respBody:
  168. f (reqresp[0], reqresp[1], hasPostData, reqBody, respBody),
  169. chromeReqResp (), st.booleans (), bodySt, bodySt)
  170. @given(chromeReqResp ())
  171. def test_requestResponsePair (creqresp):
  172. creq, cresp = creqresp
  173. item = RequestResponsePair ()
  174. assert item.id is None
  175. assert item.url is None
  176. assert item.request is None
  177. assert item.response is None
  178. item.fromRequestWillBeSent (creq)
  179. assert item.id == creq['requestId']
  180. url = URL (creq['request']['url'])
  181. assert item.url == url
  182. assert item.request is not None
  183. assert item.request.timestamp == datetime.utcfromtimestamp (creq['wallTime'])
  184. assert set (item.request.headers.keys ()) == set (creq['request']['headers'].keys ())
  185. assert item.response is None
  186. item.fromResponseReceived (cresp)
  187. # url will not be overwritten
  188. assert item.id == creq['requestId'] == cresp['requestId']
  189. assert item.url == url
  190. assert item.request is not None
  191. assert set (item.request.headers.keys ()) == set (cresp['response']['requestHeaders'].keys ())
  192. assert item.response is not None
  193. assert set (item.response.headers.keys ()) == set (cresp['response']['headers'].keys ())
  194. assert (item.response.timestamp - item.request.timestamp).total_seconds () - \
  195. (cresp['timestamp'] - creq['timestamp']) < 10e-6
  196. @given(chromeReqResp ())
  197. def test_requestResponsePair_eq (creqresp):
  198. creq, cresp = creqresp
  199. item = RequestResponsePair ()
  200. item2 = RequestResponsePair ()
  201. assert item == item
  202. assert item == item2
  203. item.fromRequestWillBeSent (creq)
  204. assert item != item2
  205. item2.fromRequestWillBeSent (creq)
  206. assert item == item
  207. assert item == item2
  208. item.fromResponseReceived (cresp)
  209. assert item != item2
  210. item2.fromResponseReceived (cresp)
  211. assert item == item
  212. assert item == item2
  213. # XXX: test for inequality with different parameters
  214. ### Google Chrome integration tests ###
  215. serverUrl = URL.build (scheme='http', host='localhost', port=8080)
  216. items = [
  217. RequestResponsePair (
  218. url=serverUrl.with_path ('/encoding/utf-8'),
  219. request=Request (method='GET'),
  220. response=Response (status=200, headers=CIMultiDict ([('Content-Type', 'text/html; charset=utf-8')]),
  221. body=UnicodeBody ('äöü'), mimeType='text/html')
  222. ),
  223. RequestResponsePair (
  224. url=serverUrl.with_path ('/encoding/latin1'),
  225. request=Request (method='GET'),
  226. response=Response (status=200, headers=CIMultiDict ([('Content-Type', 'text/html; charset=latin1')]),
  227. body=UnicodeBody ('äöü'), mimeType='text/html')
  228. ),
  229. RequestResponsePair (
  230. url=serverUrl.with_path ('/encoding/utf-16'),
  231. request=Request (method='GET'),
  232. response=Response (status=200, headers=CIMultiDict ([('Content-Type', 'text/html; charset=utf-16')]),
  233. body=UnicodeBody ('äöü'), mimeType='text/html')
  234. ),
  235. RequestResponsePair (
  236. url=serverUrl.with_path ('/encoding/ISO-8859-1'),
  237. request=Request (method='GET'),
  238. response=Response (status=200, headers=CIMultiDict ([('Content-Type', 'text/html; charset=ISO-8859-1')]),
  239. body=UnicodeBody ('äöü'), mimeType='text/html')
  240. ),
  241. RequestResponsePair (
  242. url=serverUrl.with_path ('/status/200'),
  243. request=Request (method='GET'),
  244. response=Response (status=200, headers=CIMultiDict ([('Content-Type', 'text/plain')]),
  245. body=b'',
  246. mimeType='text/plain'),
  247. ),
  248. # redirects never have a response body
  249. RequestResponsePair (
  250. url=serverUrl.with_path ('/status/301'),
  251. request=Request (method='GET'),
  252. response=Response (status=301,
  253. headers=CIMultiDict ([('Content-Type', 'text/plain'),
  254. ('Location', str (serverUrl.with_path ('/status/301/redirected')))]),
  255. body=None,
  256. mimeType='text/plain'),
  257. ),
  258. RequestResponsePair (
  259. url=serverUrl.with_path ('/image/png'),
  260. request=Request (method='GET'),
  261. response=Response (status=200, headers=CIMultiDict ([('Content-Type', 'image/png')]),
  262. body=Base64Body.fromBytes (b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x00\x00\x00\x00:~\x9bU\x00\x00\x00\nIDAT\x08\x1dc\xf8\x0f\x00\x01\x01\x01\x006_g\x80\x00\x00\x00\x00IEND\xaeB`\x82'),
  263. mimeType='image/png'),
  264. ),
  265. RequestResponsePair (
  266. url=serverUrl.with_path ('/script/alert'),
  267. request=Request (method='GET'),
  268. response=Response (status=200, headers=CIMultiDict ([('Content-Type', 'text/html; charset=utf-8')]),
  269. body=UnicodeBody ('''<html><body><script>
  270. window.addEventListener("beforeunload", function (e) {
  271. e.returnValue = "bye?";
  272. return e.returnValue;
  273. });
  274. alert("stopping here");
  275. if (confirm("are you sure?") || prompt ("42?")) {
  276. window.location = "/nonexistent";
  277. }
  278. </script></body></html>'''), mimeType='text/html')
  279. ),
  280. ]
  281. @pytest.mark.asyncio
  282. # would be nice if we could use hypothesis here somehow
  283. @pytest.mark.parametrize("golden", items)
  284. async def test_integration_item (loader, golden):
  285. async def f (req):
  286. body = golden.response.body
  287. contentType = golden.response.headers.get ('content-type', '') if golden.response.headers is not None else ''
  288. charsetOff = contentType.find ('charset=')
  289. if isinstance (body, UnicodeBody) and charsetOff != -1:
  290. encoding = contentType[charsetOff+len ('charset='):]
  291. body = golden.response.body.decode ('utf-8').encode (encoding)
  292. return web.Response (body=body, status=golden.response.status,
  293. headers=golden.response.headers)
  294. app = web.Application ()
  295. app.router.add_route (golden.request.method, golden.url.path, f)
  296. runner = web.AppRunner(app)
  297. await runner.setup()
  298. site = web.TCPSite(runner, serverUrl.host, serverUrl.port)
  299. try:
  300. await site.start()
  301. except Exception as e:
  302. pytest.skip (e)
  303. haveReqResp = False
  304. haveNavigated = False
  305. try:
  306. await loader.navigate (golden.url)
  307. it = loader.__aiter__ ()
  308. while True:
  309. try:
  310. item = await asyncio.wait_for (it.__anext__ (), timeout=1)
  311. except asyncio.TimeoutError:
  312. break
  313. # XXX: can only check the first req/resp right now (due to redirect)
  314. if isinstance (item, RequestResponsePair) and not haveReqResp:
  315. # we do not know this in advance
  316. item.request.initiator = None
  317. item.request.headers = None
  318. item.remoteIpAddress = None
  319. item.protocol = None
  320. item.resourceType = None
  321. if item.response:
  322. assert item.response.statusText is not None
  323. item.response.statusText = None
  324. del item.response.headers['server']
  325. del item.response.headers['content-length']
  326. del item.response.headers['date']
  327. assert item == golden
  328. haveReqResp = True
  329. elif isinstance (item, FrameNavigated):
  330. # XXX: can’t check this, because of the redirect
  331. #assert item.url == golden.url
  332. haveNavigated = True
  333. finally:
  334. assert haveReqResp
  335. assert haveNavigated
  336. await runner.cleanup ()
  337. def test_page_idle ():
  338. for v in (True, False):
  339. idle = PageIdle (v)
  340. assert bool (idle) == v