test_behavior.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. # Copyright (c) 2017 crocoite contributors
  2. #
  3. # Permission is hereby granted, free of charge, to any person obtaining a copy
  4. # of this software and associated documentation files (the "Software"), to deal
  5. # in the Software without restriction, including without limitation the rights
  6. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. # copies of the Software, and to permit persons to whom the Software is
  8. # furnished to do so, subject to the following conditions:
  9. #
  10. # The above copyright notice and this permission notice shall be included in
  11. # all copies or substantial portions of the Software.
  12. #
  13. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. # THE SOFTWARE.
  20. import asyncio, os, yaml, re, math, struct
  21. from functools import partial
  22. from operator import attrgetter
  23. import pytest
  24. from yarl import URL
  25. from aiohttp import web
  26. import pkg_resources
  27. from .logger import Logger
  28. from .devtools import Process
  29. from .behavior import Scroll, Behavior, ExtractLinks, ExtractLinksEvent, Crash, \
  30. Screenshot, ScreenshotEvent, DomSnapshot, DomSnapshotEvent, mapOrIgnore
  31. from .controller import SinglePageController, EventHandler, ControllerSettings
  32. from .devtools import Crashed
  33. with pkg_resources.resource_stream (__name__, os.path.join ('data', 'click.yaml')) as fd:
  34. sites = list (yaml.safe_load_all (fd))
  35. clickParam = []
  36. for o in sites:
  37. for s in o['selector']:
  38. for u in s.get ('urls', []):
  39. clickParam.append ((u, s['selector']))
  40. class ClickTester (Behavior):
  41. """
  42. Test adapter checking a given selector exists after loading the page
  43. """
  44. __slots__ = ('selector', )
  45. name = 'testclick'
  46. def __init__ (self, loader, logger, selector):
  47. super ().__init__ (loader, logger)
  48. self.selector = selector
  49. async def onfinish (self):
  50. tab = self.loader.tab
  51. results = await tab.DOM.getDocument ()
  52. rootNode = results['root']['nodeId']
  53. results = await tab.DOM.querySelectorAll (nodeId=rootNode, selector=self.selector)
  54. assert results['nodeIds'], self.selector
  55. # XXX: this is not true for every element we click. Github uses <button
  56. # type=submit> and <form> without an event listener on the <button>
  57. # # verify that an event listener exists
  58. # for nid in results['nodeIds']:
  59. # obj = (await tab.DOM.resolveNode (nodeId=nid))['object']
  60. # assert obj['type'] == 'object'
  61. # listeners = (await tab.DOMDebugger.getEventListeners (objectId=obj['objectId']))['listeners']
  62. # assert any (map (lambda x: x['type'] == 'click', listeners)), listeners
  63. return
  64. yield # pragma: no cover
  65. @pytest.mark.parametrize("url,selector", clickParam)
  66. @pytest.mark.asyncio
  67. @pytest.mark.xfail(reason='depends on network access')
  68. async def test_click_selectors (url, selector):
  69. """
  70. Make sure the CSS selector exists on an example url
  71. """
  72. logger = Logger ()
  73. settings = ControllerSettings (idleTimeout=5, timeout=10)
  74. # Some selectors are loaded dynamically and require scrolling
  75. controller = SinglePageController (url=url, logger=logger,
  76. settings=settings,
  77. service=Process (),
  78. behavior=[Scroll, partial(ClickTester, selector=selector)])
  79. await controller.run ()
  80. matchParam = []
  81. for o in sites:
  82. for s in o['selector']:
  83. for u in s.get ('urls', []):
  84. matchParam.append ((o['match'], URL (u)))
  85. @pytest.mark.parametrize("match,url", matchParam)
  86. @pytest.mark.asyncio
  87. async def test_click_match (match, url):
  88. """ Test urls must match """
  89. # keep this aligned with click.js
  90. assert re.match (match, url.host, re.I)
  91. class AccumHandler (EventHandler):
  92. """ Test adapter that accumulates all incoming items """
  93. __slots__ = ('data')
  94. def __init__ (self):
  95. super().__init__ ()
  96. self.data = []
  97. async def push (self, item):
  98. self.data.append (item)
  99. async def simpleServer (url, response):
  100. async def f (req):
  101. return web.Response (body=response, status=200, content_type='text/html', charset='utf-8')
  102. app = web.Application ()
  103. app.router.add_route ('GET', url.path, f)
  104. runner = web.AppRunner(app)
  105. await runner.setup()
  106. site = web.TCPSite(runner, url.host, url.port)
  107. await site.start()
  108. return runner
  109. @pytest.mark.asyncio
  110. async def test_extract_links ():
  111. """
  112. Make sure the CSS selector exists on an example url
  113. """
  114. url = URL.build (scheme='http', host='localhost', port=8080)
  115. runner = await simpleServer (url, """<html><head></head>
  116. <body>
  117. <div>
  118. <a href="/relative">foo</a>
  119. <a href="http://example.com/absolute/">foo</a>
  120. <a href="https://example.com/absolute/secure">foo</a>
  121. <a href="#anchor">foo</a>
  122. <a href="http://neue_preise_f%c3%bcr_zahnimplantate_k%c3%b6nnten_sie_%c3%bcberraschen">foo</a>
  123. <a href="/hidden/visibility" style="visibility: hidden">foo</a>
  124. <a href="/hidden/display" style="display: none">foo</a>
  125. <div style="display: none">
  126. <a href="/hidden/display/insidediv">foo</a>
  127. </div>
  128. <!--<a href="/hidden/comment">foo</a>-->
  129. <p><img src="shapes.png" usemap="#shapes">
  130. <map name="shapes"><area shape=rect coords="50,50,100,100" href="/map/rect"></map></p>
  131. </div>
  132. </body></html>""")
  133. try:
  134. handler = AccumHandler ()
  135. logger = Logger ()
  136. controller = SinglePageController (url=url, logger=logger,
  137. service=Process (), behavior=[ExtractLinks], handler=[handler])
  138. await controller.run ()
  139. links = []
  140. for d in handler.data:
  141. if isinstance (d, ExtractLinksEvent):
  142. links.extend (d.links)
  143. assert sorted (links) == sorted ([
  144. url.with_path ('/relative'),
  145. url.with_fragment ('anchor'),
  146. URL ('http://neue_preise_f%C3%BCr_zahnimplantate_k%C3%B6nnten_sie_%C3%BCberraschen'),
  147. URL ('http://example.com/absolute/'),
  148. URL ('https://example.com/absolute/secure'),
  149. url.with_path ('/hidden/visibility'), # XXX: shall we ignore these as well?
  150. url.with_path ('/map/rect'),
  151. ])
  152. finally:
  153. await runner.cleanup ()
  154. @pytest.mark.asyncio
  155. async def test_crash ():
  156. """
  157. Crashing through Behavior works?
  158. """
  159. url = URL.build (scheme='http', host='localhost', port=8080)
  160. runner = await simpleServer (url, '<html></html>')
  161. try:
  162. logger = Logger ()
  163. controller = SinglePageController (url=url, logger=logger,
  164. service=Process (), behavior=[Crash])
  165. with pytest.raises (Crashed):
  166. await controller.run ()
  167. finally:
  168. await runner.cleanup ()
  169. @pytest.mark.asyncio
  170. async def test_screenshot ():
  171. """
  172. Make sure screenshots are taken and have the correct dimensions. We can’t
  173. and don’t want to check their content.
  174. """
  175. # ceil(0) == 0, so starting with 1
  176. for expectHeight in (1, Screenshot.maxDim, Screenshot.maxDim+1, Screenshot.maxDim*2+Screenshot.maxDim//2):
  177. url = URL.build (scheme='http', host='localhost', port=8080)
  178. runner = await simpleServer (url, f'<html><body style="margin: 0; padding: 0;"><div style="height: {expectHeight}"></div></body></html>')
  179. try:
  180. handler = AccumHandler ()
  181. logger = Logger ()
  182. controller = SinglePageController (url=url, logger=logger,
  183. service=Process (), behavior=[Screenshot], handler=[handler])
  184. await controller.run ()
  185. screenshots = list (filter (lambda x: isinstance (x, ScreenshotEvent), handler.data))
  186. assert len (screenshots) == math.ceil (expectHeight/Screenshot.maxDim)
  187. totalHeight = 0
  188. for s in screenshots:
  189. assert s.url == url
  190. # PNG ident is fixed, IHDR is always the first chunk
  191. assert s.data.startswith (b'\x89PNG\r\n\x1a\n\x00\x00\x00\x0dIHDR')
  192. width, height = struct.unpack ('>II', s.data[16:24])
  193. assert height <= Screenshot.maxDim
  194. totalHeight += height
  195. # screenshot height is at least canvas height (XXX: get hardcoded
  196. # value from devtools.Process)
  197. assert totalHeight == max (expectHeight, 1080)
  198. finally:
  199. await runner.cleanup ()
  200. @pytest.mark.asyncio
  201. async def test_dom_snapshot ():
  202. """
  203. Behavior plug-in works, <canvas> is replaced by static image, <script> is
  204. stripped. Actual conversion from Chrome DOM to HTML is validated by module
  205. .test_html
  206. """
  207. url = URL.build (scheme='http', host='localhost', port=8080)
  208. runner = await simpleServer (url, f'<html><body><p>ÄÖÜäöü</p><script>alert("yes");</script><canvas id="canvas" width="1" height="1">Alternate text.</canvas></body></html>')
  209. try:
  210. handler = AccumHandler ()
  211. logger = Logger ()
  212. controller = SinglePageController (url=url, logger=logger,
  213. service=Process (), behavior=[DomSnapshot], handler=[handler])
  214. await controller.run ()
  215. snapshots = list (filter (lambda x: isinstance (x, DomSnapshotEvent), handler.data))
  216. assert len (snapshots) == 1
  217. doc = snapshots[0].document
  218. assert doc.startswith ('<HTML><HEAD><meta charset=utf-8></HEAD><BODY><P>ÄÖÜäöü</P><IMG id=canvas width=1 height=1 src="data:image/png;base64,'.encode ('utf-8'))
  219. assert doc.endswith ('></BODY></HTML>'.encode ('utf-8'))
  220. finally:
  221. await runner.cleanup ()
  222. def test_mapOrIgnore ():
  223. def fail (x):
  224. if x < 50:
  225. raise Exception ()
  226. return x+1
  227. assert list (mapOrIgnore (fail, range (100))) == list (range (51, 101))