test_html.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. # Copyright (c) 2017 crocoite contributors
  2. #
  3. # Permission is hereby granted, free of charge, to any person obtaining a copy
  4. # of this software and associated documentation files (the "Software"), to deal
  5. # in the Software without restriction, including without limitation the rights
  6. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. # copies of the Software, and to permit persons to whom the Software is
  8. # furnished to do so, subject to the following conditions:
  9. #
  10. # The above copyright notice and this permission notice shall be included in
  11. # all copies or substantial portions of the Software.
  12. #
  13. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. # THE SOFTWARE.
  20. import asyncio
  21. import pytest, html5lib
  22. from html5lib.serializer import HTMLSerializer
  23. from html5lib.treewalkers import getTreeWalker
  24. from aiohttp import web
  25. from .html import StripTagFilter, StripAttributeFilter, ChromeTreeWalker
  26. from .test_devtools import tab, browser
  27. def test_strip_tag ():
  28. d = html5lib.parse ('<a>barbaz<b>foobar</b>.</a><b>foobar</b>.<b attr=1><c></c>')
  29. stream = StripTagFilter (getTreeWalker ('etree')(d), ['b', 'c'])
  30. serializer = HTMLSerializer ()
  31. assert serializer.render (stream) == '<a>barbaz.</a>.'
  32. def test_strip_attribute ():
  33. d = html5lib.parse ('<a b=1 c="yes" d></a><br b=2 c="no" d keep=1>')
  34. stream = StripAttributeFilter (getTreeWalker ('etree')(d), ['b', 'c', 'd'])
  35. serializer = HTMLSerializer ()
  36. assert serializer.render (stream) == '<a></a><br keep=1>'
  37. @pytest.mark.asyncio
  38. async def test_treewalker (tab):
  39. frames = await tab.Page.getFrameTree ()
  40. framehtml = '<HTML><HEAD></HEAD><BODY></BODY></HTML>'
  41. html = '<HTML><HEAD><META charset=utf-8></HEAD><BODY><H1>Hello</H1><!-- comment --><IFRAME></IFRAME></BODY></HTML>'
  42. rootframe = frames['frameTree']['frame']['id']
  43. await tab.Page.setDocumentContent (frameId=rootframe, html=html)
  44. dom = await tab.DOM.getDocument (depth=-1, pierce=True)
  45. docs = list (ChromeTreeWalker (dom['root']).split ())
  46. assert len(docs) == 2
  47. for i, doc in enumerate (docs):
  48. walker = ChromeTreeWalker (doc)
  49. serializer = HTMLSerializer ()
  50. result = serializer.render (iter(walker))
  51. if i == 0:
  52. assert result == html
  53. elif i == 1:
  54. assert result == framehtml
  55. cdataDoc = '<test><![CDATA[Hello world]]></test>'
  56. xmlHeader = '<?xml version="1.0" encoding="UTF-8"?>'
  57. async def hello(request):
  58. return web.Response(text=xmlHeader + cdataDoc, content_type='text/xml')
  59. @pytest.fixture
  60. async def server ():
  61. """ Simple HTTP server for testing notifications """
  62. app = web.Application()
  63. app.add_routes([web.get('/test.xml', hello)])
  64. runner = web.AppRunner(app)
  65. await runner.setup()
  66. site = web.TCPSite(runner, 'localhost', 8080)
  67. await site.start()
  68. yield app
  69. await runner.cleanup ()
  70. @pytest.mark.asyncio
  71. async def test_treewalker_cdata (tab, server):
  72. ret = await tab.Page.navigate (url='http://localhost:8080/test.xml')
  73. # wait until loaded XXX: replace with idle check
  74. await asyncio.sleep (0.5)
  75. dom = await tab.DOM.getDocument (depth=-1, pierce=True)
  76. docs = list (ChromeTreeWalker (dom['root']).split ())
  77. assert len(docs) == 1
  78. for i, doc in enumerate (docs):
  79. walker = ChromeTreeWalker (doc)
  80. serializer = HTMLSerializer ()
  81. result = serializer.render (iter(walker))
  82. # chrome will display a pretty-printed viewer *plus* the original
  83. # source (stripped of its xml header)
  84. assert cdataDoc in result