test_statusandheaders.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. >>> st1 = StatusAndHeadersParser(['HTTP/1.0']).parse(StringIO(status_headers_1))
  5. >>> st1
  6. StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [('Content-Type', 'ABC'), ('Some', 'Value'), ('Multi-Line', 'Value1 Also This')])
  7. # add range (and byte headers)
  8. >>> StatusAndHeaders(statusline = '200 OK', headers=[(b'Content-Type', b'text/plain')]).add_range(10, 4, 100)
  9. StatusAndHeaders(protocol = '', statusline = '206 Partial Content', headers = [('Content-Type', 'text/plain'), ('Content-Range', 'bytes 10-13/100'), ('Content-Length', '4'), ('Accept-Ranges', 'bytes')])
  10. # other protocol expected
  11. >>> StatusAndHeadersParser(['Other']).parse(StringIO(status_headers_1)) # doctest: +IGNORE_EXCEPTION_DETAIL
  12. Traceback (most recent call last):
  13. StatusAndHeadersParserException: Expected Status Line starting with ['Other'] - Found: HTTP/1.0 200 OK
  14. >>> StatusAndHeadersParser(['Other'], verify=False).parse(StringIO(status_headers_1))
  15. StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [('Content-Type', 'ABC'), ('Some', 'Value'), ('Multi-Line', 'Value1 Also This')])
  16. # verify protocol line
  17. >>> StatusAndHeadersParser(['HTTP/1.0'], verify=True).parse(StringIO(unknown_protocol_headers)) # doctest: +IGNORE_EXCEPTION_DETAIL
  18. Traceback (most recent call last):
  19. StatusAndHeadersParserException: Expected Status Line starting with ['HTTP/1.0'] - Found: OtherBlah
  20. # allow unexpected/invalid protocol line
  21. >>> StatusAndHeadersParser(['HTTP/1.0'], verify=False).parse(StringIO(unknown_protocol_headers))
  22. StatusAndHeaders(protocol = 'OtherBlah', statusline = '', headers = [('Foo', 'Bar')])
  23. # test equality op
  24. >>> st1 == StatusAndHeadersParser(['HTTP/1.0']).parse(StringIO(status_headers_1))
  25. True
  26. # replace header, print new headers
  27. >>> st1.replace_header('some', 'Another-Value'); st1
  28. 'Value'
  29. StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [('Content-Type', 'ABC'), ('Some', 'Another-Value'), ('Multi-Line', 'Value1 Also This')])
  30. # replace header with dict-like api, print new headers
  31. >>> st1['some'] = 'Yet-Another-Value'; st1
  32. StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [('Content-Type', 'ABC'), ('Some', 'Yet-Another-Value'), ('Multi-Line', 'Value1 Also This')])
  33. # remove header
  34. >>> st1.remove_header('some')
  35. True
  36. # already removed
  37. >>> st1.remove_header('Some')
  38. False
  39. # add header with dict-like api, print new headers
  40. >>> st1['foo'] = 'bar'; st1
  41. StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [('Content-Type', 'ABC'), ('Multi-Line', 'Value1 Also This'), ('foo', 'bar')])
  42. # dict-like api existence and get value
  43. >>> 'bar' in st1
  44. False
  45. >>> 'foo' in st1
  46. True
  47. >>> st1['bar']
  48. >>> st1.get('bar')
  49. >>> st1['foo']
  50. 'bar'
  51. >>> st1.get('foo')
  52. 'bar'
  53. # remove header with dict-like api, print new headers
  54. >>> del st1['foo']; st1
  55. StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '200 OK', headers = [('Content-Type', 'ABC'), ('Multi-Line', 'Value1 Also This')])
  56. # empty
  57. >>> st2 = StatusAndHeadersParser(['HTTP/1.0']).parse(StringIO(status_headers_2)); x = st2.validate_statusline('204 No Content'); st2
  58. StatusAndHeaders(protocol = '', statusline = '204 No Content', headers = [])
  59. >>> StatusAndHeadersParser(['HTTP/1.0']).parse(StringIO(status_headers_3))
  60. StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '204 Empty', headers = [('Content-Type', 'Value'), ('Content-Length', '0')])
  61. # case-insensitive match
  62. >>> StatusAndHeadersParser(['HTTP/1.0']).parse(StringIO(status_headers_4))
  63. StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '204 empty', headers = [('Content-Type', 'Value'), ('Content-Length', '0')])
  64. """
  65. from warcio.statusandheaders import StatusAndHeadersParser, StatusAndHeaders
  66. from six import StringIO
  67. import pytest
  68. status_headers_1 = "\
  69. HTTP/1.0 200 OK\r\n\
  70. Content-Type: ABC\r\n\
  71. HTTP/1.0 200 OK\r\n\
  72. Some: Value\r\n\
  73. Multi-Line: Value1\r\n\
  74. Also This\r\n\
  75. \r\n\
  76. Body"
  77. status_headers_2 = """
  78. """
  79. status_headers_3 = "\
  80. HTTP/1.0 204 Empty\r\n\
  81. Content-Type: Value\r\n\
  82. %Invalid%\r\n\
  83. \tMultiline\r\n\
  84. Content-Length: 0\r\n\
  85. \r\n"
  86. status_headers_4 = "\
  87. http/1.0 204 empty\r\n\
  88. Content-Type: Value\r\n\
  89. %Invalid%\r\n\
  90. \tMultiline\r\n\
  91. Content-Length: 0\r\n\
  92. \r\n"
  93. unknown_protocol_headers = "\
  94. OtherBlah\r\n\
  95. Foo: Bar\r\n\
  96. \r\n"
  97. req_headers = "\
  98. GET / HTTP/1.0\r\n\
  99. Foo: Bar\r\n\
  100. Content-Length: 0\r\n"
  101. if __name__ == "__main__":
  102. import doctest
  103. doctest.testmod()
  104. def test_to_str_1():
  105. res = str(StatusAndHeadersParser(['HTTP/1.0']).parse(StringIO(status_headers_1)))
  106. exp = "\
  107. HTTP/1.0 200 OK\r\n\
  108. Content-Type: ABC\r\n\
  109. Some: Value\r\n\
  110. Multi-Line: Value1 Also This\r\n\
  111. "
  112. assert(res == exp)
  113. def test_to_str_exclude():
  114. def exclude(h):
  115. if h[0].lower() == 'multi-line':
  116. return None
  117. return h
  118. sah = StatusAndHeadersParser(['HTTP/1.0']).parse(StringIO(status_headers_1))
  119. res = sah.to_str(exclude)
  120. exp = "\
  121. HTTP/1.0 200 OK\r\n\
  122. Content-Type: ABC\r\n\
  123. Some: Value\r\n\
  124. "
  125. assert(res == exp)
  126. assert(sah.to_bytes(exclude) == (exp.encode('latin-1') + b'\r\n'))
  127. def test_to_str_2():
  128. res = str(StatusAndHeadersParser(['GET']).parse(StringIO(req_headers)))
  129. assert(res == req_headers)
  130. res = str(StatusAndHeadersParser(['GET']).parse(StringIO(req_headers + '\r\n')))
  131. assert(res == req_headers)
  132. def test_to_str_with_remove():
  133. res = StatusAndHeadersParser(['GET']).parse(StringIO(req_headers))
  134. res.remove_header('Foo')
  135. exp = "\
  136. GET / HTTP/1.0\r\n\
  137. Content-Length: 0\r\n"
  138. assert(str(res) == exp)
  139. def test_status_empty():
  140. with pytest.raises(EOFError):
  141. StatusAndHeadersParser([], verify=False).parse(StringIO(''))
  142. def test_status_one_word():
  143. res = StatusAndHeadersParser(['GET'], verify=False).parse(StringIO('A'))
  144. assert(str(res) == 'A\r\n')
  145. def test_validate_status():
  146. assert StatusAndHeaders('200 OK', []).validate_statusline('204 No Content')
  147. assert not StatusAndHeaders('Bad OK', []).validate_statusline('204 No Content')
  148. def test_non_ascii():
  149. st = StatusAndHeaders('200 OK', [('Custom-Header', 'attachment; filename="Éxamplè"')])
  150. res = st.to_ascii_bytes().decode('ascii')
  151. assert res == "\
  152. 200 OK\r\n\
  153. Custom-Header: attachment; filename*=UTF-8''%C3%89xampl%C3%A8\r\n\
  154. \r\n\
  155. "
  156. def test_non_ascii_2():
  157. st = StatusAndHeaders('200 OK', [('Custom-Header', 'value; filename="Éxamplè"; param; other=испытание; another')])
  158. res = st.to_ascii_bytes().decode('ascii')
  159. assert res == "\
  160. 200 OK\r\n\
  161. Custom-Header: value; filename*=UTF-8''%C3%89xampl%C3%A8; param; other*=UTF-8''%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5; another\r\n\
  162. \r\n\
  163. "
  164. def test_non_ascii_3():
  165. st = StatusAndHeaders('200 OK', [('Custom-Header', '“max-age=31536000″')])
  166. res = st.to_ascii_bytes().decode('ascii')
  167. assert res == "\
  168. 200 OK\r\n\
  169. Custom-Header: %E2%80%9Cmax-age%3D31536000%E2%80%B3\r\n\
  170. \r\n\
  171. "