test_check_digest_examples.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. from warcio.cli import main
  2. from warcio import ArchiveIterator
  3. from warcio.warcwriter import BufferWARCWriter
  4. from . import get_test_file
  5. import os
  6. SKIP = ['example-trunc.warc',
  7. 'example-iana.org-chunked.warc',
  8. 'example-wrong-chunks.warc.gz',
  9. 'example-bad-non-chunked.warc.gz',
  10. 'example-digest.warc'
  11. ]
  12. def pytest_generate_tests(metafunc):
  13. if 'test_filename' in metafunc.fixturenames:
  14. files = [filename for filename in os.listdir(get_test_file('.'))
  15. if filename not in SKIP and filename.endswith(('.warc', '.warc.gz', '.arc', '.arc.gz'))]
  16. metafunc.parametrize('test_filename', files)
  17. class TestExamplesDigest(object):
  18. def check_helper(self, args, expected_exit_value, capsys):
  19. exit_value = None
  20. try:
  21. main(args=args)
  22. except SystemExit as e:
  23. exit_value = e.code
  24. finally:
  25. assert exit_value == expected_exit_value
  26. return capsys.readouterr()[0] # list for py33 support
  27. def test_check_invalid(self, capsys):
  28. filenames = [get_test_file('example-digest.warc')]
  29. args = ['check'] + filenames
  30. value = self.check_helper(args, 1, capsys)
  31. assert value.count('payload digest failed') == 1
  32. assert value.count('WARC-Record-ID') == 1
  33. args = ['check', '-v'] + filenames
  34. value = self.check_helper(args, 1, capsys)
  35. assert value.count('payload digest failed') == 1
  36. assert value.count('digest pass') == 3
  37. assert value.count('WARC-Record-ID') == 4
  38. def test_check_valid(self, capsys):
  39. filenames = [get_test_file('example.warc'), get_test_file('example.warc.gz')]
  40. args = ['check'] + filenames
  41. expected = ''
  42. assert self.check_helper(args, 0, capsys) == expected
  43. args = ['check', '-v'] + filenames
  44. value = self.check_helper(args, 0, capsys)
  45. # two digests per file (payload and block)
  46. assert value.count('digest pass') == 4
  47. assert value.count('WARC-Record-ID') == 12
  48. def test_check_valid_chunked(self, capsys):
  49. filenames = [get_test_file('example-iana.org-chunked.warc')]
  50. args = ['check'] + filenames
  51. expected = ''
  52. assert self.check_helper(args, 0, capsys) == expected
  53. args = ['check', '-v'] + filenames
  54. value = self.check_helper(args, 0, capsys)
  55. # two digests per file (payload and block)
  56. assert value.count('no digest to check') == 1
  57. assert value.count('digest pass') == 2
  58. assert value.count('WARC-Record-ID') == 3
  59. def test_check_no_invalid_files(self, test_filename, capsys):
  60. args = ['check', '-v', get_test_file(test_filename)]
  61. value = self.check_helper(args, 0, capsys)
  62. assert value.count('digest failed') == 0
  63. # if ARC file, no digests to check, so no passing results
  64. if test_filename.endswith(('.arc', '.arc.gz')):
  65. assert value.count('digest pass') == 0