update_26.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. #!/usr/bin/env python3
  2. # Contest Management System - http://cms-dev.github.io/
  3. # Copyright © 2017 Luca Wehrstedt <luca.wehrstedt@gmail.com>
  4. #
  5. # This program is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU Affero General Public License as
  7. # published by the Free Software Foundation, either version 3 of the
  8. # License, or (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU Affero General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU Affero General Public License
  16. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. """A class to update a dump created by CMS.
  18. Used by DumpImporter and DumpUpdater.
  19. This updater encodes codenames using a more restricted alphabet.
  20. """
  21. import ipaddress
  22. import logging
  23. import re
  24. import string
  25. import sys
  26. logger = logging.getLogger(__name__)
  27. CODENAMES = [
  28. ("Admin", "username"),
  29. ("Contest", "name"),
  30. ("Task", "name"),
  31. ("Testcase", "codename"),
  32. ("User", "username"),
  33. ("Team", "code"),
  34. ]
  35. FILENAMES = [
  36. ("PrintJob", "filename"),
  37. ("File", "filename"),
  38. ("Executable", "filename"),
  39. ("Attachment", "filename"),
  40. ("SubmissionFormatElement", "filename"),
  41. ("Manager", "filename"),
  42. ("UserTestFile", "filename"),
  43. ("UserTestManager", "filename"),
  44. ("UserTestExecutable", "filename"),
  45. ]
  46. FILENAME_DICTS = [
  47. ("Submission", "files"),
  48. ("SubmissionResult", "executables"),
  49. ("Task", "attachments"),
  50. ("Dataset", "managers"),
  51. ("UserTest", "files"),
  52. ("UserTest", "managers"),
  53. ("UserTestResult", "executables"),
  54. ]
  55. DIGESTS = [
  56. ("PrintJob", "digest"),
  57. ("File", "digest"),
  58. ("Executable", "digest"),
  59. ("Statement", "digest"),
  60. ("Attachment", "digest"),
  61. ("Manager", "digest"),
  62. ("Testcase", "input"),
  63. ("Testcase", "output"),
  64. ("UserTest", "input"),
  65. ("UserTestFile", "digest"),
  66. ("UserTestManager", "digest"),
  67. ("UserTestResult", "output"),
  68. ("UserTestExecutable", "digest"),
  69. ]
  70. IP_ADDRESSES = [
  71. ("Participation", "ip"),
  72. ]
  73. # Encodes any unicode string using only "A-Za-z0-9_-". The encoding is
  74. # injective if the input values aren't allowed to contain a double "_".
  75. def encode_codename(s, extra=""):
  76. encoded_s = ""
  77. for char in s:
  78. if char not in string.ascii_letters + string.digits + "_-" + extra:
  79. encoded_s += "__%x" % ord(char)
  80. else:
  81. encoded_s += char
  82. return encoded_s
  83. class Updater:
  84. def __init__(self, data):
  85. assert data["_version"] == 25
  86. self.objs = data
  87. def run(self):
  88. for k, v in self.objs.items():
  89. if k.startswith("_"):
  90. continue
  91. for cls, col in CODENAMES:
  92. if v["_class"] == cls and v[col] is not None:
  93. v[col] = encode_codename(v[col])
  94. if v[col] == "":
  95. logger.critical(
  96. "The dump contains an instance of %s whose %s "
  97. "field contains an invalid codename: `%s'.",
  98. cls, col, v[col])
  99. sys.exit(1)
  100. for cls, col in FILENAMES:
  101. if v["_class"] == cls and v[col] is not None:
  102. v[col] = encode_codename(v[col], extra="%.")
  103. if v[col] in {"", ".", ".."}:
  104. logger.critical(
  105. "The dump contains an instance of %s whose %s "
  106. "field contains an invalid filename: `%s'.",
  107. cls, col, v[col])
  108. sys.exit(1)
  109. for cls, col in FILENAME_DICTS:
  110. if v["_class"] == cls and v[col] is not None:
  111. v[col] = {encode_codename(k, extra="%."): v
  112. for k, v in v[col].items()}
  113. for k in v[col]:
  114. if k in {"", ".", ".."}:
  115. logger.critical(
  116. "The dump contains an instance of %s whose %s "
  117. "field contains an invalid filename: `%s'.",
  118. cls, col, v[col])
  119. sys.exit(1)
  120. for cls, col in DIGESTS:
  121. if v["_class"] == cls and v[col] is not None:
  122. if not re.match("^([0-9a-f]{40}|x)$", v[col]):
  123. logger.critical(
  124. "The dump contains an instance of %s whose %s "
  125. "field contains an invalid SHA-1 digest: `%s'.",
  126. cls, col, v[col])
  127. sys.exit(1)
  128. for cls, col in IP_ADDRESSES:
  129. if v["_class"] == cls and v[col] is not None:
  130. v[col] = list(network.strip() for network in v[col].split())
  131. for network in v[col]:
  132. try:
  133. ipaddress.ip_network(network)
  134. except ValueError:
  135. logger.critical(
  136. "The dump contains an instance of %s whose %s "
  137. "field contains an invalid IPv4 address: `%s'.",
  138. cls, col, v[col])
  139. sys.exit(1)
  140. return self.objs