Test.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. #!/usr/bin/env python3
  2. # Contest Management System - http://cms-dev.github.io/
  3. # Copyright © 2010-2017 Stefano Maggiolo <s.maggiolo@gmail.com>
  4. # Copyright © 2013-2015 Luca Versari <veluca93@gmail.com>
  5. # Copyright © 2013 Giovanni Mascellani <mascellani@poisson.phc.unipi.it>
  6. # Copyright © 2013-2018 Luca Wehrstedt <luca.wehrstedt@gmail.com>
  7. #
  8. # This program is free software: you can redistribute it and/or modify
  9. # it under the terms of the GNU Affero General Public License as
  10. # published by the Free Software Foundation, either version 3 of the
  11. # License, or (at your option) any later version.
  12. #
  13. # This program is distributed in the hope that it will be useful,
  14. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. # GNU Affero General Public License for more details.
  17. #
  18. # You should have received a copy of the GNU Affero General Public License
  19. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  20. import atexit
  21. import logging
  22. import os
  23. import select
  24. import sys
  25. import cmscontrib.loaders
  26. from cms.db import Executable
  27. from cms.db.filecacher import FileCacher
  28. from cms.grading import format_status_text
  29. from cms.grading.Job import EvaluationJob
  30. from cms.service.esoperations import ESOperation
  31. from cmscommon.terminal import move_cursor, add_color_to_string, \
  32. colors, directions
# TODO - Use a context object instead of global variables
# Task loaded by test_testcases(); kept between calls so that it is
# loaded only once, and reset by clean_test_env().
task = None
# FileCacher created on first use by test_testcases() and destroyed by
# clean_test_env().
file_cacher = None
# Becomes True after the first call to test_testcases(), so that
# print_at_exit is registered with atexit only once.
tested_something = False
# One (solution name, total score) pair per tested solution; printed by
# print_at_exit().
sols = []
  38. def usage():
  39. print("""%s base_dir executable [assume]"
  40. base_dir: directory of the task
  41. executable: solution to test (relative to the task's directory)
  42. language: programming language of the solution, e.g. "C++11 / gcc",
  43. assume: if it's y, answer yes to every question
  44. if it's n, answer no to every question
  45. """ % sys.argv[0])
  46. def mem_human(mem):
  47. if mem is None:
  48. return 'None'
  49. if mem > 1024 * 1024 * 1024:
  50. return "%4.3gG" % (mem / (1024 * 1024 * 1024))
  51. if mem > 1024 * 1024:
  52. return "%4.3gM" % (mem / (1024 * 1024))
  53. if mem > 1024:
  54. return "%4.3gK" % (mem / 1024)
  55. return "%4d" % mem
  56. class NullLogger:
  57. def __init__(self):
  58. def p(*args):
  59. pass
  60. self.info = p
  61. self.warning = p
  62. self.critical = print
  63. def print_at_exit():
  64. print()
  65. print()
  66. for s in sols:
  67. print("%s: %3d" % (
  68. add_color_to_string("%30s" % s[0], colors.BLACK,
  69. bold=True),
  70. s[1])
  71. )
# Root logger (getLogger() is called without a name); test_testcases()
# uses it to warn about unsupported score types.
logger = logging.getLogger()
  73. def test_testcases(base_dir, solution, language, assume=None):
  74. global task, file_cacher
  75. # Use a FileCacher with a NullBackend in order to avoid to fill
  76. # the database with junk
  77. if file_cacher is None:
  78. file_cacher = FileCacher(null=True)
  79. cmscontrib.loaders.italy_yaml.logger = NullLogger()
  80. # Load the task
  81. # TODO - This implies copying a lot of data to the FileCacher,
  82. # which is annoying if you have to do it continuously; it would be
  83. # better to use a persistent cache (although local, possibly
  84. # filesystem-based instead of database-based) and somehow detect
  85. # when the task has already been loaded
  86. if task is None:
  87. loader = cmscontrib.loaders.italy_yaml.YamlLoader(base_dir,
  88. file_cacher)
  89. task = loader.get_task(get_statement=False)
  90. # Prepare the EvaluationJob
  91. dataset = task.active_dataset
  92. digest = file_cacher.put_file_from_path(
  93. os.path.join(base_dir, solution),
  94. "Solution %s for task %s" % (solution, task.name))
  95. executables = {task.name: Executable(filename=task.name, digest=digest)}
  96. jobs = [(t, EvaluationJob(
  97. operation=ESOperation(
  98. ESOperation.EVALUATION,
  99. None,
  100. dataset.id,
  101. dataset.testcases[t].codename),
  102. language=language.name,
  103. task_type=dataset.task_type,
  104. task_type_parameters=dataset.task_type_parameters,
  105. managers=dict(dataset.managers),
  106. executables=executables,
  107. input=dataset.testcases[t].input, output=dataset.testcases[t].output,
  108. time_limit=dataset.time_limit,
  109. memory_limit=dataset.memory_limit)) for t in dataset.testcases]
  110. tasktype = dataset.task_type_object
  111. ask_again = True
  112. last_status = "ok"
  113. status = "ok"
  114. stop = False
  115. info = []
  116. points = []
  117. comments = []
  118. tcnames = []
  119. for jobinfo in sorted(jobs):
  120. print(jobinfo[0])
  121. sys.stdout.flush()
  122. job = jobinfo[1]
  123. # Skip the testcase if we decide to consider everything to
  124. # timeout
  125. if stop:
  126. info.append("Time limit exceeded")
  127. points.append(0.0)
  128. comments.append("Timeout.")
  129. move_cursor(directions.UP, erase=True)
  130. continue
  131. # Evaluate testcase
  132. last_status = status
  133. tasktype.evaluate(job, file_cacher)
  134. status = job.plus.get("exit_status")
  135. info.append((job.plus.get("execution_time"),
  136. job.plus.get("execution_memory")))
  137. points.append(float(job.outcome))
  138. # Avoid printing unneeded newline
  139. job.text = [t.rstrip() if isinstance(t, str) else t for t in job.text]
  140. comments.append(format_status_text(job.text))
  141. tcnames.append(jobinfo[0])
  142. # If we saw two consecutive timeouts, ask wether we want to
  143. # consider everything to timeout
  144. if ask_again and status == "timeout" and last_status == "timeout":
  145. print("Want to stop and consider everything to timeout? [y/N] ",
  146. end='')
  147. sys.stdout.flush()
  148. if assume is not None:
  149. tmp = assume
  150. print(tmp)
  151. else:
  152. # User input with a timeout of 5 seconds, at the end of which
  153. # we automatically say "n". ready will be a list of input ready
  154. # for reading, or an empty list if the timeout expired.
  155. # See: http://stackoverflow.com/a/2904057
  156. ready, _, _ = select.select([sys.stdin], [], [], 5)
  157. if ready:
  158. tmp = sys.stdin.readline().strip().lower()
  159. else:
  160. tmp = 'n'
  161. print(tmp)
  162. if tmp in ['y', 'yes']:
  163. stop = True
  164. else:
  165. ask_again = False
  166. print()
  167. move_cursor(directions.UP, erase=True)
  168. # Subtasks scoring
  169. subtasks = dataset.score_type_parameters
  170. if not isinstance(subtasks, list) or len(subtasks) == 0:
  171. subtasks = [[100, len(info)]]
  172. if dataset.score_type == 'GroupMin':
  173. scoreFun = min
  174. else:
  175. if dataset.score_type != 'Sum':
  176. logger.warning("Score type %s not yet supported! Using Sum"
  177. % dataset.score_type)
  178. def scoreFun(x):
  179. return sum(x) / len(x)
  180. pos = 0
  181. sts = []
  182. # For each subtask generate a list of testcase it owns, the score gained
  183. # and the highest time and memory usage.
  184. for i in subtasks:
  185. stscores = []
  186. stsdata = []
  187. worst = [0, 0]
  188. try:
  189. for _ in range(i[1]):
  190. stscores.append(points[pos])
  191. stsdata.append((tcnames[pos], points[pos],
  192. comments[pos], info[pos]))
  193. if info[pos][0] > worst[0]:
  194. worst[0] = info[pos][0]
  195. if info[pos][1] > worst[1]:
  196. worst[1] = info[pos][1]
  197. pos += 1
  198. sts.append((scoreFun(stscores) * i[0], i[0], stsdata, worst))
  199. except:
  200. sts.append((0, i[0], stsdata, [0, 0]))
  201. # Result pretty printing
  202. # Strips sol/ and _EVAL from the solution's name
  203. solution = solution[4:-5]
  204. print()
  205. clen = max(len(c) for c in comments)
  206. for st, d in enumerate(sts):
  207. print(
  208. "Subtask %d:" % st,
  209. add_color_to_string(
  210. "%5.2f/%d" % (d[0], d[1]),
  211. colors.RED if abs(d[0] - d[1]) > 0.01 else colors.GREEN,
  212. bold=True
  213. )
  214. )
  215. for (i, p, c, w) in d[2]:
  216. print(
  217. "%s)" % i,
  218. add_color_to_string(
  219. "%5.2lf" % p,
  220. colors.RED if abs(p - 1) > 0.01 else colors.BLACK
  221. ),
  222. "--- %s [Time:" % c.ljust(clen),
  223. add_color_to_string(
  224. ("%5.3f" % w[0]) if w[0] is not None else "N/A",
  225. colors.BLUE if w[0] is not None and w[0] >= 0.95 * d[3][0]
  226. else colors.BLACK
  227. ),
  228. "Memory:",
  229. add_color_to_string(
  230. "%5s" % mem_human(w[1]) if w[1] is not None else "N/A",
  231. colors.BLUE if w[1] is not None and w[1] >= 0.95 * d[3][1]
  232. else colors.BLACK,
  233. ),
  234. end="]"
  235. )
  236. move_cursor(directions.RIGHT, 1000)
  237. move_cursor(directions.LEFT, len(solution) - 1)
  238. print(add_color_to_string(solution, colors.BLACK, bold=True))
  239. print()
  240. sols.append((solution, sum([st[0] for st in sts])))
  241. global tested_something
  242. if not tested_something:
  243. tested_something = True
  244. atexit.register(print_at_exit)
  245. return zip(points, comments, info)
  246. def clean_test_env():
  247. """Clean the testing environment, mostly to reclaim disk space.
  248. """
  249. # We're done: since we have no way to reuse this cache, we destroy
  250. # it to free space. See the TODO above.
  251. global file_cacher, task
  252. if file_cacher is not None:
  253. file_cacher.destroy_cache()
  254. file_cacher = None
  255. task = None
  256. if __name__ == "__main__":
  257. if len(sys.argv) < 4:
  258. usage()
  259. if len(sys.argv) == 4:
  260. assume = None
  261. else:
  262. assume = sys.argv[4]
  263. test_testcases(sys.argv[1], sys.argv[2], sys.argv[3], assume=assume)