ExportSubmissions.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. #!/usr/bin/env python3
  2. # Contest Management System - http://cms-dev.github.io/
  3. # Copyright © 2015-2016 William Di Luigi <williamdiluigi@gmail.com>
  4. # Copyright © 2016-2017 Stefano Maggiolo <s.maggiolo@gmail.com>
  5. # Copyright © 2017 Myungwoo Chun <mc.tamaki@gmail.com>
  6. #
  7. # This program is free software: you can redistribute it and/or modify
  8. # it under the terms of the GNU Affero General Public License as
  9. # published by the Free Software Foundation, either version 3 of the
  10. # License, or (at your option) any later version.
  11. #
  12. # This program is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU Affero General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU Affero General Public License
  18. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. """Utility to export submissions to a folder.
  20. """
  21. import argparse
  22. import logging
  23. import os
  24. import sys
  25. from cms import utf8_decoder
  26. from cms.db import Dataset, File, FSObject, Participation, SessionGen, \
  27. Submission, SubmissionResult, Task, User
  28. from cms.grading import languagemanager
  29. logger = logging.getLogger(__name__)
  30. # Templates for the comment at the beginning of the exported submission.
  31. # Note that output only submissions will contain an initial, C-style formatted
  32. # comment, so to recover the original file one will need to use tail -n +6.
  33. _RAW_TEMPLATE_DATA = """
  34. * user: %s
  35. * fname: %s
  36. * lname: %s
  37. * task: %s
  38. * score: %s
  39. * date: %s
  40. """
  41. TEMPLATE = {
  42. ".c": "/**%s*/\n" % _RAW_TEMPLATE_DATA,
  43. ".pas": "(**%s*)\n" % _RAW_TEMPLATE_DATA,
  44. ".py": "\"\"\"%s\"\"\"\n" % _RAW_TEMPLATE_DATA,
  45. ".php": "<?php\n/**%s*/\n?>" % _RAW_TEMPLATE_DATA,
  46. ".hs": "{-%s-}\n" % _RAW_TEMPLATE_DATA,
  47. }
  48. TEMPLATE[".cpp"] = TEMPLATE[".c"]
  49. TEMPLATE[".java"] = TEMPLATE[".c"]
  50. TEMPLATE[".txt"] = TEMPLATE[".c"]
  51. def filter_top_scoring(results, unique):
  52. """Filter results keeping only the top scoring submissions for each user
  53. and task
  54. results ([Submission]): the starting list of submissions
  55. unique (bool): if True, keep only the first top-scoring submission
  56. return ([Submission]): the filtered submissions
  57. """
  58. usertask = {}
  59. for row in results:
  60. key = (row[6], row[10]) # u_id, t_id
  61. value = (-row[3], row[2], row) # sr_score, s_timestamp
  62. if unique:
  63. if key not in usertask or usertask[key][0] > value:
  64. usertask[key] = [value]
  65. else:
  66. if key not in usertask or usertask[key][0][0] > value[0]:
  67. usertask[key] = [value]
  68. elif usertask[key][0][0] == value[0]:
  69. usertask[key].append(value)
  70. results = []
  71. for key, values in usertask.items():
  72. for value in values:
  73. results.append(value[2]) # the "old" row
  74. return results
  75. def main():
  76. """Parse arguments and launch process.
  77. """
  78. parser = argparse.ArgumentParser(
  79. description="Export CMS submissions to a folder.\n",
  80. formatter_class=argparse.RawTextHelpFormatter)
  81. parser.add_argument("-c", "--contest-id", action="store", type=int,
  82. help="id of contest (default: all contests)")
  83. parser.add_argument("-t", "--task-id", action="store", type=int,
  84. help="id of task (default: all tasks)")
  85. parser.add_argument("-u", "--user-id", action="store", type=int,
  86. help="id of user (default: all users)")
  87. parser.add_argument("-s", "--submission-id", action="store", type=int,
  88. help="id of submission (default: all submissions)")
  89. parser.add_argument("--utf8", action="store_true",
  90. help="if set, the files will be encoded in utf8"
  91. " when possible")
  92. parser.add_argument("--add-info", action="store_true",
  93. help="if set, information on the submission will"
  94. " be added in the first lines of each file")
  95. parser.add_argument("--min-score", action="store", type=float,
  96. help="ignore submissions which scored strictly"
  97. " less than this (default: 0.0)",
  98. default=0.0)
  99. parser.add_argument("--filename", action="store", type=utf8_decoder,
  100. help="the filename format to use\n"
  101. "Variables:\n"
  102. " id: submission id\n"
  103. " file: filename without extension\n"
  104. " ext: filename extension\n"
  105. " time: submission timestamp\n"
  106. " user: username\n"
  107. " task: taskname\n"
  108. " score: raw score\n"
  109. " (default: {id}.{file}{ext})",
  110. default="{id}.{file}{ext}")
  111. parser.add_argument("output_dir", action="store", type=utf8_decoder,
  112. help="directory where to save the submissions")
  113. group = parser.add_mutually_exclusive_group(required=False)
  114. group.add_argument("--unique", action="store_true",
  115. help="if set, only the earliest best submission"
  116. " will be exported for each (user, task)")
  117. group.add_argument("--best", action="store_true",
  118. help="if set, only the best submissions will be"
  119. " exported for each (user, task)")
  120. args = parser.parse_args()
  121. if args.add_info and not args.utf8:
  122. logger.critical("If --add-info is specified, then --utf8 must be"
  123. " specified as well.")
  124. return 1
  125. if not os.path.exists(args.output_dir):
  126. os.mkdir(args.output_dir)
  127. if not os.path.isdir(args.output_dir):
  128. logger.critical("The output-dir parameter must point to a directory")
  129. return 1
  130. with SessionGen() as session:
  131. q = session.query(Submission)\
  132. .join(Submission.task)\
  133. .join(Submission.files)\
  134. .join(Submission.results)\
  135. .join(SubmissionResult.dataset)\
  136. .join(Submission.participation)\
  137. .join(Participation.user)\
  138. .filter(Dataset.id == Task.active_dataset_id)\
  139. .filter(SubmissionResult.score >= args.min_score)\
  140. .with_entities(Submission.id, Submission.language,
  141. Submission.timestamp,
  142. SubmissionResult.score,
  143. File.filename, File.digest,
  144. User.id, User.username, User.first_name,
  145. User.last_name,
  146. Task.id, Task.name)
  147. if args.contest_id:
  148. q = q.filter(Participation.contest_id == args.contest_id)
  149. if args.task_id:
  150. q = q.filter(Submission.task_id == args.task_id)
  151. if args.user_id:
  152. q = q.filter(Participation.user_id == args.user_id)
  153. if args.submission_id:
  154. q = q.filter(Submission.id == args.submission_id)
  155. results = q.all()
  156. if args.unique or args.best:
  157. results = filter_top_scoring(results, args.unique)
  158. print("%s file(s) will be created." % len(results))
  159. if input("Continue? [Y/n] ").strip().lower() not in ["y", ""]:
  160. return 0
  161. done = 0
  162. for row in results:
  163. s_id, s_language, s_timestamp, sr_score, f_filename, f_digest, \
  164. u_id, u_name, u_fname, u_lname, t_id, t_name = row
  165. timef = s_timestamp.strftime('%Y%m%dT%H%M%S')
  166. ext = languagemanager.get_language(s_language).source_extension \
  167. if s_language else '.txt'
  168. filename_base, filename_ext = os.path.splitext(
  169. f_filename.replace('.%l', ext)
  170. )
  171. # "name" is a deprecated specifier with the same meaning as "file"
  172. filename = args.filename.format(id=s_id, file=filename_base,
  173. name=filename_base,
  174. ext=filename_ext,
  175. time=timef, user=u_name,
  176. task=t_name,
  177. score=sr_score)
  178. filename = os.path.join(args.output_dir, filename)
  179. if os.path.exists(filename):
  180. logger.warning("Skipping file '%s' because it already exists",
  181. filename)
  182. continue
  183. filedir = os.path.dirname(filename)
  184. if not os.path.exists(filedir):
  185. os.makedirs(filedir)
  186. if not os.path.isdir(filedir):
  187. logger.warning("%s is not a directory, skipped.", filedir)
  188. continue
  189. fso = FSObject.get_from_digest(f_digest, session)
  190. assert fso is not None
  191. with fso.get_lobject(mode="rb") as file_obj:
  192. data = file_obj.read()
  193. if args.utf8:
  194. try:
  195. data = utf8_decoder(data)
  196. except TypeError:
  197. logger.critical("Could not guess encoding of file "
  198. "'%s'. Aborting.",
  199. filename)
  200. sys.exit(1)
  201. if args.add_info:
  202. data = TEMPLATE[ext] % (
  203. u_name,
  204. u_fname,
  205. u_lname,
  206. t_name,
  207. sr_score,
  208. s_timestamp
  209. ) + data
  210. # Print utf8-encoded, possibly altered data
  211. with open(filename, "wt", encoding="utf-8") as f_out:
  212. f_out.write(data)
  213. else:
  214. # Print raw, untouched binary data
  215. with open(filename, "wb") as f_out:
  216. f_out.write(data)
  217. done += 1
  218. print(done, "/", len(results))
  219. return 0
  220. if __name__ == "__main__":
  221. sys.exit(main())