123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264 |
- #!/usr/bin/env python3
- # Contest Management System - http://cms-dev.github.io/
- # Copyright © 2015-2016 William Di Luigi <williamdiluigi@gmail.com>
- # Copyright © 2016-2017 Stefano Maggiolo <s.maggiolo@gmail.com>
- # Copyright © 2017 Myungwoo Chun <mc.tamaki@gmail.com>
- #
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Affero General Public License as
- # published by the Free Software Foundation, either version 3 of the
- # License, or (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU Affero General Public License for more details.
- #
- # You should have received a copy of the GNU Affero General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- """Utility to export submissions to a folder.
- """
- import argparse
- import logging
- import os
- import sys
- from cms import utf8_decoder
- from cms.db import Dataset, File, FSObject, Participation, SessionGen, \
- Submission, SubmissionResult, Task, User
- from cms.grading import languagemanager
- logger = logging.getLogger(__name__)
- # Templates for the comment at the beginning of the exported submission.
- # Note that output only submissions will contain an initial, C-style formatted
- # comment, so to recover the original file one will need to use tail -n +6.
- _RAW_TEMPLATE_DATA = """
- * user: %s
- * fname: %s
- * lname: %s
- * task: %s
- * score: %s
- * date: %s
- """
- TEMPLATE = {
- ".c": "/**%s*/\n" % _RAW_TEMPLATE_DATA,
- ".pas": "(**%s*)\n" % _RAW_TEMPLATE_DATA,
- ".py": "\"\"\"%s\"\"\"\n" % _RAW_TEMPLATE_DATA,
- ".php": "<?php\n/**%s*/\n?>" % _RAW_TEMPLATE_DATA,
- ".hs": "{-%s-}\n" % _RAW_TEMPLATE_DATA,
- }
- TEMPLATE[".cpp"] = TEMPLATE[".c"]
- TEMPLATE[".java"] = TEMPLATE[".c"]
- TEMPLATE[".txt"] = TEMPLATE[".c"]
- def filter_top_scoring(results, unique):
- """Filter results keeping only the top scoring submissions for each user
- and task
- results ([Submission]): the starting list of submissions
- unique (bool): if True, keep only the first top-scoring submission
- return ([Submission]): the filtered submissions
- """
- usertask = {}
- for row in results:
- key = (row[6], row[10]) # u_id, t_id
- value = (-row[3], row[2], row) # sr_score, s_timestamp
- if unique:
- if key not in usertask or usertask[key][0] > value:
- usertask[key] = [value]
- else:
- if key not in usertask or usertask[key][0][0] > value[0]:
- usertask[key] = [value]
- elif usertask[key][0][0] == value[0]:
- usertask[key].append(value)
- results = []
- for key, values in usertask.items():
- for value in values:
- results.append(value[2]) # the "old" row
- return results
- def main():
- """Parse arguments and launch process.
- """
- parser = argparse.ArgumentParser(
- description="Export CMS submissions to a folder.\n",
- formatter_class=argparse.RawTextHelpFormatter)
- parser.add_argument("-c", "--contest-id", action="store", type=int,
- help="id of contest (default: all contests)")
- parser.add_argument("-t", "--task-id", action="store", type=int,
- help="id of task (default: all tasks)")
- parser.add_argument("-u", "--user-id", action="store", type=int,
- help="id of user (default: all users)")
- parser.add_argument("-s", "--submission-id", action="store", type=int,
- help="id of submission (default: all submissions)")
- parser.add_argument("--utf8", action="store_true",
- help="if set, the files will be encoded in utf8"
- " when possible")
- parser.add_argument("--add-info", action="store_true",
- help="if set, information on the submission will"
- " be added in the first lines of each file")
- parser.add_argument("--min-score", action="store", type=float,
- help="ignore submissions which scored strictly"
- " less than this (default: 0.0)",
- default=0.0)
- parser.add_argument("--filename", action="store", type=utf8_decoder,
- help="the filename format to use\n"
- "Variables:\n"
- " id: submission id\n"
- " file: filename without extension\n"
- " ext: filename extension\n"
- " time: submission timestamp\n"
- " user: username\n"
- " task: taskname\n"
- " score: raw score\n"
- " (default: {id}.{file}{ext})",
- default="{id}.{file}{ext}")
- parser.add_argument("output_dir", action="store", type=utf8_decoder,
- help="directory where to save the submissions")
- group = parser.add_mutually_exclusive_group(required=False)
- group.add_argument("--unique", action="store_true",
- help="if set, only the earliest best submission"
- " will be exported for each (user, task)")
- group.add_argument("--best", action="store_true",
- help="if set, only the best submissions will be"
- " exported for each (user, task)")
- args = parser.parse_args()
- if args.add_info and not args.utf8:
- logger.critical("If --add-info is specified, then --utf8 must be"
- " specified as well.")
- return 1
- if not os.path.exists(args.output_dir):
- os.mkdir(args.output_dir)
- if not os.path.isdir(args.output_dir):
- logger.critical("The output-dir parameter must point to a directory")
- return 1
- with SessionGen() as session:
- q = session.query(Submission)\
- .join(Submission.task)\
- .join(Submission.files)\
- .join(Submission.results)\
- .join(SubmissionResult.dataset)\
- .join(Submission.participation)\
- .join(Participation.user)\
- .filter(Dataset.id == Task.active_dataset_id)\
- .filter(SubmissionResult.score >= args.min_score)\
- .with_entities(Submission.id, Submission.language,
- Submission.timestamp,
- SubmissionResult.score,
- File.filename, File.digest,
- User.id, User.username, User.first_name,
- User.last_name,
- Task.id, Task.name)
- if args.contest_id:
- q = q.filter(Participation.contest_id == args.contest_id)
- if args.task_id:
- q = q.filter(Submission.task_id == args.task_id)
- if args.user_id:
- q = q.filter(Participation.user_id == args.user_id)
- if args.submission_id:
- q = q.filter(Submission.id == args.submission_id)
- results = q.all()
- if args.unique or args.best:
- results = filter_top_scoring(results, args.unique)
- print("%s file(s) will be created." % len(results))
- if input("Continue? [Y/n] ").strip().lower() not in ["y", ""]:
- return 0
- done = 0
- for row in results:
- s_id, s_language, s_timestamp, sr_score, f_filename, f_digest, \
- u_id, u_name, u_fname, u_lname, t_id, t_name = row
- timef = s_timestamp.strftime('%Y%m%dT%H%M%S')
- ext = languagemanager.get_language(s_language).source_extension \
- if s_language else '.txt'
- filename_base, filename_ext = os.path.splitext(
- f_filename.replace('.%l', ext)
- )
- # "name" is a deprecated specifier with the same meaning as "file"
- filename = args.filename.format(id=s_id, file=filename_base,
- name=filename_base,
- ext=filename_ext,
- time=timef, user=u_name,
- task=t_name,
- score=sr_score)
- filename = os.path.join(args.output_dir, filename)
- if os.path.exists(filename):
- logger.warning("Skipping file '%s' because it already exists",
- filename)
- continue
- filedir = os.path.dirname(filename)
- if not os.path.exists(filedir):
- os.makedirs(filedir)
- if not os.path.isdir(filedir):
- logger.warning("%s is not a directory, skipped.", filedir)
- continue
- fso = FSObject.get_from_digest(f_digest, session)
- assert fso is not None
- with fso.get_lobject(mode="rb") as file_obj:
- data = file_obj.read()
- if args.utf8:
- try:
- data = utf8_decoder(data)
- except TypeError:
- logger.critical("Could not guess encoding of file "
- "'%s'. Aborting.",
- filename)
- sys.exit(1)
- if args.add_info:
- data = TEMPLATE[ext] % (
- u_name,
- u_fname,
- u_lname,
- t_name,
- sr_score,
- s_timestamp
- ) + data
- # Print utf8-encoded, possibly altered data
- with open(filename, "wt", encoding="utf-8") as f_out:
- f_out.write(data)
- else:
- # Print raw, untouched binary data
- with open(filename, "wb") as f_out:
- f_out.write(data)
- done += 1
- print(done, "/", len(results))
- return 0
- if __name__ == "__main__":
- sys.exit(main())
|