CleanFiles.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. #!/usr/bin/env python3
  2. # Contest Management System - http://cms-dev.github.io/
  3. # Copyright © 2016 Luca Versari <veluca93@gmail.com>
  4. # Copyright © 2016 Stefano Maggiolo <s.maggiolo@gmail.com>
  5. # Copyright © 2018 Luca Wehrstedt <luca.wehrstedt@gmail.com>
  6. #
  7. # This program is free software: you can redistribute it and/or modify
  8. # it under the terms of the GNU Affero General Public License as
  9. # published by the Free Software Foundation, either version 3 of the
  10. # License, or (at your option) any later version.
  11. #
  12. # This program is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU Affero General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU Affero General Public License
  18. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. """This script scans the whole database for file object references
  20. and removes unreferenced file objects from the file store. If required,
  21. it also replaces all the executable digests in the database with a
  22. tombstone digest, to make executables removable in the clean pass.
  23. """
  24. import argparse
  25. import logging
  26. import sys
  27. from cms.db import SessionGen, Digest, Executable, enumerate_files
  28. from cms.db.filecacher import FileCacher
  29. logger = logging.getLogger()  # no name is given, so this is the root logger
  30. def make_tombstone(session):
  31. count = 0
  32. for exe in session.query(Executable).all():
  33. if exe.digest != Digest.TOMBSTONE:
  34. count += 1
  35. exe.digest = Digest.TOMBSTONE
  36. logger.info("Replaced %d executables with the tombstone.", count)
  37. def clean_files(session, dry_run):
  38. filecacher = FileCacher()
  39. files = set(file[0] for file in filecacher.list())
  40. logger.info("A total number of %d files are present in the file store",
  41. len(files))
  42. found_digests = enumerate_files(session)
  43. logger.info("Found %d digests while scanning", len(found_digests))
  44. files -= found_digests
  45. logger.info("%d digests are orphan.", len(files))
  46. total_size = 0
  47. for orphan in files:
  48. total_size += filecacher.get_size(orphan)
  49. logger.info("Orphan files take %s bytes of disk space",
  50. "{:,}".format(total_size))
  51. if not dry_run:
  52. for count, orphan in enumerate(files):
  53. filecacher.delete(orphan)
  54. if count % 100 == 0:
  55. logger.info("%d files deleted from the file store", count)
  56. logger.info("All orphan files have been deleted")
  57. def main():
  58. parser = argparse.ArgumentParser(
  59. description="Remove unused file objects from the database. "
  60. "If -t is specified, also replace all executables with the tombstone")
  61. parser.add_argument("-t", "--tombstone", action="store_true")
  62. parser.add_argument("-n", "--dry-run", action="store_true")
  63. args = parser.parse_args()
  64. with SessionGen() as session:
  65. if args.tombstone:
  66. make_tombstone(session)
  67. clean_files(session, args.dry_run)
  68. if not args.dry_run:
  69. session.commit()
  70. return 0
  71. if __name__ == "__main__":
  72. sys.exit(main())