archive.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. #!/usr/bin/env python3
  2. # Contest Management System - http://cms-dev.github.io/
  3. # Copyright © 2014-2015 William Di Luigi <williamdiluigi@gmail.com>
  4. # Copyright © 2014 Stefano Maggiolo <s.maggiolo@gmail.com>
  5. #
  6. # This program is free software: you can redistribute it and/or modify
  7. # it under the terms of the GNU Affero General Public License as
  8. # published by the Free Software Foundation, either version 3 of the
  9. # License, or (at your option) any later version.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU Affero General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU Affero General Public License
  17. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. """Abstraction layer for reading from and writing to archives.
  19. """
  20. import os
  21. import shutil
  22. import tempfile
  23. import patoolib
  24. from patoolib.util import PatoolError
  25. from cms import config
  26. class ArchiveException(Exception):
  27. """Exception for when the interaction with the Archive class is
  28. incorrect.
  29. """
  30. pass
  31. class Archive:
  32. """Class to manage archives.
  33. This class has static methods to test, extract, and create
  34. archives. Moreover, an instance of this class can be create to
  35. manage an existing archive. At the moment, all operations depend
  36. on calling first the unpack method, that extract the archive in a
  37. temporary directory.
  38. """
  39. @staticmethod
  40. def is_supported(path):
  41. """Return whether the file at path is supported by patoolib.
  42. path (string): the path to test.
  43. return (bool): whether path is supported.
  44. """
  45. try:
  46. patoolib.test_archive(path, interactive=False)
  47. return True
  48. except PatoolError:
  49. return False
  50. @staticmethod
  51. def create_from_dir(from_dir, archive_path):
  52. """Create a new archive containing all files in from_dir.
  53. from_dir (string): directory with the files to archive.
  54. archive_path (string): the new archive's path.
  55. """
  56. files = tuple(os.listdir(from_dir))
  57. cwd = os.getcwd()
  58. os.chdir(from_dir)
  59. patoolib.create_archive(archive_path, files, interactive=False)
  60. os.chdir(cwd)
  61. @staticmethod
  62. def extract_to_dir(archive_path, to_dir):
  63. """Extract the content of an archive in to_dir.
  64. archive_path (string): path of the archive to extract.
  65. to_dir (string): destination directory.
  66. """
  67. patoolib.extract_archive(archive_path, outdir=to_dir, interactive=False)
  68. @staticmethod
  69. def from_raw_data(raw_data):
  70. """Create an Archive object out of raw archive data.
  71. This method treats the given string as archive data: it dumps it
  72. into a temporary file, then creates an Archive object. Since the
  73. user did not provide a path, we assume that when cleanup() is
  74. called the temporary file should be deleted as well as unpacked
  75. data.
  76. raw_data (bytes): the actual bytes that form the archive.
  77. return (Archive|None): an object that represents the new
  78. archive or None, if raw_data doesn't represent an archive.
  79. """
  80. temp_file, temp_filename = tempfile.mkstemp(dir=config.temp_dir)
  81. with open(temp_file, "wb") as temp_file:
  82. temp_file.write(raw_data)
  83. try:
  84. return Archive(temp_filename, delete_source=True)
  85. except ArchiveException:
  86. os.remove(temp_filename)
  87. return None
  88. def __init__(self, path, delete_source=False):
  89. """Init.
  90. path (string): the path of the archive.
  91. delete_source (bool): whether the source archive should be
  92. deleted at cleanup or not.
  93. """
  94. if not Archive.is_supported(path):
  95. raise ArchiveException("This type of archive is not supported.")
  96. self.delete_source = delete_source
  97. self.path = path
  98. self.temp_dir = None
  99. def unpack(self):
  100. """Extract archive's content to a temporary directory.
  101. return (string): the path of the temporary directory.
  102. """
  103. self.temp_dir = tempfile.mkdtemp(dir=config.temp_dir)
  104. patoolib.extract_archive(self.path, outdir=self.temp_dir,
  105. interactive=False)
  106. return self.temp_dir
  107. def repack(self, target):
  108. """Repack to a new archive all the files which were unpacked in
  109. self.temp_dir.
  110. target (string): the new archive path.
  111. """
  112. if self.temp_dir is None:
  113. raise ArchiveException("The unpack() method must be called first.")
  114. Archive.create_from_dir(self.temp_dir, target)
  115. def cleanup(self):
  116. """Remove temporary directory, if needed.
  117. """
  118. if self.temp_dir is not None and os.path.exists(self.temp_dir):
  119. shutil.rmtree(self.temp_dir)
  120. self.temp_dir = None
  121. if self.delete_source:
  122. try:
  123. os.remove(self.path)
  124. except OSError:
  125. # Cannot delete source, it is not a big problem.
  126. pass
  127. def namelist(self):
  128. """Returns all pathnames for this archive.
  129. return ([string]): list of files in the archive.
  130. raise (NotImplementedError): when the archive was unpacked
  131. first.
  132. """
  133. if self.temp_dir is None:
  134. # Unfortunately, this "prints" names to the screen, so it's
  135. # not very handy.
  136. # patoolib.list_archive(self.path)
  137. raise NotImplementedError("Cannot list before unpacking.")
  138. else:
  139. names = []
  140. for path, _, filenames in os.walk(self.temp_dir):
  141. for filename in filenames:
  142. names.append(os.path.relpath(os.path.join(path, filename),
  143. self.temp_dir))
  144. return names
  145. def read(self, file_path):
  146. """Read a single file and return its file object.
  147. file_path (string): path of the file in the archive.
  148. return (file): handler for the file.
  149. raise (NotImplementedError): when the archive was unpacked
  150. first.
  151. """
  152. if self.temp_dir is None:
  153. # Unfortunately, patoolib does not expose an API to do this.
  154. raise NotImplementedError("Cannot read before unpacking.")
  155. else:
  156. return open(os.path.join(self.temp_dir, file_path), "rb")
  157. def write(self, file_path, file_object):
  158. """Writes a file in the archive in place.
  159. file_path (string): new path in the archive.
  160. file_object (object): file-like object.
  161. raise (NotImplementedError): always; this method is not yet
  162. implemented.
  163. """
  164. if self.temp_dir is None:
  165. # Unfortunately, patoolib does not expose an API to do this.
  166. raise NotImplementedError("Cannot write before unpacking.")
  167. else:
  168. raise NotImplementedError(
  169. "You should write the file directly, in the "
  170. "folder returned by unpack(), and then "
  171. "call the repack() method.")