zipfly.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. # -*- coding: utf-8 -*-
  2. __version__ = '6.0.5'
  3. # v
  4. import io
  5. import stat
  6. import zipfile
  7. ZIP64_LIMIT = (1 << 31) + 1
  8. class LargePredictionSize(Exception):
  9. """
  10. Raised when Buffer is larger than ZIP64
  11. """
  12. class ZipflyStream(io.RawIOBase):
  13. """
  14. The RawIOBase ABC extends IOBase. It deals with
  15. the reading and writing of bytes to a stream. FileIO subclasses
  16. RawIOBase to provide an interface to files in the machine’s file system.
  17. """
  18. def __init__(self):
  19. self._buffer = b''
  20. self._size = 0
  21. def writable(self):
  22. return True
  23. def write(self, b):
  24. if self.closed:
  25. raise RuntimeError("ZipFly stream was closed!")
  26. self._buffer += b
  27. return len(b)
  28. def get(self):
  29. chunk = self._buffer
  30. self._buffer = b''
  31. self._size += len(chunk)
  32. return chunk
  33. def size(self):
  34. return self._size
  35. class ZipFly:
  36. def __init__(self,
  37. mode = 'w',
  38. paths = [],
  39. chunksize = 0x8000,
  40. compression = zipfile.ZIP_STORED,
  41. allowZip64 = True,
  42. compresslevel = None,
  43. storesize = 0,
  44. filesystem = 'fs',
  45. arcname = 'n',
  46. encode = 'utf-8',):
  47. """
  48. @param store size : int : size of all files
  49. in paths without compression
  50. """
  51. if mode not in ('w',):
  52. raise RuntimeError("ZipFly requires 'w' mode")
  53. if compression not in ( zipfile.ZIP_STORED,):
  54. raise RuntimeError("Not compression supported")
  55. if compresslevel not in (None, ):
  56. raise RuntimeError("Not compression level supported")
  57. if isinstance(chunksize, str):
  58. chunksize = int(chunksize, 16)
  59. self.comment = f'Written using Zipfly v{__version__}'
  60. self.mode = mode
  61. self.paths = paths
  62. self.filesystem = filesystem
  63. self.arcname = arcname
  64. self.compression = compression
  65. self.chunksize = chunksize
  66. self.allowZip64 = allowZip64
  67. self.compresslevel = compresslevel
  68. self.storesize = storesize
  69. self.encode = encode
  70. self.ezs = int('0x8e', 16) # empty zip size in bytes
  71. def set_comment(self, comment):
  72. if not isinstance(comment, bytes):
  73. comment = str.encode(comment)
  74. if len(comment) >= zipfile.ZIP_MAX_COMMENT:
  75. # trunk comment
  76. comment = comment[:zipfile.ZIP_MAX_COMMENT]
  77. self.comment = comment
  78. def reader(self, entry):
  79. def get_chunk():
  80. return entry.read( self.chunksize )
  81. return get_chunk()
  82. def buffer_size(self):
  83. '''
  84. FOR UNIT TESTING (not used)
  85. using to get the buffer size
  86. this size is different from the size of each file added
  87. '''
  88. for i in self.generator(): pass
  89. return self._buffer_size
  90. def buffer_prediction_size(self):
  91. if not self.allowZip64:
  92. raise RuntimeError("ZIP64 extensions required")
  93. # End of Central Directory Record
  94. EOCD = int('0x16', 16)
  95. FILE_OFFSET = int('0x5e', 16) * len(self.paths)
  96. tmp_comment = self.comment
  97. if isinstance(self.comment, bytes):
  98. tmp_comment = ( self.comment ).decode()
  99. size_comment = len(tmp_comment.encode( self.encode ))
  100. # path-name
  101. size_paths = 0
  102. #for path in self.paths:
  103. for idx in range(len(self.paths)):
  104. '''
  105. getting bytes from character in UTF-8 format
  106. example:
  107. '传' has 3 bytes in utf-8 format ( b'\xe4\xbc\xa0' )
  108. '''
  109. #path = paths[idx]
  110. name = self.arcname
  111. if not self.arcname in self.paths[idx]:
  112. name = self.filesystem
  113. tmp_name = self.paths[idx][name]
  114. if (tmp_name)[0] in ('/', ):
  115. # is dir then trunk
  116. tmp_name = (tmp_name)[ 1 : len( tmp_name ) ]
  117. size_paths += (len(tmp_name.encode( self.encode )) - int( '0x1', 16)) * int('0x2', 16)
  118. # zipsize
  119. zs = sum([EOCD,FILE_OFFSET,size_comment,size_paths,self.storesize,])
  120. if zs > ZIP64_LIMIT:
  121. raise LargePredictionSize(
  122. "Prediction size for zip file greater than 2 GB not supported"
  123. )
  124. return zs
  125. def generator(self):
  126. # stream
  127. stream = ZipflyStream()
  128. with zipfile.ZipFile(
  129. stream,
  130. mode = self.mode,
  131. compression = self.compression,
  132. allowZip64 = self.allowZip64,) as zf:
  133. for path in self.paths:
  134. if not self.filesystem in path:
  135. raise RuntimeError(f"'{self.filesystem}' key is required")
  136. """
  137. filesystem should be the path to a file or directory on the filesystem.
  138. arcname is the name which it will have within the archive (by default,
  139. this will be the same as filename
  140. """
  141. if not self.arcname in path:
  142. # arcname will be default path
  143. path[self.arcname] = path[self.filesystem]
  144. z_info = zipfile.ZipInfo.from_file(
  145. path[self.filesystem],
  146. path[self.arcname]
  147. )
  148. with open( path[self.filesystem], 'rb' ) as e:
  149. # Read from filesystem:
  150. with zf.open( z_info, mode=self.mode ) as d:
  151. for chunk in iter( lambda: e.read(self.chunksize), b'' ):
  152. d.write(chunk)
  153. yield stream.get()
  154. self.set_comment(self.comment)
  155. zf.comment = self.comment
  156. yield stream.get()
  157. self._buffer_size = stream.size()
  158. # Flush and close this stream.
  159. stream.close()
  160. def get_size(self):
  161. return self._buffer_size