Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/zipstream/__init__.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 Iterable ZIP archive generator. | |
4 | |
5 Derived directly from zipfile.py | |
6 """ | |
7 from __future__ import unicode_literals, print_function, with_statement | |
8 | |
9 __version__ = '1.1.4' | |
10 | |
11 import os | |
12 import sys | |
13 import stat | |
14 import struct | |
15 import time | |
16 import zipfile | |
17 | |
18 from .compat import ( | |
19 str, bytes, | |
20 ZIP64_VERSION, | |
21 ZIP_BZIP2, BZIP2_VERSION, | |
22 ZIP_LZMA, LZMA_VERSION, | |
23 SEEK_SET, SEEK_CUR, SEEK_END) | |
24 | |
25 from zipfile import ( | |
26 ZIP_STORED, ZIP64_LIMIT, ZIP_FILECOUNT_LIMIT, ZIP_MAX_COMMENT, | |
27 ZIP_DEFLATED, | |
28 structCentralDir, structEndArchive64, structEndArchive, structEndArchive64Locator, | |
29 stringCentralDir, stringEndArchive64, stringEndArchive, stringEndArchive64Locator, | |
30 structFileHeader, stringFileHeader, | |
31 zlib, crc32) | |
32 | |
33 stringDataDescriptor = b'PK\x07\x08' # magic number for data descriptor | |
34 | |
35 | |
def _get_compressor(compress_type):
    """Return a new compressor object for `compress_type`.

    Returns None when `compress_type` is none of ZIP_DEFLATED, ZIP_BZIP2
    or ZIP_LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        # Negative window bits ask zlib for a raw deflate stream, i.e.
        # without the zlib header and trailer.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)

    if compress_type == ZIP_BZIP2:
        # Resolved lazily through zipfile, only when bzip2 is requested.
        from zipfile import bz2
        return bz2.BZ2Compressor()

    if compress_type == ZIP_LZMA:
        # Resolved lazily through zipfile, only when LZMA is requested.
        from zipfile import LZMACompressor
        return LZMACompressor()

    return None
47 | |
48 | |
class PointerIO(object):
    """Write-only file-like object that stores nothing and only counts bytes.

    `write()` validates its argument, advances `data_pointer` by the number
    of bytes and returns those bytes to the caller instead of persisting
    them. `tell()` reports the running total.
    """

    def __init__(self, mode='wb'):
        """Create the object; `mode` must be 'wb' or RuntimeError is raised."""
        if mode not in ('wb', ):
            raise RuntimeError('zipstream.ZipFile() requires mode "wb"')
        self.data_pointer = 0  # number of bytes "written" so far
        self.__mode = mode
        self.__closed = False

    @property
    def mode(self):
        """The mode given at construction (read-only)."""
        return self.__mode

    @property
    def closed(self):
        """True once `close()` has been called (read-only)."""
        return self.__closed

    def close(self):
        """Mark the object closed; later `write()` calls raise ValueError."""
        self.__closed = True

    def flush(self):
        """No-op: there is no buffer to flush."""
        pass

    def next(self):
        """Not supported; always raises NotImplementedError."""
        raise NotImplementedError()

    # seek() is currently disabled; the previous implementation is kept
    # below for reference.
    # def seek(self, offset, whence=None):
    #     if whence == SEEK_SET:
    #         if offset < 0:
    #             raise ValueError('negative seek value -1')
    #         self.data_pointer = offset
    #     elif whence == SEEK_CUR:
    #         self.data_pointer = max(0, self.data_pointer + offset)
    #     elif whence == SEEK_END:
    #         self.data_pointer = max(0, offset)
    #     return self.data_pointer

    def tell(self):
        """Return the total number of bytes passed to `write()` so far."""
        return self.data_pointer

    def truncate(self, size=None):
        """Not supported; always raises NotImplementedError.

        `self` was missing from the original signature, so a call such as
        `obj.truncate(10)` failed with TypeError instead.
        """
        raise NotImplementedError()

    def write(self, data):
        """Account for `data` and return it as bytes.

        Text is encoded as UTF-8 first. Unlike a regular file object the
        return value is the (possibly encoded) data, not a byte count.

        Raises:
            ValueError: if the object has been closed.
            TypeError: if `data` is neither text nor bytes.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file')

        if isinstance(data, str):
            data = data.encode('utf-8')
        if not isinstance(data, bytes):
            raise TypeError('expected bytes')
        self.data_pointer += len(data)
        return data
101 | |
102 | |
class ZipInfo(zipfile.ZipInfo):
    """`zipfile.ZipInfo` whose entries carry a trailing data descriptor.

    Every instance starts with general-purpose flag bit 3 set, so
    `FileHeader()` writes zero for the CRC and both sizes and the real
    values are emitted afterwards by `DataDescriptor()`.
    """

    def __init__(self, *args, **kwargs):
        """Initialise like `zipfile.ZipInfo`, then set flag bit 3."""
        zipfile.ZipInfo.__init__(self, *args, **kwargs)
        self.flag_bits = 0x08  # ZIP flag bits, bit 3 indicates presence of data descriptor

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as bytes.

        Args:
            zip64: True appends a ZIP64 extra field to the header. None
                decides from the sizes that will be written. A false value
                forbids ZIP64.

        Raises:
            zipfile.LargeZipFile: if a size exceeds ZIP64_LIMIT while
                `zip64` is false.

        Side effect: `extract_version` and `create_version` are raised to
        the minimum version required by the features in use.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = b'<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                # The bare name LargeZipFile is not imported by this module;
                # reference it through the already-imported zipfile module
                # so this path raises the intended exception, not NameError.
                raise zipfile.LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def DataDescriptor(self):
        """Return the data descriptor record for this entry as bytes.

        Layout: signature, crc-32, compressed size, uncompressed size.
        The CRC is 4 bytes. The two sizes are 4 bytes each, or 8 bytes each
        when either size exceeds ZIP64_LIMIT.
        """
        if self.compress_size > ZIP64_LIMIT or self.file_size > ZIP64_LIMIT:
            fmt = b'<4sLQQ'
        else:
            fmt = b'<4sLLL'
        return struct.pack(fmt, stringDataDescriptor, self.CRC, self.compress_size, self.file_size)
165 | |
166 | |
class ZipFile(zipfile.ZipFile):
    """Write-only ZIP archive that is generated lazily while iterated.

    `write()`, `write_iter()` and `writestr()` only queue entries in
    `paths_to_write`. Iterating the object writes each queued entry and then
    the central directory and end records, yielding whatever the underlying
    file object's `write()` returns (the written bytes with the default
    `PointerIO`).
    """

    def __init__(self, fileobj=None, mode='w', compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode write "w".

        A `PointerIO` is used when no `fileobj` is given. Any mode other
        than 'w' raises RuntimeError.
        """
        if mode not in ('w', ):
            raise RuntimeError('zipstream.ZipFile() requires mode "w"')
        if fileobj is None:
            fileobj = PointerIO()

        self._comment = b''
        zipfile.ZipFile.__init__(self, fileobj, mode=mode, compression=compression, allowZip64=allowZip64)
        # TODO: Refactor to write queue with args + kwargs matching write()
        self.paths_to_write = []  # queued keyword-argument dicts for __write()

    def __iter__(self):
        """Yield the whole archive: queued entries, then the ending records."""
        for data in self.flush():
            yield data
        for data in self.__close():
            yield data

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        # Delegates to the inherited close().
        self.close()

    def flush(self):
        """Generator: write every queued entry in FIFO order, yielding chunks."""
        while self.paths_to_write:
            kwargs = self.paths_to_write.pop(0)
            for data in self.__write(**kwargs):
                yield data

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment))
        # check for valid comment length; a comment of exactly
        # ZIP_MAX_COMMENT bytes is legal and must not be reported as
        # truncated (the original compared with >=).
        if len(comment) > ZIP_MAX_COMMENT:
            if self.debug:
                print('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def write(self, filename, arcname=None, compress_type=None):
        """Queue the file or directory at `filename` for writing.

        Nothing is read or written until the archive is iterated or
        `flush()` is consumed.
        """
        # TODO: Reflect python's Zipfile.write
        #   - if filename is file, write as file
        #   - if filename is directory, write an empty directory
        kwargs = {'filename': filename, 'arcname': arcname, 'compress_type': compress_type}
        self.paths_to_write.append(kwargs)

    def write_iter(self, arcname, iterable, compress_type=None, buffer_size=None, date_time=None):
        """Write the bytes iterable `iterable` to the archive under the name `arcname`.

        The entry is only queued here. `buffer_size`, when given, is used as
        the size estimate for the ZIP64 decision. `date_time` defaults to
        the local time at which the entry is actually written.
        """
        kwargs = {'arcname': arcname, 'iterable': iterable, 'compress_type': compress_type, 'buffer_size': buffer_size, 'date_time': date_time}
        self.paths_to_write.append(kwargs)

    def writestr(self, arcname, data, compress_type=None, buffer_size=None, date_time=None):
        """
        Writes a str into ZipFile by wrapping data as a generator

        Text is encoded as UTF-8 first (as the standard library's
        `ZipFile.writestr` does); previously text reached `crc32()`
        unencoded and failed there with TypeError.
        """
        if isinstance(data, str):
            data = data.encode('utf-8')

        def _iterable():
            yield data
        return self.write_iter(arcname, _iterable(), compress_type=compress_type, buffer_size=buffer_size, date_time=date_time)

    def __write(self, filename=None, iterable=None, arcname=None, compress_type=None, buffer_size=None, date_time=None):
        """Put the bytes from filename into the archive under the name
        `arcname`.

        Generator. Exactly one of `filename` and `iterable` must be given,
        otherwise ValueError is raised. Yields the result of each
        `self.fp.write()` call: local header, data chunks, then the data
        descriptor. Raises RuntimeError if the archive is already closed.
        """
        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")
        if (filename is None and iterable is None) or (filename is not None and iterable is not None):
            raise ValueError("either (exclusively) filename or iterable shall be not None")

        if filename:
            st = os.stat(filename)
            isdir = stat.S_ISDIR(st.st_mode)
            mtime = time.localtime(st.st_mtime)
            date_time = mtime[0:6]
        else:
            st, isdir = None, False
            if date_time is not None and isinstance(date_time, time.struct_time):
                date_time = date_time[0:6]
            if date_time is None:
                date_time = time.localtime()[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        if st:
            zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        if st:
            zinfo.file_size = st[6]
        else:
            # Only an estimate for streamed data; corrected after writing.
            zinfo.file_size = buffer_size or 0
        zinfo.flag_bits = 0x00
        zinfo.flag_bits |= 0x08                 # ZIP flag bits, bit 3 indicates presence of data descriptor
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            # Directories are header-only entries.
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            yield self.fp.write(zinfo.FileHeader(False))
            return

        cmpr = _get_compressor(zinfo.compress_type)

        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
            zinfo.file_size * 1.05 > ZIP64_LIMIT
        yield self.fp.write(zinfo.FileHeader(zip64))
        file_size = 0
        if filename:
            with open(filename, 'rb') as fp:
                while 1:
                    buf = fp.read(1024 * 8)
                    if not buf:
                        break
                    file_size = file_size + len(buf)
                    CRC = crc32(buf, CRC) & 0xffffffff
                    if cmpr:
                        buf = cmpr.compress(buf)
                        compress_size = compress_size + len(buf)
                    yield self.fp.write(buf)
        else:  # we have an iterable
            for buf in iterable:
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                yield self.fp.write(buf)
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            yield self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        if not zip64 and self._allowZip64:
            if file_size > ZIP64_LIMIT:
                raise RuntimeError('File size has increased during compressing')
            if compress_size > ZIP64_LIMIT:
                raise RuntimeError('Compressed size larger than uncompressed size')

        # The header is not rewritten; the final CRC and sizes are emitted
        # in the data descriptor written below. The disabled seek-back code
        # is kept for reference.
        # Seek backwards and write file header (which will now include
        # correct CRC and file sizes)
        # position = self.fp.tell()       # Preserve current position in file
        # self.fp.seek(zinfo.header_offset, 0)
        # self.fp.write(zinfo.FileHeader(zip64))
        # self.fp.seek(position, 0)
        yield self.fp.write(zinfo.DataDescriptor())
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __close(self):
        """Close the file, and for mode "w" write the ending
        records.

        Generator. Yields the result of each `self.fp.write()` call for the
        central directory, the optional ZIP64 end records, the end-of-archive
        record and the archive comment. Afterwards `self.fp` is set to None,
        and the file object is closed unless it was passed in by the caller.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ('w', 'a') and self._didModify:  # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # Values too large for their 32-bit fields are collected
                    # here and written to a ZIP64 extra field instead.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffff
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    min_version = 0
                    if extra:
                        # Append a ZIP64 field to the extra's
                        extra_data = struct.pack(
                            b'<HH' + b'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data
                        min_version = ZIP64_VERSION

                    if zinfo.compress_type == ZIP_BZIP2:
                        min_version = max(BZIP2_VERSION, min_version)
                    elif zinfo.compress_type == ZIP_LZMA:
                        min_version = max(LZMA_VERSION, min_version)

                    extract_version = max(min_version, zinfo.extract_version)
                    create_version = max(min_version, zinfo.create_version)
                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                                              stringCentralDir, create_version,
                                              zinfo.create_system, extract_version, zinfo.reserved,
                                              flag_bits, zinfo.compress_type, dostime, dosdate,
                                              zinfo.CRC, compress_size, file_size,
                                              len(filename), len(extra_data), len(zinfo.comment),
                                              0, zinfo.internal_attr, zinfo.external_attr,
                                              header_offset)
                    except DeprecationWarning:
                        # Dump the offending values for diagnosis, then re-raise.
                        print((structCentralDir, stringCentralDir, create_version,
                               zinfo.create_system, extract_version, zinfo.reserved,
                               zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                               zinfo.CRC, compress_size, file_size,
                               len(zinfo.filename), len(extra_data), len(zinfo.comment),
                               0, zinfo.internal_attr, zinfo.external_attr,
                               header_offset), file=sys.stderr)
                        raise
                    yield self.fp.write(centdir)
                    yield self.fp.write(filename)
                    yield self.fp.write(extra_data)
                    yield self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = count
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                        centDirOffset > ZIP64_LIMIT or
                        centDirSize > ZIP64_LIMIT):
                    # Need to write the ZIP64 end-of-archive records
                    zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, centDirCount, centDirCount,
                        centDirSize, centDirOffset)
                    yield self.fp.write(zip64endrec)

                    zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                    yield self.fp.write(zip64locrec)
                    # Clamp to the widths of the classic end record fields.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, centDirCount, centDirCount,
                                     centDirSize, centDirOffset, len(self._comment))
                yield self.fp.write(endrec)
                yield self.fp.write(self._comment)
                self.fp.flush()
        finally:
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()