comparison env/lib/python3.9/site-packages/zipstream/__init__.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # -*- coding: utf-8 -*-
2 """
3 Iterable ZIP archive generator.
4
5 Derived directly from zipfile.py
6 """
7 from __future__ import unicode_literals, print_function, with_statement
8
9 __version__ = '1.1.4'
10
11 import os
12 import sys
13 import stat
14 import struct
15 import time
16 import zipfile
17
18 from .compat import (
19 str, bytes,
20 ZIP64_VERSION,
21 ZIP_BZIP2, BZIP2_VERSION,
22 ZIP_LZMA, LZMA_VERSION,
23 SEEK_SET, SEEK_CUR, SEEK_END)
24
25 from zipfile import (
26 ZIP_STORED, ZIP64_LIMIT, ZIP_FILECOUNT_LIMIT, ZIP_MAX_COMMENT,
27 ZIP_DEFLATED,
28 structCentralDir, structEndArchive64, structEndArchive, structEndArchive64Locator,
29 stringCentralDir, stringEndArchive64, stringEndArchive, stringEndArchive64Locator,
30 structFileHeader, stringFileHeader,
31 zlib, crc32)
32
33 stringDataDescriptor = b'PK\x07\x08' # magic number for data descriptor
34
35
def _get_compressor(compress_type):
    """Build a new compressor object for the given ZIP compression method.

    Returns a raw-deflate ``zlib`` compressor for ``ZIP_DEFLATED``, a
    ``BZ2Compressor`` for ``ZIP_BZIP2``, an ``LZMACompressor`` for
    ``ZIP_LZMA``, and ``None`` for any other value (callers then write the
    data uncompressed).
    """
    if compress_type == ZIP_DEFLATED:
        # wbits=-15 selects a raw deflate stream (no zlib header/trailer).
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)

    if compress_type == ZIP_BZIP2:
        # Imported lazily so the optional codec is only touched on demand.
        from zipfile import bz2
        return bz2.BZ2Compressor()

    if compress_type == ZIP_LZMA:
        from zipfile import LZMACompressor
        return LZMACompressor()

    return None
47
48
class PointerIO(object):
    """Write-only file-like object that counts bytes instead of storing them.

    ``write()`` validates its argument, advances ``data_pointer`` by the
    number of bytes and hands the bytes straight back to the caller, so the
    caller can forward them elsewhere while ``tell()`` keeps reporting the
    running offset.
    """

    def __init__(self, mode='wb'):
        """Create the object; only mode ``'wb'`` is accepted.

        Raises:
            RuntimeError: if *mode* is anything other than ``'wb'``.
        """
        if mode not in ('wb', ):
            raise RuntimeError('zipstream.ZipFile() requires mode "wb"')
        self.data_pointer = 0
        self.__mode = mode
        self.__closed = False

    @property
    def mode(self):
        """The mode string passed to the constructor (read-only)."""
        return self.__mode

    @property
    def closed(self):
        """True once ``close()`` has been called (read-only)."""
        return self.__closed

    def close(self):
        """Mark the object closed; later ``write()`` calls raise ValueError."""
        self.__closed = True

    def flush(self):
        """No-op: nothing is buffered."""
        pass

    def next(self):
        """Not supported; always raises NotImplementedError."""
        raise NotImplementedError()

    # def seek(self, offset, whence=None):
    #     if whence == SEEK_SET:
    #         if offset < 0:
    #             raise ValueError('negative seek value -1')
    #         self.data_pointer = offset
    #     elif whence == SEEK_CUR:
    #         self.data_pointer = max(0, self.data_pointer + offset)
    #     elif whence == SEEK_END:
    #         self.data_pointer = max(0, offset)
    #     return self.data_pointer

    def tell(self):
        """Return the total number of bytes passed through ``write()``."""
        return self.data_pointer

    def truncate(self, size=None):
        """Not supported; always raises NotImplementedError.

        The ``self`` parameter was previously missing, which turned
        ``obj.truncate(n)`` into a TypeError instead.
        """
        raise NotImplementedError()

    def write(self, data):
        """Account for *data* and return it as bytes.

        Text is encoded as UTF-8 first. The pointer advances by the length
        of the resulting bytes.

        Raises:
            ValueError: if the object has been closed.
            TypeError: if *data* is neither text nor bytes.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file')

        if isinstance(data, str):
            data = data.encode('utf-8')
        if not isinstance(data, bytes):
            raise TypeError('expected bytes')
        self.data_pointer += len(data)
        return data
101
102
class ZipInfo(zipfile.ZipInfo):
    """``zipfile.ZipInfo`` variant for entries followed by a data descriptor.

    New instances start with general-purpose flag bit 3 set, so the local
    file header is emitted with zeroed CRC and sizes and the real values
    are supplied afterwards by ``DataDescriptor()``.
    """

    def __init__(self, *args, **kwargs):
        """Initialise like ``zipfile.ZipInfo`` and set flag bit 3."""
        zipfile.ZipInfo.__init__(self, *args, **kwargs)
        self.flag_bits = 0x08  # ZIP flag bits, bit 3 indicates presence of data descriptor

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as bytes.

        Args:
            zip64: ``True`` to always append a ZIP64 extra field, ``False``
                to forbid ZIP64, ``None`` to decide from the sizes that will
                be written into the header.

        Raises:
            zipfile.LargeZipFile: if a size exceeds ``ZIP64_LIMIT`` while
                *zip64* is false.

        Side effects:
            ``extract_version`` and ``create_version`` are raised to the
            minimum version required by ZIP64 / the compression method.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Extra field: header id 1, payload length, then both 64-bit sizes.
            fmt = b'<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                # Was a bare ``LargeZipFile``, a name this module never
                # imports, so this path raised NameError instead.
                raise zipfile.LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def DataDescriptor(self):
        """Return the data descriptor record for this entry as bytes.

        Layout: signature, crc-32, compressed size, uncompressed size.
        The two sizes are packed as 8-byte values when either exceeds
        ``ZIP64_LIMIT`` and as 4-byte values otherwise.
        """
        if self.compress_size > ZIP64_LIMIT or self.file_size > ZIP64_LIMIT:
            fmt = b'<4sLQQ'
        else:
            fmt = b'<4sLLL'
        return struct.pack(fmt, stringDataDescriptor, self.CRC, self.compress_size, self.file_size)
165
166
class ZipFile(zipfile.ZipFile):
    """ZIP archive writer that yields the archive as an iterable of bytes.

    ``write()``, ``write_iter()`` and ``writestr()`` only queue work in
    ``paths_to_write``. Iterating the object drains the queue, yielding the
    value returned by ``self.fp.write(...)`` for every piece written, and
    then yields the central directory and end-of-archive records.
    """

    def __init__(self, fileobj=None, mode='w', compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode write "w".

        Args:
            fileobj: object the archive is written through; a new
                ``PointerIO`` is used when omitted.
            mode: must be ``'w'``.
            compression: default compression method for queued entries.
            allowZip64: forwarded to ``zipfile.ZipFile.__init__``.

        Raises:
            RuntimeError: if *mode* is not ``'w'``.
        """
        if mode not in ('w', ):
            raise RuntimeError('zipstream.ZipFile() requires mode "w"')
        if fileobj is None:
            fileobj = PointerIO()

        self._comment = b''
        zipfile.ZipFile.__init__(self, fileobj, mode=mode, compression=compression, allowZip64=allowZip64)
        # TODO: Refractor to write queue with args + kwargs matching write()
        self.paths_to_write = []

    def __iter__(self):
        """Yield every queued entry's output, then the closing records."""
        for data in self.flush():
            yield data
        for data in self.__close():
            yield data

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        # close() is not defined in this class; presumably the inherited
        # zipfile.ZipFile.close() -- confirm against the stdlib version in use.
        self.close()

    def flush(self):
        """Drain ``paths_to_write`` in FIFO order, yielding each entry's output."""
        while self.paths_to_write:
            kwargs = self.paths_to_write.pop(0)
            for data in self.__write(**kwargs):
                yield data

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        """Set the archive comment, truncating it to ``ZIP_MAX_COMMENT`` bytes.

        Raises:
            TypeError: if *comment* is not bytes.
        """
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment))
        # check for valid comment length
        # A comment of exactly ZIP_MAX_COMMENT bytes fits, so only longer ones
        # are reported and cut (the check used to be ``>=``, which announced a
        # truncation that did not happen).
        if len(comment) > ZIP_MAX_COMMENT:
            if self.debug:
                print('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def write(self, filename, arcname=None, compress_type=None):
        """Queue the file or directory at *filename* for inclusion.

        Nothing is read until the archive is iterated.
        """
        # TODO: Reflect python's Zipfile.write
        #   - if filename is file, write as file
        #   - if filename is directory, write an empty directory
        kwargs = {'filename': filename, 'arcname': arcname, 'compress_type': compress_type}
        self.paths_to_write.append(kwargs)

    def write_iter(self, arcname, iterable, compress_type=None, buffer_size=None, date_time=None):
        """Write the bytes iterable `iterable` to the archive under the name `arcname`.

        The entry is only queued here; *iterable* is consumed when the
        archive is iterated. *buffer_size*, when given, is used as the
        provisional file size before the data has been read.
        """
        kwargs = {'arcname': arcname, 'iterable': iterable, 'compress_type': compress_type, 'buffer_size': buffer_size, 'date_time': date_time}
        self.paths_to_write.append(kwargs)

    def writestr(self, arcname, data, compress_type=None, buffer_size=None, date_time=None):
        """
        Writes a str into ZipFile by wrapping data as a generator

        Text is encoded as UTF-8 before being queued: the write path feeds
        each chunk to ``crc32`` and counts its length in bytes, neither of
        which works on unencoded text. Bytes are queued unchanged.
        """
        if isinstance(data, str):
            data = data.encode('utf-8')

        def _iterable():
            yield data
        return self.write_iter(arcname, _iterable(), compress_type=compress_type, buffer_size=buffer_size, date_time=date_time)

    def __write(self, filename=None, iterable=None, arcname=None, compress_type=None, buffer_size=None, date_time=None):
        """Put the bytes from filename into the archive under the name
        `arcname`.

        Generator: yields the local header, the (optionally compressed)
        data chunks and finally the data descriptor, each as returned by
        ``self.fp.write``. Exactly one of *filename* / *iterable* must be
        given.

        Raises:
            RuntimeError: if the archive is already closed, or a size
                crosses ``ZIP64_LIMIT`` without ZIP64 having been selected
                for the header.
            ValueError: if both or neither of *filename* / *iterable* are set.
        """
        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")
        if (filename is None and iterable is None) or (filename is not None and iterable is not None):
            raise ValueError("either (exclusively) filename or iterable shall be not None")

        if filename:
            st = os.stat(filename)
            isdir = stat.S_ISDIR(st.st_mode)
            mtime = time.localtime(st.st_mtime)
            date_time = mtime[0:6]
        else:
            st, isdir = None, False
            if date_time is not None and isinstance(date_time, time.struct_time):
                date_time = date_time[0:6]
            if date_time is None:
                date_time = time.localtime()[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        if st:
            zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        if st:
            zinfo.file_size = st[6]
        else:
            zinfo.file_size = buffer_size or 0
        zinfo.flag_bits = 0x00
        zinfo.flag_bits |= 0x08                 # ZIP flag bits, bit 3 indicates presence of data descriptor
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            # NOTE(review): flag bit 3 is still set here although no data
            # descriptor is emitted for directory entries -- confirm intent.
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            yield self.fp.write(zinfo.FileHeader(False))
            return

        cmpr = _get_compressor(zinfo.compress_type)

        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
            zinfo.file_size * 1.05 > ZIP64_LIMIT
        yield self.fp.write(zinfo.FileHeader(zip64))
        file_size = 0
        if filename:
            with open(filename, 'rb') as fp:
                while 1:
                    buf = fp.read(1024 * 8)
                    if not buf:
                        break
                    file_size = file_size + len(buf)
                    CRC = crc32(buf, CRC) & 0xffffffff
                    if cmpr:
                        buf = cmpr.compress(buf)
                        compress_size = compress_size + len(buf)
                    yield self.fp.write(buf)
        else:  # we have an iterable
            for buf in iterable:
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                yield self.fp.write(buf)
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            yield self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        if not zip64 and self._allowZip64:
            if file_size > ZIP64_LIMIT:
                raise RuntimeError('File size has increased during compressing')
            if compress_size > ZIP64_LIMIT:
                raise RuntimeError('Compressed size larger than uncompressed size')

        # Seek backwards and write file header (which will now include
        # correct CRC and file sizes)
        # position = self.fp.tell()       # Preserve current position in file
        # self.fp.seek(zinfo.header_offset, 0)
        # self.fp.write(zinfo.FileHeader(zip64))
        # self.fp.seek(position, 0)
        yield self.fp.write(zinfo.DataDescriptor())
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __close(self):
        """Close the file, and for mode "w" write the ending
        records.

        Generator: yields one central-directory record per entry in
        ``filelist``, the ZIP64 end records when a count/offset/size limit
        is reached, and the end-of-archive record plus comment. ``self.fp``
        is reset to ``None`` in all cases once the generator finishes or is
        torn down.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ('w', 'a') and self._didModify:  # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # 64-bit values that overflow their 32-bit header slots;
                    # each overflowing slot is written as 0xffffffff instead.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffff
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    min_version = 0
                    if extra:
                        # Append a ZIP64 field to the extra's
                        extra_data = struct.pack(
                            b'<HH' + b'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data
                        min_version = ZIP64_VERSION

                    if zinfo.compress_type == ZIP_BZIP2:
                        min_version = max(BZIP2_VERSION, min_version)
                    elif zinfo.compress_type == ZIP_LZMA:
                        min_version = max(LZMA_VERSION, min_version)

                    extract_version = max(min_version, zinfo.extract_version)
                    create_version = max(min_version, zinfo.create_version)
                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                                              stringCentralDir, create_version,
                                              zinfo.create_system, extract_version, zinfo.reserved,
                                              flag_bits, zinfo.compress_type, dostime, dosdate,
                                              zinfo.CRC, compress_size, file_size,
                                              len(filename), len(extra_data), len(zinfo.comment),
                                              0, zinfo.internal_attr, zinfo.external_attr,
                                              header_offset)
                    except DeprecationWarning:
                        # Dump the values that failed to pack, then re-raise.
                        print((structCentralDir, stringCentralDir, create_version,
                               zinfo.create_system, extract_version, zinfo.reserved,
                               zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                               zinfo.CRC, compress_size, file_size,
                               len(zinfo.filename), len(extra_data), len(zinfo.comment),
                               0, zinfo.internal_attr, zinfo.external_attr,
                               header_offset), file=sys.stderr)
                        raise
                    yield self.fp.write(centdir)
                    yield self.fp.write(filename)
                    yield self.fp.write(extra_data)
                    yield self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = count
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                        centDirOffset > ZIP64_LIMIT or
                        centDirSize > ZIP64_LIMIT):
                    # Need to write the ZIP64 end-of-archive records
                    zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, centDirCount, centDirCount,
                        centDirSize, centDirOffset)
                    yield self.fp.write(zip64endrec)

                    zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                    yield self.fp.write(zip64locrec)
                    # Clamp to what the classic end record's fields can hold.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, centDirCount, centDirCount,
                                     centDirSize, centDirOffset, len(self._comment))
                yield self.fp.write(endrec)
                yield self.fp.write(self._comment)
                self.fp.flush()
        finally:
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()