comparison env/lib/python3.9/site-packages/boltons/fileutils.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # -*- coding: utf-8 -*-
2 """Virtually every Python programmer has used Python for wrangling
3 disk contents, and ``fileutils`` collects solutions to some of the
4 most commonly-found gaps in the standard library.
5 """
6
7 from __future__ import print_function
8
9 import os
10 import re
11 import sys
12 import stat
13 import errno
14 import fnmatch
15 from shutil import copy2, copystat, Error
16
17
18 __all__ = ['mkdir_p', 'atomic_save', 'AtomicSaver', 'FilePerms',
19 'iter_find_files', 'copytree']
20
21
# Permission masks are built from a single rwx triplet with shifts rather
# than octal literals, because octal literal syntax differs between
# Python 2 and Python 3.
_SINGLE_FULL_PERM = 7  # rwx for one class of user (octal 7)
FULL_PERMS = ((_SINGLE_FULL_PERM << 6)
              | (_SINGLE_FULL_PERM << 3)
              | _SINGLE_FULL_PERM)  # 511, i.e. octal 777
RW_PERMS = (6 << 6) | (6 << 3) | 6  # 438, i.e. octal 666
try:
    basestring
except NameError:
    # Python 3 compat: neither name exists as a builtin there
    basestring = (str, bytes)
    unicode = str
30
31
def mkdir_p(path):
    """Create the directory *path*, along with any missing parent
    directories, treating an already-existing directory as success.

    This mirrors the ``mkdir -p`` command found in Linux/BSD
    environments, and works on Windows as well.

    Args:
        path (str): Directory path to create.

    Raises:
        OSError: If creation fails for any reason other than *path*
            already existing as a directory (for example, *path*
            exists but is a regular file).
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        # Only "it already exists, and it is a directory" is benign.
        already_a_dir = exc.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_dir:
            raise
46
47
class FilePerms(object):
    """The :class:`FilePerms` type is used to represent standard POSIX
    filesystem permissions:

      * Read
      * Write
      * Execute

    Across three classes of user:

      * Owning (u)ser
      * Owner's (g)roup
      * Any (o)ther user

    This class assists with computing new permissions, as well as
    working with numeric octal ``777``-style and ``rwx``-style
    permissions. Currently it only considers the bottom 9 permission
    bits; it does not support sticky bits or more advanced permission
    systems.

    Args:
        user (str): A string in the 'rwx' format, omitting characters
            for which owning user's permissions are not provided.
        group (str): A string in the 'rwx' format, omitting characters
            for which owning group permissions are not provided.
        other (str): A string in the 'rwx' format, omitting characters
            for which owning other/world permissions are not provided.

    There are many ways to use :class:`FilePerms`:

    >>> FilePerms(user='rwx', group='xrw', other='wxr')  # note character order
    FilePerms(user='rwx', group='rwx', other='rwx')
    >>> int(FilePerms('r', 'r', ''))
    288
    >>> oct(288)[-3:]  # XXX Py3k
    '440'

    Permissions can be reassigned after construction, and the integer
    form always tracks the current values:

    >>> perms = FilePerms('rwx', 'rwx', 'rwx')
    >>> perms.other = ''
    >>> oct(int(perms))[-3:]
    '770'

    See also the :meth:`FilePerms.from_int` and
    :meth:`FilePerms.from_path` classmethods for useful alternative
    ways to construct :class:`FilePerms` objects.
    """
    # TODO: consider more than the lower 9 bits
    class _FilePermProperty(object):
        """Descriptor that validates and normalizes one ``rwx`` string
        and keeps the owning object's ``_integer`` in sync.

        *attribute* is the name of the private attribute holding the
        normalized string; *offset* is the position of this class of
        user's 3-bit field within the integer mode (0 is the lowest).
        """
        _perm_chars = 'rwx'
        _perm_set = frozenset('rwx')
        _perm_val = {'r': 4, 'w': 2, 'x': 1}  # bit values, also for sorting

        def __init__(self, attribute, offset):
            self.attribute = attribute
            self.offset = offset

        def __get__(self, fp_obj, type_=None):
            if fp_obj is None:
                return self  # accessed on the class, not an instance
            return getattr(fp_obj, self.attribute)

        def __set__(self, fp_obj, value):
            cur = getattr(fp_obj, self.attribute)
            if cur == value:
                return
            try:
                invalid_chars = set(str(value)) - self._perm_set
            except TypeError:
                # wrap in a tuple so tuple values format correctly
                raise TypeError('expected string, not %r' % (value,))
            if invalid_chars:
                raise ValueError('got invalid chars %r in permission'
                                 ' specification %r, expected empty string'
                                 ' or one or more of %r'
                                 % (invalid_chars, value, self._perm_chars))

            # normalize: de-duplicate and order as r, w, x
            sort_key = lambda c: self._perm_val[c]
            new_value = ''.join(sorted(set(value),
                                       key=sort_key, reverse=True))
            setattr(fp_obj, self.attribute, new_value)
            self._update_integer(fp_obj, new_value)

        def _update_integer(self, fp_obj, value):
            shift = self.offset * 3
            mode = 0
            for symbol in value:
                mode |= self._perm_val[symbol] << shift
            # Clear this class of user's three bits before setting the
            # new ones; a plain OR would leave stale bits behind when a
            # permission is narrowed (e.g. 'rwx' -> 'r').
            fp_obj._integer = (fp_obj._integer & ~(7 << shift)) | mode

    def __init__(self, user='', group='', other=''):
        self._user, self._group, self._other = '', '', ''
        self._integer = 0
        self.user = user
        self.group = group
        self.other = other

    @classmethod
    def from_int(cls, i):
        """Create a :class:`FilePerms` object from an integer. Only the
        lowest 9 bits of *i* are considered.

        >>> FilePerms.from_int(0o644)  # note the leading zero-oh for octal
        FilePerms(user='rw', group='r', other='r')
        >>> FilePerms.from_int(0o044)  # empty user permissions are kept
        FilePerms(user='', group='r', other='r')
        """
        key = ('', 'x', 'w', 'wx', 'r', 'rx', 'rw', 'rwx')
        # Always extract exactly three triplets (user, group, other) so
        # that zero-valued high-order triplets are not dropped, which
        # would shift the remaining values into the wrong arguments.
        user, group, other = [key[(i >> shift) & 7] for shift in (6, 3, 0)]
        return cls(user, group, other)

    @classmethod
    def from_path(cls, path):
        """Make a new :class:`FilePerms` object based on the permissions
        assigned to the file or directory at *path*.

        Args:
            path (str): Filesystem path of the target file.

        Here's an example that holds true on most systems:

        >>> import tempfile
        >>> 'r' in FilePerms.from_path(tempfile.gettempdir()).user
        True
        """
        stat_res = os.stat(path)
        return cls.from_int(stat.S_IMODE(stat_res.st_mode))

    def __int__(self):
        return self._integer

    # Sphinx tip: attribute docstrings come after the attribute
    user = _FilePermProperty('_user', 2)
    "Stores the ``rwx``-formatted *user* permission."
    group = _FilePermProperty('_group', 1)
    "Stores the ``rwx``-formatted *group* permission."
    other = _FilePermProperty('_other', 0)
    "Stores the ``rwx``-formatted *other* permission."

    def __repr__(self):
        cn = self.__class__.__name__
        return ('%s(user=%r, group=%r, other=%r)'
                % (cn, self.user, self.group, self.other))
187
188 ####
189
190
# Flags passed to os.open() when creating part files. Platform-specific
# flags are OR-ed in only where the os module provides them; a missing
# flag contributes 0.
_TEXT_OPENFLAGS = (os.O_RDWR | os.O_CREAT | os.O_EXCL
                   | getattr(os, 'O_NOINHERIT', 0)
                   | getattr(os, 'O_NOFOLLOW', 0))
# Binary mode uses the same flags, plus O_BINARY where it exists.
_BIN_OPENFLAGS = _TEXT_OPENFLAGS | getattr(os, 'O_BINARY', 0)
199
200
try:
    import fcntl as fcntl
except ImportError:
    def set_cloexec(fd):
        "Dummy set_cloexec for platforms without fcntl support"
        return None
else:
    def set_cloexec(fd):
        """Make a best-effort :func:`fcntl.fcntl` call to flag *fd* so
        that it is automatically closed in any future child processes.

        Failure to read the descriptor's current flags is silently
        ignored; failure to write the updated flags is not.

        Implementation from the :mod:`tempfile` module.
        """
        try:
            current = fcntl.fcntl(fd, fcntl.F_GETFD, 0)
        except IOError:
            # could not read the flags, so there is nothing to modify
            return
        fcntl.fcntl(fd, fcntl.F_SETFD, current | fcntl.FD_CLOEXEC)
223
224
def atomic_save(dest_path, **kwargs):
    """Construct and return an :class:`AtomicSaver` for *dest_path*.

    All keyword arguments are forwarded unchanged; see the
    :class:`AtomicSaver` documentation for the accepted options.
    """
    saver = AtomicSaver(dest_path, **kwargs)
    return saver
230
231
def path_to_unicode(path):
    """Return *path* as a text (``unicode``) string.

    Text input is returned untouched. Anything else is decoded with
    the filesystem encoding, falling back to the interpreter's default
    encoding when no filesystem encoding is reported.
    """
    if not isinstance(path, unicode):
        fs_encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
        path = path.decode(fs_encoding)
    return path
237
238
if os.name == 'nt':
    import ctypes
    from ctypes import c_wchar_p
    from ctypes.wintypes import DWORD, LPVOID

    _ReplaceFile = ctypes.windll.kernel32.ReplaceFile
    _ReplaceFile.argtypes = [c_wchar_p, c_wchar_p, c_wchar_p,
                             DWORD, LPVOID, LPVOID]

    def replace(src, dst):
        # parameter names follow the stdlib docs; the docstring is
        # attached after both platform variants are defined
        try:
            # ReplaceFile fails if the dest file does not exist, so a
            # plain rename into position is attempted first
            os.rename(src, dst)
        except WindowsError as we:
            if we.errno != errno.EEXIST:
                raise
            # dst already exists: carry on with ReplaceFile below
        else:
            return

        src_text = path_to_unicode(src)
        dst_text = path_to_unicode(dst)
        succeeded = _ReplaceFile(c_wchar_p(dst_text), c_wchar_p(src_text),
                                 None, 0, None, None)
        if not succeeded:
            raise OSError('failed to replace %r with %r'
                          % (dst_text, src_text))

    def atomic_rename(src, dst, overwrite=False):
        "Rename *src* to *dst*, replacing *dst* if *overwrite* is True"
        mover = replace if overwrite else os.rename
        mover(src, dst)
else:
    # thin wrapper kept for cross-platform compatibility and docs
    def replace(src, dst):
        # os.replace does the same thing on unix
        return os.rename(src, dst)

    def atomic_rename(src, dst, overwrite=False):
        "Rename *src* to *dst*, replacing *dst* if *overwrite* is True"
        if overwrite:
            os.rename(src, dst)
            return
        # link-then-unlink: os.link raises if dst already exists
        os.link(src, dst)
        os.unlink(src)


_atomic_rename = atomic_rename  # backwards compat

replace.__doc__ = """Similar to :func:`os.replace` in Python 3.3+,
this function will atomically create or replace the file at path
*dst* with the file at path *src*.

On Windows, this function uses the ReplaceFile API for maximum
possible atomicity on a range of filesystems.
"""
301
302
class AtomicSaver(object):
    """``AtomicSaver`` is a configurable `context manager`_ that provides
    a writable :class:`file` which will be moved into place as long as
    no exceptions are raised within the context manager's block. These
    "part files" are created in the same directory as the destination
    path to ensure atomic move operations (i.e., no cross-filesystem
    moves occur).

    Args:
        dest_path (str): The path where the completed file will be
            written.
        overwrite (bool): Whether to overwrite the destination file if
            it exists at completion time. Defaults to ``True``.
        file_perms (int): Integer representation of file permissions
            for the newly-created file. Defaults are, when the
            destination path already exists, to copy the permissions
            from the previous file, or if the file did not exist, to
            respect the user's configured `umask`_, usually resulting
            in octal 0644 or 0664.
        text_mode (bool): Whether to open the destination file in text
            mode. Defaults to ``False`` (binary mode).
        part_file (str): Name of the temporary *part_file*. Defaults
            to *dest_path* + ``.part``. Note that this argument is
            just the filename, and not the full path of the part
            file. To guarantee atomic saves, part files are always
            created in the same directory as the destination path.
        overwrite_part (bool): Whether to overwrite the *part_file*,
            should it exist at setup time. Defaults to ``False``,
            which results in an :exc:`OSError` being raised on
            pre-existing part files. Be careful of setting this to
            ``True`` in situations when multiple threads or processes
            could be writing to the same part file.
        rm_part_on_exc (bool): Remove *part_file* on exception cases.
            Defaults to ``True``, but ``False`` can be useful for
            recovery in some cases. Note that resumption is not
            automatic and by default an :exc:`OSError` is raised if
            the *part_file* exists.
        buffering (int): Buffering argument handed to
            :func:`os.fdopen` when the part file is opened. Defaults
            to ``-1``.

    Practically, the AtomicSaver serves a few purposes:

      * Avoiding overwriting an existing, valid file with a partially
        written one.
      * Providing a reasonable guarantee that a part file only has one
        writer at a time.
      * Optional recovery of partial data in failure cases.

    .. _context manager: https://docs.python.org/2/reference/compound_stmts.html#with
    .. _umask: https://en.wikipedia.org/wiki/Umask

    """
    _default_file_perms = RW_PERMS

    # TODO: option to abort if target file modify date has changed since start?
    def __init__(self, dest_path, **kwargs):
        self.dest_path = dest_path
        self.overwrite = kwargs.pop('overwrite', True)
        self.file_perms = kwargs.pop('file_perms', None)
        self.overwrite_part = kwargs.pop('overwrite_part', False)
        self.part_filename = kwargs.pop('part_file', None)
        self.rm_part_on_exc = kwargs.pop('rm_part_on_exc', True)
        self.text_mode = kwargs.pop('text_mode', False)
        self.buffering = kwargs.pop('buffering', -1)
        if kwargs:
            # sorted() gives a plain, stable list on Python 2 and 3
            # instead of a dict_keys(...) repr
            raise TypeError('unexpected kwargs: %r' % (sorted(kwargs),))

        self.dest_path = os.path.abspath(self.dest_path)
        self.dest_dir = os.path.dirname(self.dest_path)
        if not self.part_filename:
            # Derive the part path from the absolutized destination, not
            # the raw argument, so that it always lives in dest_dir even
            # if the argument was relative and the working directory
            # changes later, or the argument had a trailing separator.
            self.part_path = self.dest_path + '.part'
        else:
            self.part_path = os.path.join(self.dest_dir, self.part_filename)
        self.mode = 'w+' if self.text_mode else 'w+b'
        self.open_flags = _TEXT_OPENFLAGS if self.text_mode else _BIN_OPENFLAGS

        self.part_file = None

    def _open_part_file(self):
        """Create the part file and store the open file object on
        ``self.part_file``.

        Permissions come from *file_perms* when given, else from the
        existing destination file, else from the class default as
        filtered by the umask.
        """
        do_chmod = True
        file_perms = self.file_perms
        if file_perms is None:
            try:
                # try to copy from file being replaced
                stat_res = os.stat(self.dest_path)
                file_perms = stat.S_IMODE(stat_res.st_mode)
            except (OSError, IOError):
                # default if no destination file exists
                file_perms = self._default_file_perms
                do_chmod = False  # respect the umask

        fd = os.open(self.part_path, self.open_flags, file_perms)
        set_cloexec(fd)
        self.part_file = os.fdopen(fd, self.mode, self.buffering)

        # if default perms are overridden by the user or previous dest_path
        # chmod away the effects of the umask
        if do_chmod:
            try:
                os.chmod(self.part_path, file_perms)
            except (OSError, IOError):
                self.part_file.close()
                raise
        return

    def setup(self):
        """Called on context manager entry (the :keyword:`with` statement),
        the ``setup()`` method creates the temporary file in the same
        directory as the destination file.

        ``setup()`` tests for a writable directory with rename permissions
        early, as the part file may not be written to immediately (not
        using :func:`os.access` because of the potential issues of
        effective vs. real privileges).

        If the caller is not using the :class:`AtomicSaver` as a
        context manager, this method should be called explicitly
        before writing.

        Raises:
            OSError: If the destination exists and *overwrite* is
                disabled, or if the part file cannot be created.
        """
        if os.path.lexists(self.dest_path):
            if not self.overwrite:
                raise OSError(errno.EEXIST,
                              'Overwrite disabled and file already exists',
                              self.dest_path)
        if self.overwrite_part and os.path.lexists(self.part_path):
            os.unlink(self.part_path)
        self._open_part_file()
        return

    def __enter__(self):
        self.setup()
        return self.part_file

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.part_file.close()
        if exc_type:
            # the block failed: never move a partial file into place
            if self.rm_part_on_exc:
                try:
                    os.unlink(self.part_path)
                except Exception:
                    pass  # avoid masking original error
            return
        try:
            atomic_rename(self.part_path, self.dest_path,
                          overwrite=self.overwrite)
        except OSError:
            if self.rm_part_on_exc:
                try:
                    os.unlink(self.part_path)
                except Exception:
                    pass  # avoid masking original error
            raise  # could not save destination file
        return
454
455
def iter_find_files(directory, patterns, ignored=None, include_dirs=False):
    """Walk *directory* and lazily yield the paths of entries whose
    basename matches *patterns*, expressed in `glob`_ syntax (e.g.,
    ``*.txt``). Entries whose basename matches *ignored* are skipped.

    Args:
        directory (str): Path that serves as the root of the
            search. Yielded paths will include this as a prefix.
        patterns (str or list): A single pattern or list of
            glob-formatted patterns to find under *directory*.
        ignored (str or list): A single pattern or list of
            glob-formatted patterns to ignore.
        include_dirs (bool): Whether to include directories that match
            patterns, as well. Defaults to ``False``.

    For example, finding Python files in the current directory:

    >>> _CUR_DIR = os.path.dirname(os.path.abspath(__file__))
    >>> filenames = sorted(iter_find_files(_CUR_DIR, '*.py'))
    >>> os.path.basename(filenames[-1])
    'urlutils.py'

    Or, Python files while ignoring emacs lockfiles:

    >>> filenames = iter_find_files(_CUR_DIR, '*.py', ignored='.#*')

    .. _glob: https://en.wikipedia.org/wiki/Glob_%28programming%29

    """
    def _compile_globs(globs):
        # one alternation regex covering every glob in the list
        return re.compile('|'.join([fnmatch.translate(g) for g in globs]))

    if isinstance(patterns, basestring):
        patterns = [patterns]
    pats_re = _compile_globs(patterns)

    if not ignored:
        ignored = []
    elif isinstance(ignored, basestring):
        ignored = [ignored]
    ign_re = _compile_globs(ignored)

    for root, dirs, files in os.walk(directory):
        # within each walked directory, directories (when requested)
        # are reported before files
        candidates = dirs + files if include_dirs else files
        for basename in candidates:
            if not pats_re.match(basename):
                continue
            # the ignore regex is only consulted when ignore patterns
            # were actually supplied
            if ignored and ign_re.match(basename):
                continue
            yield os.path.join(root, basename)
510
511
def copy_tree(src, dst, symlinks=False, ignore=None):
    """The ``copy_tree`` function is an exact copy of the built-in
    :func:`shutil.copytree`, with one key difference: it will not
    raise an exception if part of the tree already exists. It achieves
    this by using :func:`mkdir_p`.

    Args:
        src (str): Path of the source directory to copy.
        dst (str): Destination path. Existing directories accepted.
        symlinks (bool): If ``True``, copy symlinks rather than their
            contents.
        ignore (callable): A callable that takes a path and directory
            listing, returning the files within the listing to be ignored.

    Raises:
        shutil.Error: After the whole tree has been attempted, if any
            entry failed to copy. The exception's argument is a list
            of ``(srcname, dstname, reason)`` tuples.

    For more details, check out :func:`shutil.copytree` and
    :func:`shutil.copy2`.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    mkdir_p(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                linkto = os.readlink(srcname)
                os.symlink(linkto, dstname)
            elif os.path.isdir(srcname):
                copy_tree(srcname, dstname, symlinks, ignore)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy2(srcname, dstname)
        # catch the Error from the recursive copy_tree so that we can
        # continue with other files
        except Error as e:
            errors.extend(e.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # WindowsError is only defined on Windows; referencing it
        # unguarded elsewhere would replace this OSError with a
        # NameError instead of recording the failure.
        try:
            is_windows_error = isinstance(why, WindowsError)
        except NameError:
            is_windows_error = False
        # Copying file access times may fail on Windows, so those
        # failures are deliberately not reported.
        if not is_windows_error:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)


copytree = copy_tree  # alias for drop-in replacement of shutil
571
572
try:
    file
except NameError:
    # Python 3 has no ``file`` builtin; fall back to a plain object base
    file = object


# like open(os.devnull) but with even fewer side effects
class DummyFile(file):
    """A file-like object that discards everything written to it and
    behaves as an empty file when read, without opening anything on
    the filesystem.

    Args:
        path (str): Stored as the ``name`` attribute; never opened.
        mode (str): Stored as the ``mode`` attribute; not enforced.
        buffering: Accepted for signature compatibility and ignored.

    Most operations raise :exc:`ValueError` once :meth:`close` has
    been called.
    """
    # TODO: raise ValueErrors on closed for all methods?
    # TODO: enforce read/write
    def __init__(self, path, mode='r', buffering=None):
        self.name = path
        self.mode = mode
        self.closed = False
        self.errors = None
        # NOTE(review): real file objects expose isatty as a method;
        # it is kept as a plain attribute here so existing attribute
        # reads keep working.
        self.isatty = False
        self.encoding = None
        self.newlines = None
        self.softspace = 0

    def _check_open(self):
        # shared guard for every operation that is invalid once closed
        if self.closed:
            raise ValueError('I/O operation on a closed file')

    def close(self):
        self.closed = True

    def fileno(self):
        return -1

    def flush(self):
        self._check_open()
        return

    def next(self):
        raise StopIteration()

    def read(self, size=0):
        self._check_open()
        return ''

    def readline(self, size=0):
        self._check_open()
        return ''

    def readlines(self, size=0):
        self._check_open()
        return []

    def seek(self, offset=0, whence=0):
        # offset/whence are accepted (and ignored) so that the usual
        # file-style calls such as seek(0) or seek(0, 2) work
        self._check_open()
        return

    def tell(self):
        self._check_open()
        return 0

    def truncate(self, size=None):
        # size is accepted (and ignored) to match the file-style call
        self._check_open()
        return

    def write(self, string):
        self._check_open()
        return

    def writelines(self, list_of_strings):
        self._check_open()
        return

    def __iter__(self):
        # iteration is immediately exhausted via next()/__next__()
        return self

    def __next__(self):
        raise StopIteration()

    def __enter__(self):
        self._check_open()
        # return the object itself so ``with DummyFile(...) as f`` binds
        # a usable file-like object rather than None
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        return
657
658
if __name__ == '__main__':
    # Minimal manual smoke test. atomic_save() opens the part file in
    # binary mode unless text_mode=True is passed, so bytes are written
    # here; writing str would raise TypeError on Python 3.
    with atomic_save('/tmp/final.txt') as f:
        f.write(b'rofl')
        f.write(b'\n')