diff env/lib/python3.7/site-packages/boltons/jsonutils.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
line wrap: on
line diff
--- a/env/lib/python3.7/site-packages/boltons/jsonutils.py	Thu May 14 16:47:39 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,227 +0,0 @@
-# -*- coding: utf-8 -*-
-"""``jsonutils`` aims to provide various helpers for working with
-JSON. Currently it focuses on providing a reliable and intuitive means
-of working with `JSON Lines`_-formatted files.
-
-.. _JSON Lines: http://jsonlines.org/
-
-"""
-
-from __future__ import print_function
-
-import os
-import json
-
-
-DEFAULT_BLOCKSIZE = 4096
-
-# reverse iter lines algorithm:
-#
-#  - if it ends in a newline, add an empty string to the line list
-#  - if there's one item, then prepend it to the buffer, continue
-#  - if there's more than one item, pop the last item and prepend it
-#    to the buffer, yielding it
-#  - yield all remaining items in reverse, except for the first
-#  - first item becomes the new buffer
-#
-#  - when the outer loop completes, yield the buffer
-
-
-__all__ = ['JSONLIterator', 'reverse_iter_lines']
-
-
-def reverse_iter_lines(file_obj, blocksize=DEFAULT_BLOCKSIZE, preseek=True):
-    """Returns an iterator over the lines from a file object, in
-    reverse order, i.e., last line first, first line last. Uses the
-    :meth:`file.seek` method of file objects, and is tested compatible with
-    :class:`file` objects, as well as :class:`StringIO.StringIO`.
-
-    Args:
-        file_obj (file): An open file object. Note that ``reverse_iter_lines``
-            mutably reads from the file and other functions should not mutably
-            interact with the file object.
-        blocksize (int): The block size to pass to :meth:`file.read()`
-        preseek (bool): Tells the function whether or not to automatically
-            seek to the end of the file. Defaults to ``True``.
-            ``preseek=False`` is useful in cases when the
-            file cursor is already in position, either at the end of
-            the file or in the middle for relative reverse line
-            generation.
-    """
-    if preseek:
-        file_obj.seek(0, os.SEEK_END)
-    cur_pos = file_obj.tell()
-    buff = ''
-    while 0 < cur_pos:
-        read_size = min(blocksize, cur_pos)
-        cur_pos -= read_size
-        file_obj.seek(cur_pos, os.SEEK_SET)
-        cur = file_obj.read(read_size)
-        lines = cur.splitlines()
-        if cur[-1] == '\n':
-            lines.append('')
-        if len(lines) == 1:
-            buff = lines[0] + buff
-            continue
-        last = lines.pop()
-        yield last + buff
-        for line in lines[:0:-1]:
-            yield line
-        buff = lines[0]
-    if buff:
-        # TODO: test this, does an empty buffer always mean don't yield?
-        yield buff
-
-
-"""
-TODO: allow passthroughs for:
-
-json.load(fp[, encoding[, cls[, object_hook[, parse_float[, parse_int[, parse_constant[, object_pairs_hook[, **kw]]]]]]]])
-"""
-
-
-class JSONLIterator(object):
-    """The ``JSONLIterator`` is used to iterate over JSON-encoded objects
-    stored in the `JSON Lines format`_ (one object per line).
-
-    Most notably it has the ability to efficiently read from the
-    bottom of files, making it very effective for reading in simple
-    append-only JSONL use cases. It also has the ability to start from
-    anywhere in the file and ignore corrupted lines.
-
-    Args:
-        file_obj (file): An open file object.
-        ignore_errors (bool): Whether to skip over lines that raise an error on
-            deserialization (:func:`json.loads`).
-        reverse (bool): Controls the direction of the iteration.
-            Defaults to ``False``. If set to ``True`` and *rel_seek*
-            is unset, seeks to the end of the file before iteration
-            begins.
-        rel_seek (float): Used to preseek the start position of
-            iteration. Set to 0.0 for the start of the file, 1.0 for the
-            end, and anything in between.
-
-    .. _JSON Lines format: http://jsonlines.org/
-    """
-    def __init__(self, file_obj,
-                 ignore_errors=False, reverse=False, rel_seek=None):
-        self._reverse = bool(reverse)
-        self._file_obj = file_obj
-        self.ignore_errors = ignore_errors
-
-        if rel_seek is None:
-            if reverse:
-                rel_seek = 1.0
-        elif not -1.0 < rel_seek < 1.0:
-            raise ValueError("'rel_seek' expected a float between"
-                             " -1.0 and 1.0, not %r" % rel_seek)
-        elif rel_seek < 0:
-            rel_seek = 1.0 - rel_seek
-        self._rel_seek = rel_seek
-        self._blocksize = 4096
-        if rel_seek is not None:
-            self._init_rel_seek()
-        if self._reverse:
-            self._line_iter = reverse_iter_lines(self._file_obj,
-                                                 blocksize=self._blocksize,
-                                                 preseek=False)
-        else:
-            self._line_iter = iter(self._file_obj)
-
-    @property
-    def cur_byte_pos(self):
-        "A property representing where in the file the iterator is reading."
-        return self._file_obj.tell()
-
-    def _align_to_newline(self):
-        "Aligns the file object's position to the next newline."
-        fo, bsize = self._file_obj, self._blocksize
-        cur, total_read = '', 0
-        cur_pos = fo.tell()
-        while '\n' not in cur:
-            cur = fo.read(bsize)
-            total_read += bsize
-        try:
-            newline_offset = cur.index('\n') + total_read - bsize
-        except ValueError:
-            raise  # TODO: seek to end?
-        fo.seek(cur_pos + newline_offset)
-
-    def _init_rel_seek(self):
-        "Sets the file object's position to the relative location set above."
-        rs, fo = self._rel_seek, self._file_obj
-        if rs == 0.0:
-            fo.seek(0, os.SEEK_SET)
-        else:
-            fo.seek(0, os.SEEK_END)
-            size = fo.tell()
-            if rs == 1.0:
-                self._cur_pos = size
-            else:
-                target = int(size * rs)
-                fo.seek(target, os.SEEK_SET)
-                self._align_to_newline()
-                self._cur_pos = fo.tell()
-
-    def __iter__(self):
-        return self
-
-    def next(self):
-        """Yields one :class:`dict` loaded with :func:`json.loads`, advancing
-        the file object by one line. Raises :exc:`StopIteration` upon reaching
-        the end of the file (or beginning, if ``reverse`` was set to ``True``.
-        """
-        while 1:
-            line = next(self._line_iter).lstrip()
-            if not line:
-                continue
-            try:
-                obj = json.loads(line)
-            except Exception:
-                if not self.ignore_errors:
-                    raise
-                continue
-            return obj
-
-    __next__ = next
-
-
-if __name__ == '__main__':
-    def _main():
-        import sys
-        if '-h' in sys.argv or '--help' in sys.argv:
-            print('loads one or more JSON Line files for basic validation.')
-            return
-        verbose = False
-        if '-v' in sys.argv or '--verbose' in sys.argv:
-            verbose = True
-        file_count, obj_count = 0, 0
-        filenames = sys.argv[1:]
-        for filename in filenames:
-            if filename in ('-h', '--help', '-v', '--verbose'):
-                continue
-            file_count += 1
-            with open(filename, 'rb') as file_obj:
-                iterator = JSONLIterator(file_obj)
-                cur_obj_count = 0
-                while 1:
-                    try:
-                        next(iterator)
-                    except ValueError:
-                        print('error reading object #%s around byte %s in %s'
-                              % (cur_obj_count + 1, iterator.cur_byte_pos, filename))
-                        return
-                    except StopIteration:
-                        break
-                    obj_count += 1
-                    cur_obj_count += 1
-                    if verbose and obj_count and obj_count % 100 == 0:
-                        sys.stdout.write('.')
-                        if obj_count % 10000:
-                            sys.stdout.write('%s\n' % obj_count)
-        if verbose:
-            print('files checked: %s' % file_count)
-            print('objects loaded: %s' % obj_count)
-        return
-
-    _main()