view env/lib/python3.9/site-packages/boltons/listutils.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
line wrap: on
line source

# -*- coding: utf-8 -*-
"""Python's builtin :class:`list` is a very fast and efficient
sequence type, but it could be better for certain access patterns,
such as non-sequential insertion into a large lists. ``listutils``
provides a pure-Python solution to this problem.

For utilities for working with iterables and lists, check out
:mod:`iterutils`. For the a :class:`list`-based version of
:class:`collections.namedtuple`, check out :mod:`namedutils`.
"""

from __future__ import print_function, division

import operator
from math import log as math_log
from itertools import chain, islice

try:
    from typeutils import make_sentinel
    _MISSING = make_sentinel(var_name='_MISSING')
except ImportError:
    _MISSING = object()

try:
    xrange
except NameError:
    # Python 3 compat
    xrange = range

# TODO: expose splaylist?
__all__ = ['BList', 'BarrelList']


# TODO: comparators
# TODO: keep track of list lengths and bisect to the right list for
# faster getitem (and slightly slower setitem and delitem ops)

class BarrelList(list):
    """The ``BarrelList`` is a :class:`list` subtype backed by many
    dynamically-scaled sublists, to provide better scaling and random
    insertion/deletion characteristics. It is a subtype of the builtin
    :class:`list` and has an identical API, supporting indexing,
    slicing, sorting, etc. If application requirements call for
    something more performant, consider the `blist module available on
    PyPI`_.

    The name comes by way of Kurt Rose, who said it reminded him of
    barrel shifters. Not sure how, but it's BList-like, so the name
    stuck. BList is of course a reference to `B-trees`_.

    Args:
        iterable: An optional iterable of initial values for the list.

    >>> blist = BList(xrange(100000))
    >>> blist.pop(50000)
    50000
    >>> len(blist)
    99999
    >>> len(blist.lists)  # how many underlying lists
    8
    >>> slice_idx = blist.lists[0][-1]
    >>> blist[slice_idx:slice_idx + 2]
    BarrelList([11637, 11638])

    Slicing is supported and works just fine across list borders,
    returning another instance of the BarrelList.

    .. _blist module available on PyPI: https://pypi.python.org/pypi/blist
    .. _B-trees: https://en.wikipedia.org/wiki/B-tree

    """

    _size_factor = 1520
    "This size factor is the result of tuning using the tune() function below."

    def __init__(self, iterable=None):
        self.lists = [[]]
        if iterable:
            self.extend(iterable)

    @property
    def _cur_size_limit(self):
        len_self, size_factor = len(self), self._size_factor
        return int(round(size_factor * math_log(len_self + 2, 2)))

    def _translate_index(self, index):
        if index < 0:
            index += len(self)
        rel_idx, lists = index, self.lists
        for list_idx in range(len(lists)):
            len_list = len(lists[list_idx])
            if rel_idx < len_list:
                break
            rel_idx -= len_list
        if rel_idx < 0:
            return None, None
        return list_idx, rel_idx

    def _balance_list(self, list_idx):
        if list_idx < 0:
            list_idx += len(self.lists)
        cur_list, len_self = self.lists[list_idx], len(self)
        size_limit = self._cur_size_limit
        if len(cur_list) > size_limit:
            half_limit = size_limit // 2
            while len(cur_list) > half_limit:
                next_list_idx = list_idx + 1
                self.lists.insert(next_list_idx, cur_list[-half_limit:])
                del cur_list[-half_limit:]
            return True
        return False

    def insert(self, index, item):
        if len(self.lists) == 1:
            self.lists[0].insert(index, item)
            self._balance_list(0)
        else:
            list_idx, rel_idx = self._translate_index(index)
            if list_idx is None:
                raise IndexError()
            self.lists[list_idx].insert(rel_idx, item)
            self._balance_list(list_idx)
        return

    def append(self, item):
        self.lists[-1].append(item)

    def extend(self, iterable):
        self.lists[-1].extend(iterable)

    def pop(self, *a):
        lists = self.lists
        if len(lists) == 1 and not a:
            return self.lists[0].pop()
        index = a and a[0]
        if index == () or index is None or index == -1:
            ret = lists[-1].pop()
            if len(lists) > 1 and not lists[-1]:
                lists.pop()
        else:
            list_idx, rel_idx = self._translate_index(index)
            if list_idx is None:
                raise IndexError()
            ret = lists[list_idx].pop(rel_idx)
            self._balance_list(list_idx)
        return ret

    def iter_slice(self, start, stop, step=None):
        iterable = self  # TODO: optimization opportunities abound
        # start_list_idx, stop_list_idx = 0, len(self.lists)
        if start is None:
            start = 0
        if stop is None:
            stop = len(self)
        if step is not None and step < 0:
            step = -step
            start, stop = -start, -stop - 1
            iterable = reversed(self)
        if start < 0:
            start += len(self)
            # start_list_idx, start_rel_idx = self._translate_index(start)
        if stop < 0:
            stop += len(self)
            # stop_list_idx, stop_rel_idx = self._translate_index(stop)
        return islice(iterable, start, stop, step)

    def del_slice(self, start, stop, step=None):
        if step is not None and abs(step) > 1:  # punt
            new_list = chain(self.iter_slice(0, start, step),
                             self.iter_slice(stop, None, step))
            self.lists[0][:] = new_list
            self._balance_list(0)
            return
        if start is None:
            start = 0
        if stop is None:
            stop = len(self)
        start_list_idx, start_rel_idx = self._translate_index(start)
        stop_list_idx, stop_rel_idx = self._translate_index(stop)
        if start_list_idx is None:
            raise IndexError()
        if stop_list_idx is None:
            raise IndexError()

        if start_list_idx == stop_list_idx:
            del self.lists[start_list_idx][start_rel_idx:stop_rel_idx]
        elif start_list_idx < stop_list_idx:
            del self.lists[start_list_idx + 1:stop_list_idx]
            del self.lists[start_list_idx][start_rel_idx:]
            del self.lists[stop_list_idx][:stop_rel_idx]
        else:
            assert False, ('start list index should never translate to'
                           ' greater than stop list index')

    __delslice__ = del_slice

    @classmethod
    def from_iterable(cls, it):
        return cls(it)

    def __iter__(self):
        return chain.from_iterable(self.lists)

    def __reversed__(self):
        return chain.from_iterable(reversed(l) for l in reversed(self.lists))

    def __len__(self):
        return sum([len(l) for l in self.lists])

    def __contains__(self, item):
        for cur in self.lists:
            if item in cur:
                return True
        return False

    def __getitem__(self, index):
        try:
            start, stop, step = index.start, index.stop, index.step
        except AttributeError:
            index = operator.index(index)
        else:
            iter_slice = self.iter_slice(start, stop, step)
            ret = self.from_iterable(iter_slice)
            return ret
        list_idx, rel_idx = self._translate_index(index)
        if list_idx is None:
            raise IndexError()
        return self.lists[list_idx][rel_idx]

    def __delitem__(self, index):
        try:
            start, stop, step = index.start, index.stop, index.step
        except AttributeError:
            index = operator.index(index)
        else:
            self.del_slice(start, stop, step)
            return
        list_idx, rel_idx = self._translate_index(index)
        if list_idx is None:
            raise IndexError()
        del self.lists[list_idx][rel_idx]

    def __setitem__(self, index, item):
        try:
            start, stop, step = index.start, index.stop, index.step
        except AttributeError:
            index = operator.index(index)
        else:
            if len(self.lists) == 1:
                self.lists[0][index] = item
            else:
                tmp = list(self)
                tmp[index] = item
                self.lists[:] = [tmp]
            self._balance_list(0)
            return
        list_idx, rel_idx = self._translate_index(index)
        if list_idx is None:
            raise IndexError()
        self.lists[list_idx][rel_idx] = item

    def __getslice__(self, start, stop):
        iter_slice = self.iter_slice(start, stop, 1)
        return self.from_iterable(iter_slice)

    def __setslice__(self, start, stop, sequence):
        if len(self.lists) == 1:
            self.lists[0][start:stop] = sequence
        else:
            tmp = list(self)
            tmp[start:stop] = sequence
            self.lists[:] = [tmp]
        self._balance_list(0)
        return

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def sort(self):
        # poor pythonist's mergesort, it's faster than sorted(self)
        # when the lists' average length is greater than 512.
        if len(self.lists) == 1:
            self.lists[0].sort()
        else:
            for li in self.lists:
                li.sort()
            tmp_sorted = sorted(chain.from_iterable(self.lists))
            del self.lists[:]
            self.lists[0] = tmp_sorted
            self._balance_list(0)

    def reverse(self):
        for cur in self.lists:
            cur.reverse()
        self.lists.reverse()

    def count(self, item):
        return sum([cur.count(item) for cur in self.lists])

    def index(self, item):
        len_accum = 0
        for cur in self.lists:
            try:
                rel_idx = cur.index(item)
                return len_accum + rel_idx
            except ValueError:
                len_accum += len(cur)
        raise ValueError('%r is not in list' % (item,))


BList = BarrelList


class SplayList(list):
    """Like a `splay tree`_, the SplayList facilitates moving higher
    utility items closer to the front of the list for faster access.

    .. _splay tree: https://en.wikipedia.org/wiki/Splay_tree
    """

    def shift(self, item_index, dest_index=0):
        if item_index == dest_index:
            return
        item = self.pop(item_index)
        self.insert(dest_index, item)

    def swap(self, item_index, dest_index):
        self[dest_index], self[item_index] = self[item_index], self[dest_index]