diff env/lib/python3.7/site-packages/webencodings/__init__.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
line wrap: on
line diff
--- a/env/lib/python3.7/site-packages/webencodings/__init__.py	Thu May 14 16:47:39 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,342 +0,0 @@
-# coding: utf-8
-"""
-
-    webencodings
-    ~~~~~~~~~~~~
-
-    This is a Python implementation of the `WHATWG Encoding standard
-    <http://encoding.spec.whatwg.org/>`_. See README for details.
-
-    :copyright: Copyright 2012 by Simon Sapin
-    :license: BSD, see LICENSE for details.
-
-"""
-
-from __future__ import unicode_literals
-
-import codecs
-
-from .labels import LABELS
-
-
-VERSION = '0.5.1'
-
-
-# Some names in Encoding are not valid Python aliases. Remap these.
-PYTHON_NAMES = {
-    'iso-8859-8-i': 'iso-8859-8',
-    'x-mac-cyrillic': 'mac-cyrillic',
-    'macintosh': 'mac-roman',
-    'windows-874': 'cp874'}
-
-CACHE = {}
-
-
-def ascii_lower(string):
-    r"""Transform (only) ASCII letters to lower case: A-Z is mapped to a-z.
-
-    :param string: A Unicode string.
-    :returns: A new Unicode string.
-
-    This is used for `ASCII case-insensitive
-    <http://encoding.spec.whatwg.org/#ascii-case-insensitive>`_
-    matching of encoding labels.
-    The same matching is also used, among other things,
-    for `CSS keywords <http://dev.w3.org/csswg/css-values/#keywords>`_.
-
-    This is different from the :meth:`~py:str.lower` method of Unicode strings
-    which also affects non-ASCII characters,
-    sometimes mapping them into the ASCII range:
-
-        >>> keyword = u'Bac\N{KELVIN SIGN}ground'
-        >>> assert keyword.lower() == u'background'
-        >>> assert ascii_lower(keyword) != keyword.lower()
-        >>> assert ascii_lower(keyword) == u'bac\N{KELVIN SIGN}ground'
-
-    """
-    # This turns out to be faster than unicode.translate()
-    return string.encode('utf8').lower().decode('utf8')
-
-
-def lookup(label):
-    """
-    Look for an encoding by its label.
-    This is the spec’s `get an encoding
-    <http://encoding.spec.whatwg.org/#concept-encoding-get>`_ algorithm.
-    Supported labels are listed there.
-
-    :param label: A string.
-    :returns:
-        An :class:`Encoding` object, or :obj:`None` for an unknown label.
-
-    """
-    # Only strip ASCII whitespace: U+0009, U+000A, U+000C, U+000D, and U+0020.
-    label = ascii_lower(label.strip('\t\n\f\r '))
-    name = LABELS.get(label)
-    if name is None:
-        return None
-    encoding = CACHE.get(name)
-    if encoding is None:
-        if name == 'x-user-defined':
-            from .x_user_defined import codec_info
-        else:
-            python_name = PYTHON_NAMES.get(name, name)
-            # Any python_name value that gets to here should be valid.
-            codec_info = codecs.lookup(python_name)
-        encoding = Encoding(name, codec_info)
-        CACHE[name] = encoding
-    return encoding
-
-
-def _get_encoding(encoding_or_label):
-    """
-    Accept either an encoding object or label.
-
-    :param encoding_or_label: An :class:`Encoding` object or a label string.
-    :returns: An :class:`Encoding` object.
-    :raises: :exc:`~exceptions.LookupError` for an unknown label.
-
-    """
-    if hasattr(encoding_or_label, 'codec_info'):
-        return encoding_or_label
-
-    encoding = lookup(encoding_or_label)
-    if encoding is None:
-        raise LookupError('Unknown encoding label: %r' % encoding_or_label)
-    return encoding
-
-
-class Encoding(object):
-    """Represents a character encoding such as UTF-8,
-    that can be used for decoding or encoding.
-
-    .. attribute:: name
-
-        Canonical name of the encoding
-
-    .. attribute:: codec_info
-
-        The actual implementation of the encoding,
-        a stdlib :class:`~codecs.CodecInfo` object.
-        See :func:`codecs.register`.
-
-    """
-    def __init__(self, name, codec_info):
-        self.name = name
-        self.codec_info = codec_info
-
-    def __repr__(self):
-        return '<Encoding %s>' % self.name
-
-
-#: The UTF-8 encoding. Should be used for new content and formats.
-UTF8 = lookup('utf-8')
-
-_UTF16LE = lookup('utf-16le')
-_UTF16BE = lookup('utf-16be')
-
-
-def decode(input, fallback_encoding, errors='replace'):
-    """
-    Decode a single string.
-
-    :param input: A byte string
-    :param fallback_encoding:
-        An :class:`Encoding` object or a label string.
-        The encoding to use if :obj:`input` does not have a BOM.
-    :param errors: Type of error handling. See :func:`codecs.register`.
-    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
-    :return:
-        A ``(output, encoding)`` tuple of a Unicode string
-        and an :obj:`Encoding`.
-
-    """
-    # Fail early if `encoding` is an invalid label.
-    fallback_encoding = _get_encoding(fallback_encoding)
-    bom_encoding, input = _detect_bom(input)
-    encoding = bom_encoding or fallback_encoding
-    return encoding.codec_info.decode(input, errors)[0], encoding
-
-
-def _detect_bom(input):
-    """Return (bom_encoding, input), with any BOM removed from the input."""
-    if input.startswith(b'\xFF\xFE'):
-        return _UTF16LE, input[2:]
-    if input.startswith(b'\xFE\xFF'):
-        return _UTF16BE, input[2:]
-    if input.startswith(b'\xEF\xBB\xBF'):
-        return UTF8, input[3:]
-    return None, input
-
-
-def encode(input, encoding=UTF8, errors='strict'):
-    """
-    Encode a single string.
-
-    :param input: A Unicode string.
-    :param encoding: An :class:`Encoding` object or a label string.
-    :param errors: Type of error handling. See :func:`codecs.register`.
-    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
-    :return: A byte string.
-
-    """
-    return _get_encoding(encoding).codec_info.encode(input, errors)[0]
-
-
-def iter_decode(input, fallback_encoding, errors='replace'):
-    """
-    "Pull"-based decoder.
-
-    :param input:
-        An iterable of byte strings.
-
-        The input is first consumed just enough to determine the encoding
-        based on the presence of a BOM,
-        then consumed on demand when the return value is.
-    :param fallback_encoding:
-        An :class:`Encoding` object or a label string.
-        The encoding to use if :obj:`input` does not have a BOM.
-    :param errors: Type of error handling. See :func:`codecs.register`.
-    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
-    :returns:
-        An ``(output, encoding)`` tuple.
-        :obj:`output` is an iterable of Unicode strings,
-        :obj:`encoding` is the :obj:`Encoding` that is being used.
-
-    """
-
-    decoder = IncrementalDecoder(fallback_encoding, errors)
-    generator = _iter_decode_generator(input, decoder)
-    encoding = next(generator)
-    return generator, encoding
-
-
-def _iter_decode_generator(input, decoder):
-    """Return a generator that first yields the :obj:`Encoding`,
-    then yields output chunks as Unicode strings.
-
-    """
-    decode = decoder.decode
-    input = iter(input)
-    for chunck in input:
-        output = decode(chunck)
-        if output:
-            assert decoder.encoding is not None
-            yield decoder.encoding
-            yield output
-            break
-    else:
-        # Input exhausted without determining the encoding
-        output = decode(b'', final=True)
-        assert decoder.encoding is not None
-        yield decoder.encoding
-        if output:
-            yield output
-        return
-
-    for chunck in input:
-        output = decode(chunck)
-        if output:
-            yield output
-    output = decode(b'', final=True)
-    if output:
-        yield output
-
-
-def iter_encode(input, encoding=UTF8, errors='strict'):
-    """
-    “Pull”-based encoder.
-
-    :param input: An iterable of Unicode strings.
-    :param encoding: An :class:`Encoding` object or a label string.
-    :param errors: Type of error handling. See :func:`codecs.register`.
-    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
-    :returns: An iterable of byte strings.
-
-    """
-    # Fail early if `encoding` is an invalid label.
-    encode = IncrementalEncoder(encoding, errors).encode
-    return _iter_encode_generator(input, encode)
-
-
-def _iter_encode_generator(input, encode):
-    for chunck in input:
-        output = encode(chunck)
-        if output:
-            yield output
-    output = encode('', final=True)
-    if output:
-        yield output
-
-
-class IncrementalDecoder(object):
-    """
-    “Push”-based decoder.
-
-    :param fallback_encoding:
-        An :class:`Encoding` object or a label string.
-        The encoding to use if :obj:`input` does not have a BOM.
-    :param errors: Type of error handling. See :func:`codecs.register`.
-    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
-
-    """
-    def __init__(self, fallback_encoding, errors='replace'):
-        # Fail early if `encoding` is an invalid label.
-        self._fallback_encoding = _get_encoding(fallback_encoding)
-        self._errors = errors
-        self._buffer = b''
-        self._decoder = None
-        #: The actual :class:`Encoding` that is being used,
-        #: or :obj:`None` if that is not determined yet.
-        #: (I.e. if there is not enough input yet to determine
-        #: if there is a BOM.)
-        self.encoding = None  # Not known yet.
-
-    def decode(self, input, final=False):
-        """Decode one chunk of the input.
-
-        :param input: A byte string.
-        :param final:
-            Indicate that no more input is available.
-            Must be :obj:`True` if this is the last call.
-        :returns: A Unicode string.
-
-        """
-        decoder = self._decoder
-        if decoder is not None:
-            return decoder(input, final)
-
-        input = self._buffer + input
-        encoding, input = _detect_bom(input)
-        if encoding is None:
-            if len(input) < 3 and not final:  # Not enough data yet.
-                self._buffer = input
-                return ''
-            else:  # No BOM
-                encoding = self._fallback_encoding
-        decoder = encoding.codec_info.incrementaldecoder(self._errors).decode
-        self._decoder = decoder
-        self.encoding = encoding
-        return decoder(input, final)
-
-
-class IncrementalEncoder(object):
-    """
-    “Push”-based encoder.
-
-    :param encoding: An :class:`Encoding` object or a label string.
-    :param errors: Type of error handling. See :func:`codecs.register`.
-    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
-
-    .. method:: encode(input, final=False)
-
-        :param input: A Unicode string.
-        :param final:
-            Indicate that no more input is available.
-            Must be :obj:`True` if this is the last call.
-        :returns: A byte string.
-
-    """
-    def __init__(self, encoding=UTF8, errors='strict'):
-        encoding = _get_encoding(encoding)
-        self.encode = encoding.codec_info.incrementalencoder(errors).encode