diff env/lib/python3.7/site-packages/future/utils/surrogateescape.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
line wrap: on
line diff
--- a/env/lib/python3.7/site-packages/future/utils/surrogateescape.py	Thu May 14 16:47:39 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,198 +0,0 @@
-"""
-This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
-handler of Python 3.
-
-Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
-"""
-
-# This code is released under the Python license and the BSD 2-clause license
-
-import codecs
-import sys
-
-from future import utils
-
-
-# Name of the codec error handler. It is passed to encode()/decode() by
-# encodefilename()/decodefilename() and is the name under which
-# register_surrogateescape() registers surrogateescape_handler.
-FS_ERRORS = 'surrogateescape'
-
-#     # -- Python 2/3 compatibility -------------------------------------
-#     FS_ERRORS = 'my_surrogateescape'
-
-def u(text):
-    """
-    Return ``text`` unchanged when ``utils.PY3`` is true; otherwise return it
-    decoded with the 'unicode_escape' codec.
-    """
-    return text if utils.PY3 else text.decode('unicode_escape')
-
-def b(data):
-    """
-    Return ``data`` encoded with 'latin1' when ``utils.PY3`` is true;
-    otherwise return it unchanged.
-    """
-    return data.encode('latin1') if utils.PY3 else data
-
-# Choose the single-character constructors for the running interpreter:
-# _unichr builds a one-character text string from a code point, bytes_chr
-# builds a one-byte byte string from an integer.
-if not utils.PY3:
-    _unichr = unichr
-    bytes_chr = chr
-else:
-    _unichr = chr
-    bytes_chr = lambda code: bytes((code,))
-
-def surrogateescape_handler(exc):
-    """
-    Pure Python implementation of the PEP 383 "surrogateescape" error handler
-    of Python 3.
-
-    On decoding, undecodable bytes are replaced by the Unicode characters
-    U+DCxx; on encoding, those characters are translated back.
-
-    Returns the tuple ``(replacement, exc.end)``. Re-raises ``exc`` when it is
-    neither a UnicodeDecodeError nor a UnicodeEncodeError, or when the
-    offending slice cannot be translated (NotASurrogateError).
-    """
-    offending = exc.object[exc.start:exc.end]
-
-    if isinstance(exc, UnicodeDecodeError):
-        # The offending slice is a byte-string in this case.
-        translate = replace_surrogate_decode
-    elif isinstance(exc, UnicodeEncodeError):
-        # In the case of u'\udcc3'.encode('ascii',
-        # 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an
-        # exception anyway after this function is called, even though it
-        # seems to do what it should: apparently the strict encoder is then
-        # called to encode the unicode string returned from here.
-        translate = replace_surrogate_encode
-    else:
-        raise exc
-
-    try:
-        replacement = translate(offending)
-    except NotASurrogateError:
-        raise exc
-    return (replacement, exc.end)
-
-
-class NotASurrogateError(Exception):
-    """
-    Raised by the replace_surrogate_* helpers when a character or byte cannot
-    be translated; surrogateescape_handler turns it back into the original
-    codec exception.
-    """
-
-
-def replace_surrogate_encode(mystring):
-    """
-    Translate escaped characters back to the code points of the bytes they
-    stand for.
-
-    Returns a (unicode) string, not the more logical bytes, because the codecs
-    register_error functionality expects this.
-
-    Every character of ``mystring`` must lie in U+DC00..U+DCFF and is mapped
-    to the character with code point ``code - 0xDC00``. Any other character
-    raises NotASurrogateError, so the caller can fail with the original
-    exception.
-    """
-    decoded = []
-    for ch in mystring:
-        code = ord(ch)
-
-        # Only U+DC00..U+DCFF can be mapped back. Anything lower (including
-        # the high surrogates U+D800..U+DBFF) would give a negative code point
-        # and make _unichr() raise ValueError, so reject it explicitly.
-        if not 0xDC00 <= code <= 0xDCFF:
-            # Not an escaped byte. Fail with the original exception.
-            raise NotASurrogateError
-        decoded.append(_unichr(code - 0xDC00))
-    return str().join(decoded)
-
-
-def replace_surrogate_decode(mybytes):
-    """
-    Returns a (unicode) string.
-
-    Values 0x80..0xFF become the characters U+DC80..U+DCFF, values up to 0x7F
-    are kept as the character with the same code point, and anything above
-    0xFF raises NotASurrogateError.
-    """
-    chars = []
-    for item in mybytes:
-        # Iterating newbytes yields ints; iterating a native Py2 str yields
-        # one-character strings.
-        code = item if isinstance(item, int) else ord(item)
-        if code > 0xFF:
-            raise NotASurrogateError
-        chars.append(_unichr(code if code <= 0x7F else 0xDC00 + code))
-    return str().join(chars)
-
-
-def encodefilename(fn):
-    """
-    Encode the text ``fn`` to bytes using FS_ENCODING, turning the escaped
-    characters U+DC80..U+DCFF back into the bytes 0x80..0xFF.
-
-    'ascii' and 'utf-8' are encoded by hand; every other encoding is delegated
-    to ``fn.encode(FS_ENCODING, FS_ERRORS)``. Raises UnicodeEncodeError for a
-    character that cannot be encoded.
-    """
-    if FS_ENCODING not in ('ascii', 'utf-8'):
-        return fn.encode(FS_ENCODING, FS_ERRORS)
-
-    # ASCII encoder of Python 2 expects that the error handler returns a
-    # Unicode string encodable to ASCII, whereas our surrogateescape error
-    # handler has to return bytes in 0x80-0xFF range. UTF-8 encoder of
-    # Python 2 encodes surrogates, so U+DC80-U+DCFF doesn't go through our
-    # error handler. Both encodings are therefore handled per character here.
-    ascii_only = FS_ENCODING == 'ascii'
-    pieces = []
-    for pos, char in enumerate(fn):
-        point = ord(char)
-        if 0xDC80 <= point <= 0xDCFF:
-            # Escaped byte: restore the original value.
-            pieces.append(bytes_chr(point - 0xDC00))
-        elif ascii_only:
-            if point >= 128:
-                raise UnicodeEncodeError(FS_ENCODING,
-                    fn, pos, pos+1,
-                    'ordinal not in range(128)')
-            pieces.append(bytes_chr(point))
-        elif 0xD800 <= point <= 0xDFFF:
-            raise UnicodeEncodeError(
-                FS_ENCODING,
-                fn, pos, pos+1, 'surrogates not allowed')
-        else:
-            pieces.append(char.encode('utf-8'))
-    return bytes().join(pieces)
-
-def decodefilename(fn):
-    """
-    Decode the byte string ``fn`` using FS_ENCODING with the FS_ERRORS error
-    handler and return the result.
-    """
-    decoded_name = fn.decode(FS_ENCODING, FS_ERRORS)
-    return decoded_name
-
-# Encoding used by encodefilename()/decodefilename(), together with a sample
-# byte string (fn) and what appears to be its expected decoded form (encoded).
-FS_ENCODING = 'ascii'
-fn = b('[abc\xff]')
-encoded = u('[abc\udcff]')
-# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
-# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')
-
-
-# Normalize the encoding name through the codec registry.
-# For example, we expect "utf-8", not "UTF8".
-FS_ENCODING = codecs.lookup(FS_ENCODING).name
-
-
-def register_surrogateescape():
-    """
-    Registers the surrogateescape error handler on Python 2 (only).
-
-    Does nothing when ``utils.PY3`` is true or when a handler named FS_ERRORS
-    is already registered.
-    """
-    if not utils.PY3:
-        try:
-            codecs.lookup_error(FS_ERRORS)
-        except LookupError:
-            # No handler under that name yet: install the pure-Python one.
-            codecs.register_error(FS_ERRORS, surrogateescape_handler)
-
-
-if __name__ == '__main__':
-    pass
-    # Disabled round-trip self-test, kept for reference. It decodes the sample
-    # byte string, compares it with the expected text, then encodes it back.
-    # # Tests:
-    # register_surrogateescape()
-
-    # b = decodefilename(fn)
-    # assert b == encoded, "%r != %r" % (b, encoded)
-    # c = encodefilename(b)
-    # assert c == fn, '%r != %r' % (c, fn)
-    # # print("ok")