diff env/lib/python3.7/site-packages/lxml/doctestcompare.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
line wrap: on
line diff
--- a/env/lib/python3.7/site-packages/lxml/doctestcompare.py	Thu May 14 16:47:39 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,507 +0,0 @@
-"""
-lxml-based doctest output comparison.
-
-Note: normally, you should just import the `lxml.usedoctest` and
-`lxml.html.usedoctest` modules from within a doctest, instead of this
-one::
-
-    >>> import lxml.usedoctest # for XML output
-
-    >>> import lxml.html.usedoctest # for HTML output
-
-To use this module directly, you must call ``lxmldoctest.install()``,
-which will cause doctest to use this in all subsequent calls.
-
-This changes the way output is checked and comparisons are made for
-XML or HTML-like content.
-
-XML or HTML content is noticed because the example starts with ``<``
-(it's HTML if it starts with ``<html``).  You can also use the
-``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.
-
-Some rough wildcard-like things are allowed.  Whitespace is generally
-ignored (except in attributes).  In text (attributes and text in the
-body) you can use ``...`` as a wildcard.  In an example it also
-matches any trailing tags in the element, though it does not match
-leading tags.  You may create a tag ``<any>`` or include an ``any``
-attribute in the tag.  An ``any`` tag matches any tag, while the
-attribute matches any and all attributes.
-
-When a match fails, the reformatted example and gotten text is
-displayed (indented), and a rough diff-like output is given.  Anything
-marked with ``+`` is in the output but wasn't supposed to be, and
-similarly ``-`` means its in the example but wasn't in the output.
-
-You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
-"""
-
-from lxml import etree
-import sys
-import re
-import doctest
-try:
-    from html import escape as html_escape
-except ImportError:
-    from cgi import escape as html_escape
-
-__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',
-           'LHTMLOutputChecker', 'install', 'temp_install']
-
-try:
-    _basestring = basestring
-except NameError:
-    _basestring = (str, bytes)
-
-_IS_PYTHON_3 = sys.version_info[0] >= 3
-
-PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
-PARSE_XML = doctest.register_optionflag('PARSE_XML')
-NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')
-
-OutputChecker = doctest.OutputChecker
-
-def strip(v):
-    if v is None:
-        return None
-    else:
-        return v.strip()
-
-def norm_whitespace(v):
-    return _norm_whitespace_re.sub(' ', v)
-
-_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True)
-
-def html_fromstring(html):
-    return etree.fromstring(html, _html_parser)
-
-# We use this to distinguish repr()s from elements:
-_repr_re = re.compile(r'^<[^>]+ (at|object) ')
-_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
-
-class LXMLOutputChecker(OutputChecker):
-
-    empty_tags = (
-        'param', 'img', 'area', 'br', 'basefont', 'input',
-        'base', 'meta', 'link', 'col')
-
-    def get_default_parser(self):
-        return etree.XML
-
-    def check_output(self, want, got, optionflags):
-        alt_self = getattr(self, '_temp_override_self', None)
-        if alt_self is not None:
-            super_method = self._temp_call_super_check_output
-            self = alt_self
-        else:
-            super_method = OutputChecker.check_output
-        parser = self.get_parser(want, got, optionflags)
-        if not parser:
-            return super_method(
-                self, want, got, optionflags)
-        try:
-            want_doc = parser(want)
-        except etree.XMLSyntaxError:
-            return False
-        try:
-            got_doc = parser(got)
-        except etree.XMLSyntaxError:
-            return False
-        return self.compare_docs(want_doc, got_doc)
-
-    def get_parser(self, want, got, optionflags):
-        parser = None
-        if NOPARSE_MARKUP & optionflags:
-            return None
-        if PARSE_HTML & optionflags:
-            parser = html_fromstring
-        elif PARSE_XML & optionflags:
-            parser = etree.XML
-        elif (want.strip().lower().startswith('<html')
-              and got.strip().startswith('<html')):
-            parser = html_fromstring
-        elif (self._looks_like_markup(want)
-              and self._looks_like_markup(got)):
-            parser = self.get_default_parser()
-        return parser
-
-    def _looks_like_markup(self, s):
-        s = s.strip()
-        return (s.startswith('<')
-                and not _repr_re.search(s))
-
-    def compare_docs(self, want, got):
-        if not self.tag_compare(want.tag, got.tag):
-            return False
-        if not self.text_compare(want.text, got.text, True):
-            return False
-        if not self.text_compare(want.tail, got.tail, True):
-            return False
-        if 'any' not in want.attrib:
-            want_keys = sorted(want.attrib.keys())
-            got_keys = sorted(got.attrib.keys())
-            if want_keys != got_keys:
-                return False
-            for key in want_keys:
-                if not self.text_compare(want.attrib[key], got.attrib[key], False):
-                    return False
-        if want.text != '...' or len(want):
-            want_children = list(want)
-            got_children = list(got)
-            while want_children or got_children:
-                if not want_children or not got_children:
-                    return False
-                want_first = want_children.pop(0)
-                got_first = got_children.pop(0)
-                if not self.compare_docs(want_first, got_first):
-                    return False
-                if not got_children and want_first.tail == '...':
-                    break
-        return True
-
-    def text_compare(self, want, got, strip):
-        want = want or ''
-        got = got or ''
-        if strip:
-            want = norm_whitespace(want).strip()
-            got = norm_whitespace(got).strip()
-        want = '^%s$' % re.escape(want)
-        want = want.replace(r'\.\.\.', '.*')
-        if re.search(want, got):
-            return True
-        else:
-            return False
-
-    def tag_compare(self, want, got):
-        if want == 'any':
-            return True
-        if (not isinstance(want, _basestring)
-            or not isinstance(got, _basestring)):
-            return want == got
-        want = want or ''
-        got = got or ''
-        if want.startswith('{...}'):
-            # Ellipsis on the namespace
-            return want.split('}')[-1] == got.split('}')[-1]
-        else:
-            return want == got
-
-    def output_difference(self, example, got, optionflags):
-        want = example.want
-        parser = self.get_parser(want, got, optionflags)
-        errors = []
-        if parser is not None:
-            try:
-                want_doc = parser(want)
-            except etree.XMLSyntaxError:
-                e = sys.exc_info()[1]
-                errors.append('In example: %s' % e)
-            try:
-                got_doc = parser(got)
-            except etree.XMLSyntaxError:
-                e = sys.exc_info()[1]
-                errors.append('In actual output: %s' % e)
-        if parser is None or errors:
-            value = OutputChecker.output_difference(
-                self, example, got, optionflags)
-            if errors:
-                errors.append(value)
-                return '\n'.join(errors)
-            else:
-                return value
-        html = parser is html_fromstring
-        diff_parts = ['Expected:',
-                      self.format_doc(want_doc, html, 2),
-                      'Got:',
-                      self.format_doc(got_doc, html, 2),
-                      'Diff:',
-                      self.collect_diff(want_doc, got_doc, html, 2)]
-        return '\n'.join(diff_parts)
-
-    def html_empty_tag(self, el, html=True):
-        if not html:
-            return False
-        if el.tag not in self.empty_tags:
-            return False
-        if el.text or len(el):
-            # This shouldn't happen (contents in an empty tag)
-            return False
-        return True
-
-    def format_doc(self, doc, html, indent, prefix=''):
-        parts = []
-        if not len(doc):
-            # No children...
-            parts.append(' '*indent)
-            parts.append(prefix)
-            parts.append(self.format_tag(doc))
-            if not self.html_empty_tag(doc, html):
-                if strip(doc.text):
-                    parts.append(self.format_text(doc.text))
-                parts.append(self.format_end_tag(doc))
-            if strip(doc.tail):
-                parts.append(self.format_text(doc.tail))
-            parts.append('\n')
-            return ''.join(parts)
-        parts.append(' '*indent)
-        parts.append(prefix)
-        parts.append(self.format_tag(doc))
-        if not self.html_empty_tag(doc, html):
-            parts.append('\n')
-            if strip(doc.text):
-                parts.append(' '*indent)
-                parts.append(self.format_text(doc.text))
-                parts.append('\n')
-            for el in doc:
-                parts.append(self.format_doc(el, html, indent+2))
-            parts.append(' '*indent)
-            parts.append(self.format_end_tag(doc))
-            parts.append('\n')
-        if strip(doc.tail):
-            parts.append(' '*indent)
-            parts.append(self.format_text(doc.tail))
-            parts.append('\n')
-        return ''.join(parts)
-
-    def format_text(self, text, strip=True):
-        if text is None:
-            return ''
-        if strip:
-            text = text.strip()
-        return html_escape(text, 1)
-
-    def format_tag(self, el):
-        attrs = []
-        if isinstance(el, etree.CommentBase):
-            # FIXME: probably PIs should be handled specially too?
-            return '<!--'
-        for name, value in sorted(el.attrib.items()):
-            attrs.append('%s="%s"' % (name, self.format_text(value, False)))
-        if not attrs:
-            return '<%s>' % el.tag
-        return '<%s %s>' % (el.tag, ' '.join(attrs))
-    
-    def format_end_tag(self, el):
-        if isinstance(el, etree.CommentBase):
-            # FIXME: probably PIs should be handled specially too?
-            return '-->'
-        return '</%s>' % el.tag
-
-    def collect_diff(self, want, got, html, indent):
-        parts = []
-        if not len(want) and not len(got):
-            parts.append(' '*indent)
-            parts.append(self.collect_diff_tag(want, got))
-            if not self.html_empty_tag(got, html):
-                parts.append(self.collect_diff_text(want.text, got.text))
-                parts.append(self.collect_diff_end_tag(want, got))
-            parts.append(self.collect_diff_text(want.tail, got.tail))
-            parts.append('\n')
-            return ''.join(parts)
-        parts.append(' '*indent)
-        parts.append(self.collect_diff_tag(want, got))
-        parts.append('\n')
-        if strip(want.text) or strip(got.text):
-            parts.append(' '*indent)
-            parts.append(self.collect_diff_text(want.text, got.text))
-            parts.append('\n')
-        want_children = list(want)
-        got_children = list(got)
-        while want_children or got_children:
-            if not want_children:
-                parts.append(self.format_doc(got_children.pop(0), html, indent+2, '+'))
-                continue
-            if not got_children:
-                parts.append(self.format_doc(want_children.pop(0), html, indent+2, '-'))
-                continue
-            parts.append(self.collect_diff(
-                want_children.pop(0), got_children.pop(0), html, indent+2))
-        parts.append(' '*indent)
-        parts.append(self.collect_diff_end_tag(want, got))
-        parts.append('\n')
-        if strip(want.tail) or strip(got.tail):
-            parts.append(' '*indent)
-            parts.append(self.collect_diff_text(want.tail, got.tail))
-            parts.append('\n')
-        return ''.join(parts)
-
-    def collect_diff_tag(self, want, got):
-        if not self.tag_compare(want.tag, got.tag):
-            tag = '%s (got: %s)' % (want.tag, got.tag)
-        else:
-            tag = got.tag
-        attrs = []
-        any = want.tag == 'any' or 'any' in want.attrib
-        for name, value in sorted(got.attrib.items()):
-            if name not in want.attrib and not any:
-                attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
-            else:
-                if name in want.attrib:
-                    text = self.collect_diff_text(want.attrib[name], value, False)
-                else:
-                    text = self.format_text(value, False)
-                attrs.append('%s="%s"' % (name, text))
-        if not any:
-            for name, value in sorted(want.attrib.items()):
-                if name in got.attrib:
-                    continue
-                attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
-        if attrs:
-            tag = '<%s %s>' % (tag, ' '.join(attrs))
-        else:
-            tag = '<%s>' % tag
-        return tag
-
-    def collect_diff_end_tag(self, want, got):
-        if want.tag != got.tag:
-            tag = '%s (got: %s)' % (want.tag, got.tag)
-        else:
-            tag = got.tag
-        return '</%s>' % tag
-
-    def collect_diff_text(self, want, got, strip=True):
-        if self.text_compare(want, got, strip):
-            if not got:
-                return ''
-            return self.format_text(got, strip)
-        text = '%s (got: %s)' % (want, got)
-        return self.format_text(text, strip)
-
-class LHTMLOutputChecker(LXMLOutputChecker):
-    def get_default_parser(self):
-        return html_fromstring
-    
-def install(html=False):
-    """
-    Install doctestcompare for all future doctests.
-
-    If html is true, then by default the HTML parser will be used;
-    otherwise the XML parser is used.
-    """
-    if html:
-        doctest.OutputChecker = LHTMLOutputChecker
-    else:
-        doctest.OutputChecker = LXMLOutputChecker
-
-def temp_install(html=False, del_module=None):
-    """
-    Use this *inside* a doctest to enable this checker for this
-    doctest only.
-
-    If html is true, then by default the HTML parser will be used;
-    otherwise the XML parser is used.
-    """
-    if html:
-        Checker = LHTMLOutputChecker
-    else:
-        Checker = LXMLOutputChecker
-    frame = _find_doctest_frame()
-    dt_self = frame.f_locals['self']
-    checker = Checker()
-    old_checker = dt_self._checker
-    dt_self._checker = checker
-    # The unfortunate thing is that there is a local variable 'check'
-    # in the function that runs the doctests, that is a bound method
-    # into the output checker.  We have to update that.  We can't
-    # modify the frame, so we have to modify the object in place.  The
-    # only way to do this is to actually change the func_code
-    # attribute of the method.  We change it, and then wait for
-    # __record_outcome to be run, which signals the end of the __run
-    # method, at which point we restore the previous check_output
-    # implementation.
-    if _IS_PYTHON_3:
-        check_func = frame.f_locals['check'].__func__
-        checker_check_func = checker.check_output.__func__
-    else:
-        check_func = frame.f_locals['check'].im_func
-        checker_check_func = checker.check_output.im_func
-    # Because we can't patch up func_globals, this is the only global
-    # in check_output that we care about:
-    doctest.etree = etree
-    _RestoreChecker(dt_self, old_checker, checker,
-                    check_func, checker_check_func,
-                    del_module)
-
-class _RestoreChecker(object):
-    def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
-                 del_module):
-        self.dt_self = dt_self
-        self.checker = old_checker
-        self.checker._temp_call_super_check_output = self.call_super
-        self.checker._temp_override_self = new_checker
-        self.check_func = check_func
-        self.clone_func = clone_func
-        self.del_module = del_module
-        self.install_clone()
-        self.install_dt_self()
-    def install_clone(self):
-        if _IS_PYTHON_3:
-            self.func_code = self.check_func.__code__
-            self.func_globals = self.check_func.__globals__
-            self.check_func.__code__ = self.clone_func.__code__
-        else:
-            self.func_code = self.check_func.func_code
-            self.func_globals = self.check_func.func_globals
-            self.check_func.func_code = self.clone_func.func_code
-    def uninstall_clone(self):
-        if _IS_PYTHON_3:
-            self.check_func.__code__ = self.func_code
-        else:
-            self.check_func.func_code = self.func_code
-    def install_dt_self(self):
-        self.prev_func = self.dt_self._DocTestRunner__record_outcome
-        self.dt_self._DocTestRunner__record_outcome = self
-    def uninstall_dt_self(self):
-        self.dt_self._DocTestRunner__record_outcome = self.prev_func
-    def uninstall_module(self):
-        if self.del_module:
-            import sys
-            del sys.modules[self.del_module]
-            if '.' in self.del_module:
-                package, module = self.del_module.rsplit('.', 1)
-                package_mod = sys.modules[package]
-                delattr(package_mod, module)
-    def __call__(self, *args, **kw):
-        self.uninstall_clone()
-        self.uninstall_dt_self()
-        del self.checker._temp_override_self
-        del self.checker._temp_call_super_check_output
-        result = self.prev_func(*args, **kw)
-        self.uninstall_module()
-        return result
-    def call_super(self, *args, **kw):
-        self.uninstall_clone()
-        try:
-            return self.check_func(*args, **kw)
-        finally:
-            self.install_clone()
-            
-def _find_doctest_frame():
-    import sys
-    frame = sys._getframe(1)
-    while frame:
-        l = frame.f_locals
-        if 'BOOM' in l:
-            # Sign of doctest
-            return frame
-        frame = frame.f_back
-    raise LookupError(
-        "Could not find doctest (only use this function *inside* a doctest)")
-    
-__test__ = {
-    'basic': '''
-    >>> temp_install()
-    >>> print """<xml a="1" b="2">stuff</xml>"""
-    <xml b="2" a="1">...</xml>
-    >>> print """<xml xmlns="http://example.com"><tag   attr="bar"   /></xml>"""
-    <xml xmlns="...">
-      <tag attr="..." />
-    </xml>
-    >>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS
-    <xml>...foo /></xml>
-    '''}
-
-if __name__ == '__main__':
-    import doctest
-    doctest.testmod()
-    
-