diff env/lib/python3.9/site-packages/bleach/_vendor/html5lib/filters/optionaltags.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.9/site-packages/bleach/_vendor/html5lib/filters/optionaltags.py	Mon Mar 22 18:12:50 2021 +0000
@@ -0,0 +1,207 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from . import base
+
+
+class Filter(base.Filter):
+    """Removes optional tags from the token stream"""
+    def slider(self):
+        previous1 = previous2 = None
+        for token in self.source:
+            if previous1 is not None:
+                yield previous2, previous1, token
+            previous2 = previous1
+            previous1 = token
+        if previous1 is not None:
+            yield previous2, previous1, None
+
+    def __iter__(self):
+        for previous, token, next in self.slider():
+            type = token["type"]
+            if type == "StartTag":
+                if (token["data"] or
+                        not self.is_optional_start(token["name"], previous, next)):
+                    yield token
+            elif type == "EndTag":
+                if not self.is_optional_end(token["name"], next):
+                    yield token
+            else:
+                yield token
+
+    def is_optional_start(self, tagname, previous, next):
+        type = next and next["type"] or None
+        if tagname in 'html':
+            # An html element's start tag may be omitted if the first thing
+            # inside the html element is not a space character or a comment.
+            return type not in ("Comment", "SpaceCharacters")
+        elif tagname == 'head':
+            # A head element's start tag may be omitted if the first thing
+            # inside the head element is an element.
+            # XXX: we also omit the start tag if the head element is empty
+            if type in ("StartTag", "EmptyTag"):
+                return True
+            elif type == "EndTag":
+                return next["name"] == "head"
+        elif tagname == 'body':
+            # A body element's start tag may be omitted if the first thing
+            # inside the body element is not a space character or a comment,
+            # except if the first thing inside the body element is a script
+            # or style element and the node immediately preceding the body
+            # element is a head element whose end tag has been omitted.
+            if type in ("Comment", "SpaceCharacters"):
+                return False
+            elif type == "StartTag":
+                # XXX: we do not look at the preceding event, so we never omit
+                # the body element's start tag if it's followed by a script or
+                # a style element.
+                return next["name"] not in ('script', 'style')
+            else:
+                return True
+        elif tagname == 'colgroup':
+            # A colgroup element's start tag may be omitted if the first thing
+            # inside the colgroup element is a col element, and if the element
+            # is not immediately preceded by another colgroup element whose
+            # end tag has been omitted.
+            if type in ("StartTag", "EmptyTag"):
+                # XXX: we do not look at the preceding event, so instead we never
+                # omit the colgroup element's end tag when it is immediately
+                # followed by another colgroup element. See is_optional_end.
+                return next["name"] == "col"
+            else:
+                return False
+        elif tagname == 'tbody':
+            # A tbody element's start tag may be omitted if the first thing
+            # inside the tbody element is a tr element, and if the element is
+            # not immediately preceded by a tbody, thead, or tfoot element
+            # whose end tag has been omitted.
+            if type == "StartTag":
+                # omit the thead and tfoot elements' end tag when they are
+                # immediately followed by a tbody element. See is_optional_end.
+                if previous and previous['type'] == 'EndTag' and \
+                        previous['name'] in ('tbody', 'thead', 'tfoot'):
+                    return False
+                return next["name"] == 'tr'
+            else:
+                return False
+        return False
+
+    def is_optional_end(self, tagname, next):
+        type = next and next["type"] or None
+        if tagname in ('html', 'head', 'body'):
+            # An html element's end tag may be omitted if the html element
+            # is not immediately followed by a space character or a comment.
+            return type not in ("Comment", "SpaceCharacters")
+        elif tagname in ('li', 'optgroup', 'tr'):
+            # A li element's end tag may be omitted if the li element is
+            # immediately followed by another li element or if there is
+            # no more content in the parent element.
+            # An optgroup element's end tag may be omitted if the optgroup
+            # element is immediately followed by another optgroup element,
+            # or if there is no more content in the parent element.
+            # A tr element's end tag may be omitted if the tr element is
+            # immediately followed by another tr element, or if there is
+            # no more content in the parent element.
+            if type == "StartTag":
+                return next["name"] == tagname
+            else:
+                return type == "EndTag" or type is None
+        elif tagname in ('dt', 'dd'):
+            # A dt element's end tag may be omitted if the dt element is
+            # immediately followed by another dt element or a dd element.
+            # A dd element's end tag may be omitted if the dd element is
+            # immediately followed by another dd element or a dt element,
+            # or if there is no more content in the parent element.
+            if type == "StartTag":
+                return next["name"] in ('dt', 'dd')
+            elif tagname == 'dd':
+                return type == "EndTag" or type is None
+            else:
+                return False
+        elif tagname == 'p':
+            # A p element's end tag may be omitted if the p element is
+            # immediately followed by an address, article, aside,
+            # blockquote, datagrid, dialog, dir, div, dl, fieldset,
+            # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
+            # nav, ol, p, pre, section, table, or ul, element, or if
+            # there is no more content in the parent element.
+            if type in ("StartTag", "EmptyTag"):
+                return next["name"] in ('address', 'article', 'aside',
+                                        'blockquote', 'datagrid', 'dialog',
+                                        'dir', 'div', 'dl', 'fieldset', 'footer',
+                                        'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
+                                        'header', 'hr', 'menu', 'nav', 'ol',
+                                        'p', 'pre', 'section', 'table', 'ul')
+            else:
+                return type == "EndTag" or type is None
+        elif tagname == 'option':
+            # An option element's end tag may be omitted if the option
+            # element is immediately followed by another option element,
+            # or if it is immediately followed by an <code>optgroup</code>
+            # element, or if there is no more content in the parent
+            # element.
+            if type == "StartTag":
+                return next["name"] in ('option', 'optgroup')
+            else:
+                return type == "EndTag" or type is None
+        elif tagname in ('rt', 'rp'):
+            # An rt element's end tag may be omitted if the rt element is
+            # immediately followed by an rt or rp element, or if there is
+            # no more content in the parent element.
+            # An rp element's end tag may be omitted if the rp element is
+            # immediately followed by an rt or rp element, or if there is
+            # no more content in the parent element.
+            if type == "StartTag":
+                return next["name"] in ('rt', 'rp')
+            else:
+                return type == "EndTag" or type is None
+        elif tagname == 'colgroup':
+            # A colgroup element's end tag may be omitted if the colgroup
+            # element is not immediately followed by a space character or
+            # a comment.
+            if type in ("Comment", "SpaceCharacters"):
+                return False
+            elif type == "StartTag":
+                # XXX: we also look for an immediately following colgroup
+                # element. See is_optional_start.
+                return next["name"] != 'colgroup'
+            else:
+                return True
+        elif tagname in ('thead', 'tbody'):
+            # A thead element's end tag may be omitted if the thead element
+            # is immediately followed by a tbody or tfoot element.
+            # A tbody element's end tag may be omitted if the tbody element
+            # is immediately followed by a tbody or tfoot element, or if
+            # there is no more content in the parent element.
+            # A tfoot element's end tag may be omitted if the tfoot element
+            # is immediately followed by a tbody element, or if there is no
+            # more content in the parent element.
+            # XXX: we never omit the end tag when the following element is
+            # a tbody. See is_optional_start.
+            if type == "StartTag":
+                return next["name"] in ['tbody', 'tfoot']
+            elif tagname == 'tbody':
+                return type == "EndTag" or type is None
+            else:
+                return False
+        elif tagname == 'tfoot':
+            # A tfoot element's end tag may be omitted if the tfoot element
+            # is immediately followed by a tbody element, or if there is no
+            # more content in the parent element.
+            # XXX: we never omit the end tag when the following element is
+            # a tbody. See is_optional_start.
+            if type == "StartTag":
+                return next["name"] == 'tbody'
+            else:
+                return type == "EndTag" or type is None
+        elif tagname in ('td', 'th'):
+            # A td element's end tag may be omitted if the td element is
+            # immediately followed by a td or th element, or if there is
+            # no more content in the parent element.
+            # A th element's end tag may be omitted if the th element is
+            # immediately followed by a td or th element, or if there is
+            # no more content in the parent element.
+            if type == "StartTag":
+                return next["name"] in ('td', 'th')
+            else:
+                return type == "EndTag" or type is None
+        return False