Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/bleach/_vendor/html5lib/filters/optionaltags.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4f3585e2f14b |
|---|---|
| 1 from __future__ import absolute_import, division, unicode_literals | |
| 2 | |
| 3 from . import base | |
| 4 | |
| 5 | |
class Filter(base.Filter):
    """Removes optional tags from the token stream.

    Iterating the filter walks ``self.source`` with a three-token window
    (previous, current, next) and drops every ``StartTag`` / ``EndTag``
    token that ``is_optional_start`` / ``is_optional_end`` reports as
    omissible. All other tokens are passed through unchanged.

    Tokens are read as mappings with a ``"type"`` key and, for tag tokens,
    ``"name"`` and ``"data"`` keys.
    """

    def slider(self):
        """Yield ``(previous, current, next)`` triples over ``self.source``.

        ``previous`` is ``None`` for the first token and ``next`` is ``None``
        for the last one. Nothing is yielded when the source is empty.
        """
        previous1 = previous2 = None
        for token in self.source:
            if previous1 is not None:
                yield previous2, previous1, token
            previous2 = previous1
            previous1 = token
        if previous1 is not None:
            yield previous2, previous1, None

    def __iter__(self):
        """Yield the source tokens, skipping omissible start and end tags."""
        for previous, token, following in self.slider():
            token_type = token["type"]
            if token_type == "StartTag":
                # A start tag with non-empty data (presumably its
                # attributes) is always kept.
                if (token["data"] or
                        not self.is_optional_start(token["name"], previous,
                                                   following)):
                    yield token
            elif token_type == "EndTag":
                if not self.is_optional_end(token["name"], following):
                    yield token
            else:
                yield token

    def is_optional_start(self, tagname, previous, next):
        """Return whether the start tag of ``tagname`` may be omitted.

        :arg tagname: name of the element whose start tag is considered
        :arg previous: the token preceding the start tag, or ``None``
        :arg next: the token following the start tag, or ``None``

        :returns: ``True`` if the start tag can be dropped, else ``False``
        """
        # ``next`` shadows the builtin but is part of the method's signature.
        next_type = next["type"] if next else None
        # Exact comparison: ``tagname in 'html'`` would be a substring test
        # and would also match names such as 'h', 'm', 'l' or ''.
        if tagname == 'html':
            # An html element's start tag may be omitted if the first thing
            # inside the html element is not a space character or a comment.
            return next_type not in ("Comment", "SpaceCharacters")
        elif tagname == 'head':
            # A head element's start tag may be omitted if the first thing
            # inside the head element is an element.
            # XXX: we also omit the start tag if the head element is empty
            if next_type in ("StartTag", "EmptyTag"):
                return True
            elif next_type == "EndTag":
                return next["name"] == "head"
            # Any other following token falls through to the final
            # ``return False``.
        elif tagname == 'body':
            # A body element's start tag may be omitted if the first thing
            # inside the body element is not a space character or a comment,
            # except if the first thing inside the body element is a script
            # or style element and the node immediately preceding the body
            # element is a head element whose end tag has been omitted.
            if next_type in ("Comment", "SpaceCharacters"):
                return False
            elif next_type == "StartTag":
                # XXX: we do not look at the preceding event, so we never omit
                # the body element's start tag if it's followed by a script or
                # a style element.
                return next["name"] not in ('script', 'style')
            else:
                return True
        elif tagname == 'colgroup':
            # A colgroup element's start tag may be omitted if the first thing
            # inside the colgroup element is a col element, and if the element
            # is not immediately preceded by another colgroup element whose
            # end tag has been omitted.
            if next_type in ("StartTag", "EmptyTag"):
                # XXX: we do not look at the preceding event, so instead we
                # never omit the colgroup element's end tag when it is
                # immediately followed by another colgroup element. See
                # is_optional_end.
                return next["name"] == "col"
            else:
                return False
        elif tagname == 'tbody':
            # A tbody element's start tag may be omitted if the first thing
            # inside the tbody element is a tr element, and if the element is
            # not immediately preceded by a tbody, thead, or tfoot element
            # whose end tag has been omitted.
            if next_type == "StartTag":
                # is_optional_end omits the end tag of a thead, tbody or
                # tfoot that is immediately followed by a tbody, so when the
                # preceding token is such an end tag this start tag is kept.
                if previous and previous['type'] == 'EndTag' and \
                        previous['name'] in ('tbody', 'thead', 'tfoot'):
                    return False
                return next["name"] == 'tr'
            else:
                return False
        return False

    def is_optional_end(self, tagname, next):
        """Return whether the end tag of ``tagname`` may be omitted.

        :arg tagname: name of the element whose end tag is considered
        :arg next: the token following the end tag, or ``None`` when the end
            tag is the last token of the stream

        :returns: ``True`` if the end tag can be dropped, else ``False``
        """
        # ``next`` shadows the builtin but is part of the method's signature.
        next_type = next["type"] if next else None
        if tagname in ('html', 'head', 'body'):
            # An html, head or body element's end tag may be omitted if the
            # element is not immediately followed by a space character or a
            # comment.
            return next_type not in ("Comment", "SpaceCharacters")
        elif tagname in ('li', 'optgroup', 'tr'):
            # A li element's end tag may be omitted if the li element is
            # immediately followed by another li element or if there is
            # no more content in the parent element.
            # An optgroup element's end tag may be omitted if the optgroup
            # element is immediately followed by another optgroup element,
            # or if there is no more content in the parent element.
            # A tr element's end tag may be omitted if the tr element is
            # immediately followed by another tr element, or if there is
            # no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] == tagname
            else:
                return next_type == "EndTag" or next_type is None
        elif tagname in ('dt', 'dd'):
            # A dt element's end tag may be omitted if the dt element is
            # immediately followed by another dt element or a dd element.
            # A dd element's end tag may be omitted if the dd element is
            # immediately followed by another dd element or a dt element,
            # or if there is no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] in ('dt', 'dd')
            elif tagname == 'dd':
                return next_type == "EndTag" or next_type is None
            else:
                return False
        elif tagname == 'p':
            # A p element's end tag may be omitted if the p element is
            # immediately followed by an address, article, aside,
            # blockquote, datagrid, dialog, dir, div, dl, fieldset,
            # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
            # nav, ol, p, pre, section, table, or ul, element, or if
            # there is no more content in the parent element.
            if next_type in ("StartTag", "EmptyTag"):
                return next["name"] in ('address', 'article', 'aside',
                                        'blockquote', 'datagrid', 'dialog',
                                        'dir', 'div', 'dl', 'fieldset',
                                        'footer', 'form', 'h1', 'h2', 'h3',
                                        'h4', 'h5', 'h6', 'header', 'hr',
                                        'menu', 'nav', 'ol', 'p', 'pre',
                                        'section', 'table', 'ul')
            else:
                return next_type == "EndTag" or next_type is None
        elif tagname == 'option':
            # An option element's end tag may be omitted if the option
            # element is immediately followed by another option element,
            # or if it is immediately followed by an <code>optgroup</code>
            # element, or if there is no more content in the parent
            # element.
            if next_type == "StartTag":
                return next["name"] in ('option', 'optgroup')
            else:
                return next_type == "EndTag" or next_type is None
        elif tagname in ('rt', 'rp'):
            # An rt element's end tag may be omitted if the rt element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            # An rp element's end tag may be omitted if the rp element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] in ('rt', 'rp')
            else:
                return next_type == "EndTag" or next_type is None
        elif tagname == 'colgroup':
            # A colgroup element's end tag may be omitted if the colgroup
            # element is not immediately followed by a space character or
            # a comment.
            if next_type in ("Comment", "SpaceCharacters"):
                return False
            elif next_type == "StartTag":
                # XXX: we also look for an immediately following colgroup
                # element. See is_optional_start.
                return next["name"] != 'colgroup'
            else:
                return True
        elif tagname in ('thead', 'tbody'):
            # A thead element's end tag may be omitted if the thead element
            # is immediately followed by a tbody or tfoot element.
            # A tbody element's end tag may be omitted if the tbody element
            # is immediately followed by a tbody or tfoot element, or if
            # there is no more content in the parent element.
            # When the following element is a tbody, is_optional_start keeps
            # that tbody's start tag because it sees this end tag as the
            # preceding token.
            if next_type == "StartTag":
                return next["name"] in ('tbody', 'tfoot')
            elif tagname == 'tbody':
                return next_type == "EndTag" or next_type is None
            else:
                return False
        elif tagname == 'tfoot':
            # A tfoot element's end tag may be omitted if the tfoot element
            # is immediately followed by a tbody element, or if there is no
            # more content in the parent element.
            # When the following element is a tbody, is_optional_start keeps
            # that tbody's start tag because it sees this end tag as the
            # preceding token.
            if next_type == "StartTag":
                return next["name"] == 'tbody'
            else:
                return next_type == "EndTag" or next_type is None
        elif tagname in ('td', 'th'):
            # A td element's end tag may be omitted if the td element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            # A th element's end tag may be omitted if the th element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] in ('td', 'th')
            else:
                return next_type == "EndTag" or next_type is None
        return False
