Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/bleach/_vendor/html5lib/filters/optionaltags.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 from __future__ import absolute_import, division, unicode_literals | |
2 | |
3 from . import base | |
4 | |
5 | |
class Filter(base.Filter):
    """Removes optional tags from the token stream.

    Iterating the filter walks ``self.source`` and drops every start or end
    tag token that ``is_optional_start`` / ``is_optional_end`` reports as
    omissible.  Tokens are mappings read through the keys ``"type"``,
    ``"name"`` and (for start tags) ``"data"``.
    """

    def slider(self):
        """Yield ``(previous, token, next)`` triples over ``self.source``.

        ``previous`` is ``None`` for the first token and ``next`` is ``None``
        for the last one.  Nothing is yielded for an empty source.
        """
        previous1 = previous2 = None
        for token in self.source:
            # Emission lags one step behind the read position so that the
            # token following the emitted one is already known.
            if previous1 is not None:
                yield previous2, previous1, token
            previous2 = previous1
            previous1 = token
        if previous1 is not None:
            # Flush the final token, which has no successor.
            yield previous2, previous1, None

    def __iter__(self):
        """Yield the source tokens, skipping omissible start and end tags.

        A start tag with truthy ``token["data"]`` (presumably its
        attributes) is always kept, since dropping it would lose that data.
        Tokens that are neither ``StartTag`` nor ``EndTag`` pass through
        unchanged.
        """
        for previous, token, following in self.slider():
            token_type = token["type"]
            if token_type == "StartTag":
                if (token["data"] or
                        not self.is_optional_start(token["name"], previous,
                                                   following)):
                    yield token
            elif token_type == "EndTag":
                if not self.is_optional_end(token["name"], following):
                    yield token
            else:
                yield token

    def is_optional_start(self, tagname, previous, next):
        """Return True if the start tag of ``tagname`` may be omitted.

        :arg tagname: name of the element whose start tag is being examined
        :arg previous: the token immediately before the start tag, or None
        :arg next: the token immediately after the start tag, or None

        :returns: True when the tag can be dropped, False otherwise
            (including for every element not handled explicitly below)
        """
        next_type = next["type"] if next else None
        # Must be an equality test: ``tagname in 'html'`` is a substring
        # check and would also match names such as 'h', 'm' or 'tml'.
        if tagname == 'html':
            # An html element's start tag may be omitted if the first thing
            # inside the html element is not a space character or a comment.
            return next_type not in ("Comment", "SpaceCharacters")
        elif tagname == 'head':
            # A head element's start tag may be omitted if the first thing
            # inside the head element is an element.
            # XXX: we also omit the start tag if the head element is empty
            if next_type in ("StartTag", "EmptyTag"):
                return True
            elif next_type == "EndTag":
                return next["name"] == "head"
            # Anything else (text, comment, end of stream): keep the tag.
            return False
        elif tagname == 'body':
            # A body element's start tag may be omitted if the first thing
            # inside the body element is not a space character or a comment,
            # except if the first thing inside the body element is a script
            # or style element and the node immediately preceding the body
            # element is a head element whose end tag has been omitted.
            if next_type in ("Comment", "SpaceCharacters"):
                return False
            elif next_type == "StartTag":
                # XXX: we do not look at the preceding event, so we never omit
                # the body element's start tag if it's followed by a script or
                # a style element.
                return next["name"] not in ('script', 'style')
            else:
                return True
        elif tagname == 'colgroup':
            # A colgroup element's start tag may be omitted if the first thing
            # inside the colgroup element is a col element, and if the element
            # is not immediately preceded by another colgroup element whose
            # end tag has been omitted.
            if next_type in ("StartTag", "EmptyTag"):
                # XXX: we do not look at the preceding event, so instead we
                # never omit the colgroup element's end tag when it is
                # immediately followed by another colgroup element. See
                # is_optional_end.
                return next["name"] == "col"
            else:
                return False
        elif tagname == 'tbody':
            # A tbody element's start tag may be omitted if the first thing
            # inside the tbody element is a tr element, and if the element is
            # not immediately preceded by a tbody, thead, or tfoot element
            # whose end tag has been omitted.
            if next_type == "StartTag":
                # The end tag of a preceding tbody, thead or tfoot may be
                # omitted when a tbody follows (see is_optional_end), so in
                # that situation this start tag has to stay.
                if previous and previous['type'] == 'EndTag' and \
                        previous['name'] in ('tbody', 'thead', 'tfoot'):
                    return False
                return next["name"] == 'tr'
            else:
                return False
        return False

    def is_optional_end(self, tagname, next):
        """Return True if the end tag of ``tagname`` may be omitted.

        :arg tagname: name of the element whose end tag is being examined
        :arg next: the token immediately after the end tag, or None when
            the end tag is the last token

        :returns: True when the tag can be dropped, False otherwise
            (including for every element not handled explicitly below)
        """
        next_type = next["type"] if next else None
        if tagname in ('html', 'head', 'body'):
            # The end tag of an html, head or body element may be omitted if
            # the element is not immediately followed by a space character
            # or a comment.
            return next_type not in ("Comment", "SpaceCharacters")
        elif tagname in ('li', 'optgroup', 'tr'):
            # A li element's end tag may be omitted if the li element is
            # immediately followed by another li element or if there is
            # no more content in the parent element.
            # An optgroup element's end tag may be omitted if the optgroup
            # element is immediately followed by another optgroup element,
            # or if there is no more content in the parent element.
            # A tr element's end tag may be omitted if the tr element is
            # immediately followed by another tr element, or if there is
            # no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] == tagname
            else:
                return next_type == "EndTag" or next_type is None
        elif tagname in ('dt', 'dd'):
            # A dt element's end tag may be omitted if the dt element is
            # immediately followed by another dt element or a dd element.
            # A dd element's end tag may be omitted if the dd element is
            # immediately followed by another dd element or a dt element,
            # or if there is no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] in ('dt', 'dd')
            elif tagname == 'dd':
                return next_type == "EndTag" or next_type is None
            else:
                return False
        elif tagname == 'p':
            # A p element's end tag may be omitted if the p element is
            # immediately followed by an address, article, aside,
            # blockquote, datagrid, dialog, dir, div, dl, fieldset,
            # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
            # nav, ol, p, pre, section, table, or ul, element, or if
            # there is no more content in the parent element.
            if next_type in ("StartTag", "EmptyTag"):
                return next["name"] in ('address', 'article', 'aside',
                                        'blockquote', 'datagrid', 'dialog',
                                        'dir', 'div', 'dl', 'fieldset',
                                        'footer', 'form', 'h1', 'h2', 'h3',
                                        'h4', 'h5', 'h6', 'header', 'hr',
                                        'menu', 'nav', 'ol', 'p', 'pre',
                                        'section', 'table', 'ul')
            else:
                return next_type == "EndTag" or next_type is None
        elif tagname == 'option':
            # An option element's end tag may be omitted if the option
            # element is immediately followed by another option element,
            # or if it is immediately followed by an <code>optgroup</code>
            # element, or if there is no more content in the parent
            # element.
            if next_type == "StartTag":
                return next["name"] in ('option', 'optgroup')
            else:
                return next_type == "EndTag" or next_type is None
        elif tagname in ('rt', 'rp'):
            # An rt element's end tag may be omitted if the rt element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            # An rp element's end tag may be omitted if the rp element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] in ('rt', 'rp')
            else:
                return next_type == "EndTag" or next_type is None
        elif tagname == 'colgroup':
            # A colgroup element's end tag may be omitted if the colgroup
            # element is not immediately followed by a space character or
            # a comment.
            if next_type in ("Comment", "SpaceCharacters"):
                return False
            elif next_type == "StartTag":
                # XXX: we also look for an immediately following colgroup
                # element. See is_optional_start.
                return next["name"] != 'colgroup'
            else:
                return True
        elif tagname in ('thead', 'tbody'):
            # A thead element's end tag may be omitted if the thead element
            # is immediately followed by a tbody or tfoot element.
            # A tbody element's end tag may be omitted if the tbody element
            # is immediately followed by a tbody or tfoot element, or if
            # there is no more content in the parent element.
            # When the following element is a tbody, is_optional_start keeps
            # that tbody's start tag, so omitting this end tag is safe.
            if next_type == "StartTag":
                return next["name"] in ['tbody', 'tfoot']
            elif tagname == 'tbody':
                return next_type == "EndTag" or next_type is None
            else:
                return False
        elif tagname == 'tfoot':
            # A tfoot element's end tag may be omitted if the tfoot element
            # is immediately followed by a tbody element, or if there is no
            # more content in the parent element.
            # When the following element is a tbody, is_optional_start keeps
            # that tbody's start tag, so omitting this end tag is safe.
            if next_type == "StartTag":
                return next["name"] == 'tbody'
            else:
                return next_type == "EndTag" or next_type is None
        elif tagname in ('td', 'th'):
            # A td element's end tag may be omitted if the td element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            # A th element's end tag may be omitted if the th element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] in ('td', 'th')
            else:
                return next_type == "EndTag" or next_type is None
        return False