comparison env/lib/python3.9/site-packages/bleach/_vendor/html5lib/filters/optionaltags.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 from __future__ import absolute_import, division, unicode_literals
2
3 from . import base
4
5
class Filter(base.Filter):
    """Removes optional tags from the token stream

    The filter walks ``self.source`` with one token of look-behind and one
    token of look-ahead and drops every start or end tag that
    ``is_optional_start`` / ``is_optional_end`` reports as omissible.  All
    other tokens are passed through unchanged.
    """

    # Elements whose start tag, when it directly follows a ``</p>``, allows
    # that ``</p>`` to be omitted.
    _P_END_FOLLOWERS = (
        'address', 'article', 'aside', 'blockquote', 'datagrid', 'dialog',
        'dir', 'div', 'dl', 'fieldset', 'footer', 'form',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'header', 'hr', 'menu', 'nav', 'ol', 'p', 'pre', 'section',
        'table', 'ul',
    )

    def slider(self):
        """Yield ``(previous, current, next)`` triples over ``self.source``.

        ``previous`` is ``None`` for the first token and ``next`` is ``None``
        for the last one.  Nothing is yielded for an empty source.
        """
        before = current = None
        for upcoming in self.source:
            # The triple for a token can only be emitted once its successor
            # is known, so the output lags one token behind the input.
            if current is not None:
                yield before, current, upcoming
            before = current
            current = upcoming
        if current is not None:
            yield before, current, None

    def __iter__(self):
        """Yield the tokens of the source, minus omissible start/end tags."""
        for prev_token, token, next_token in self.slider():
            token_type = token["type"]
            if token_type == "StartTag":
                # A start tag carrying attributes (non-empty "data") is
                # always kept.
                if (token["data"] or
                        not self.is_optional_start(token["name"],
                                                   prev_token, next_token)):
                    yield token
            elif token_type == "EndTag":
                if not self.is_optional_end(token["name"], next_token):
                    yield token
            else:
                yield token

    def is_optional_start(self, tagname, previous, next):
        """Return True if the start tag ``tagname`` may be omitted.

        :arg tagname: name of the element whose start tag is being examined
        :arg previous: the token preceding the start tag, or ``None``
        :arg next: the token following the start tag, or ``None``

        :returns: ``True`` when the tag can be dropped, ``False`` otherwise
        """
        next_type = next["type"] if next else None
        # NOTE: this must be an equality test.  A substring test
        # (``tagname in 'html'``) would also match '', 'h', 't', 'm', 'l', ...
        if tagname == 'html':
            # An html element's start tag may be omitted if the first thing
            # inside the html element is not a space character or a comment.
            return next_type not in ("Comment", "SpaceCharacters")
        elif tagname == 'head':
            # A head element's start tag may be omitted if the first thing
            # inside the head element is an element.
            # XXX: we also omit the start tag if the head element is empty
            if next_type in ("StartTag", "EmptyTag"):
                return True
            elif next_type == "EndTag":
                return next["name"] == "head"
            return False
        elif tagname == 'body':
            # A body element's start tag may be omitted if the first thing
            # inside the body element is not a space character or a comment,
            # except if the first thing inside the body element is a script
            # or style element and the node immediately preceding the body
            # element is a head element whose end tag has been omitted.
            if next_type in ("Comment", "SpaceCharacters"):
                return False
            elif next_type == "StartTag":
                # XXX: we do not look at the preceding event, so we never omit
                # the body element's start tag if it's followed by a script or
                # a style element.
                return next["name"] not in ('script', 'style')
            else:
                return True
        elif tagname == 'colgroup':
            # A colgroup element's start tag may be omitted if the first thing
            # inside the colgroup element is a col element, and if the element
            # is not immediately preceded by another colgroup element whose
            # end tag has been omitted.
            if next_type in ("StartTag", "EmptyTag"):
                # XXX: we do not look at the preceding event, so instead we
                # never omit the colgroup element's end tag when it is
                # immediately followed by another colgroup element.
                # See is_optional_end.
                return next["name"] == "col"
            else:
                return False
        elif tagname == 'tbody':
            # A tbody element's start tag may be omitted if the first thing
            # inside the tbody element is a tr element, and if the element is
            # not immediately preceded by a tbody, thead, or tfoot element
            # whose end tag has been omitted.
            if next_type == "StartTag":
                # is_optional_end may omit the end tag of a tbody, thead or
                # tfoot element that is immediately followed by a tbody
                # element, so this start tag is kept whenever the previous
                # token is such an end tag.
                if previous and previous['type'] == 'EndTag' and \
                        previous['name'] in ('tbody', 'thead', 'tfoot'):
                    return False
                return next["name"] == 'tr'
            else:
                return False
        return False

    def is_optional_end(self, tagname, next):
        """Return True if the end tag ``tagname`` may be omitted.

        :arg tagname: name of the element whose end tag is being examined
        :arg next: the token following the end tag, or ``None`` when the end
            tag is the last token of the stream

        :returns: ``True`` when the tag can be dropped, ``False`` otherwise
        """
        next_type = next["type"] if next else None
        # "No more content in the parent element" is detected as: the next
        # token is an end tag, or there is no next token at all.
        parent_is_done = next_type == "EndTag" or next_type is None
        if tagname in ('html', 'head', 'body'):
            # An html element's end tag may be omitted if the html element
            # is not immediately followed by a space character or a comment.
            return next_type not in ("Comment", "SpaceCharacters")
        elif tagname in ('li', 'optgroup', 'tr'):
            # A li element's end tag may be omitted if the li element is
            # immediately followed by another li element or if there is
            # no more content in the parent element.
            # An optgroup element's end tag may be omitted if the optgroup
            # element is immediately followed by another optgroup element,
            # or if there is no more content in the parent element.
            # A tr element's end tag may be omitted if the tr element is
            # immediately followed by another tr element, or if there is
            # no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] == tagname
            else:
                return parent_is_done
        elif tagname in ('dt', 'dd'):
            # A dt element's end tag may be omitted if the dt element is
            # immediately followed by another dt element or a dd element.
            # A dd element's end tag may be omitted if the dd element is
            # immediately followed by another dd element or a dt element,
            # or if there is no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] in ('dt', 'dd')
            elif tagname == 'dd':
                return parent_is_done
            else:
                return False
        elif tagname == 'p':
            # A p element's end tag may be omitted if the p element is
            # immediately followed by an address, article, aside,
            # blockquote, datagrid, dialog, dir, div, dl, fieldset,
            # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
            # nav, ol, p, pre, section, table, or ul, element, or if
            # there is no more content in the parent element.
            if next_type in ("StartTag", "EmptyTag"):
                return next["name"] in self._P_END_FOLLOWERS
            else:
                return parent_is_done
        elif tagname == 'option':
            # An option element's end tag may be omitted if the option
            # element is immediately followed by another option element,
            # or if it is immediately followed by an <code>optgroup</code>
            # element, or if there is no more content in the parent
            # element.
            if next_type == "StartTag":
                return next["name"] in ('option', 'optgroup')
            else:
                return parent_is_done
        elif tagname in ('rt', 'rp'):
            # An rt element's end tag may be omitted if the rt element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            # An rp element's end tag may be omitted if the rp element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] in ('rt', 'rp')
            else:
                return parent_is_done
        elif tagname == 'colgroup':
            # A colgroup element's end tag may be omitted if the colgroup
            # element is not immediately followed by a space character or
            # a comment.
            if next_type in ("Comment", "SpaceCharacters"):
                return False
            elif next_type == "StartTag":
                # XXX: we also look for an immediately following colgroup
                # element. See is_optional_start.
                return next["name"] != 'colgroup'
            else:
                return True
        elif tagname in ('thead', 'tbody'):
            # A thead element's end tag may be omitted if the thead element
            # is immediately followed by a tbody or tfoot element.
            # A tbody element's end tag may be omitted if the tbody element
            # is immediately followed by a tbody or tfoot element, or if
            # there is no more content in the parent element.
            # When the following element is a tbody, is_optional_start keeps
            # that tbody's start tag, so the boundary stays visible.
            if next_type == "StartTag":
                return next["name"] in ['tbody', 'tfoot']
            elif tagname == 'tbody':
                return parent_is_done
            else:
                return False
        elif tagname == 'tfoot':
            # A tfoot element's end tag may be omitted if the tfoot element
            # is immediately followed by a tbody element, or if there is no
            # more content in the parent element.
            # When the following element is a tbody, is_optional_start keeps
            # that tbody's start tag, so the boundary stays visible.
            if next_type == "StartTag":
                return next["name"] == 'tbody'
            else:
                return parent_is_done
        elif tagname in ('td', 'th'):
            # A td element's end tag may be omitted if the td element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            # A th element's end tag may be omitted if the th element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            if next_type == "StartTag":
                return next["name"] in ('td', 'th')
            else:
                return parent_is_done
        return False