comparison env/lib/python3.9/site-packages/bleach/_vendor/html5lib/treewalkers/etree_lxml.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 from __future__ import absolute_import, division, unicode_literals
2 from six import text_type
3
4 from collections import OrderedDict
5
6 from lxml import etree
7 from ..treebuilders.etree import tag_regexp
8
9 from . import base
10
11 from .. import _ihatexml
12
13
def ensure_str(s):
    """Return ``s`` as a text string.

    ``None`` and values that are already ``text_type`` are handed back
    untouched. Anything else is decoded with ``decode("ascii", "strict")``,
    so a byte string containing non-ASCII data raises instead of being
    silently mangled.
    """
    if s is None or isinstance(s, text_type):
        return s
    return s.decode("ascii", "strict")
21
22
class Root(object):
    """Synthetic document node placed above an lxml tree.

    ``children`` holds, in order, an optional :class:`Doctype` entry followed
    by the root node together with all of its preceding and following
    siblings.
    """

    def __init__(self, et):
        self.elementtree = et
        self.children = []

        # Inputs lacking ``docinfo`` (or any attribute read below) raise
        # AttributeError here and simply end up without a doctype entry.
        try:
            if et.docinfo.internalDTD:
                info = et.docinfo
                doctype = Doctype(self,
                                  ensure_str(info.root_name),
                                  ensure_str(info.public_id),
                                  ensure_str(info.system_url))
                self.children.append(doctype)
        except AttributeError:
            pass

        # Use ``getroot()`` when the input offers it; otherwise treat the
        # input itself as the starting node.
        try:
            current = et.getroot()
        except AttributeError:
            current = et

        # Rewind to the earliest sibling of the starting node ...
        earlier = current.getprevious()
        while earlier is not None:
            current = earlier
            earlier = current.getprevious()

        # ... then record every sibling while walking forwards.
        while current is not None:
            self.children.append(current)
            current = current.getnext()

        self.text = self.tail = None

    def __getitem__(self, key):
        """Index into ``children``."""
        return self.children[key]

    def getnext(self):
        """The document node never has a following sibling."""
        return None

    def __len__(self):
        # Constant, independent of len(self.children); presumably so that
        # length/truthiness checks always treat the document as non-empty.
        return 1
59
60
class Doctype(object):
    """Doctype record kept as an entry in a root node's ``children``."""

    def __init__(self, root_node, name, public_id, system_id):
        self.root_node = root_node
        self.name, self.public_id, self.system_id = name, public_id, system_id

        # Present so the doctype can be probed for text like other nodes.
        self.text = self.tail = None

    def getnext(self):
        """Return the entry at index 1 of the owning root's ``children``."""
        siblings = self.root_node.children
        return siblings[1]
73
74
class FragmentRoot(Root):
    """Root node for a fragment supplied as a sequence of top-level items."""

    def __init__(self, children):
        # Root.__init__ is not invoked, so ``elementtree`` is never set on a
        # fragment root; each item is wrapped so it knows this root.
        wrapped = []
        for child in children:
            wrapped.append(FragmentWrapper(self, child))
        self.children = wrapped
        self.text = None
        self.tail = None

    def getnext(self):
        """A fragment root never has a following sibling."""
        return None
82
83
class FragmentWrapper(object):
    """Proxy around one top-level item of a fragment.

    The wrapper remembers the fragment root it belongs to so that sibling
    navigation goes through the root's ``children`` list. Any attribute not
    defined on the wrapper itself is looked up on the wrapped object.

    :arg fragment_root: object whose ``children`` list contains this wrapper
    :arg obj: the wrapped item
    """

    def __init__(self, fragment_root, obj):
        self.root_node = fragment_root
        self.obj = obj
        # Snapshot text/tail as text strings; items without those attributes
        # get ``None``.
        if hasattr(self.obj, 'text'):
            self.text = ensure_str(self.obj.text)
        else:
            self.text = None
        if hasattr(self.obj, 'tail'):
            self.tail = ensure_str(self.obj.tail)
        else:
            self.tail = None

    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails. If ``obj`` itself
        # is missing (instance created without __init__, e.g. through
        # ``__new__``, copying or unpickling), delegating to ``self.obj``
        # would re-enter this method forever and end in RecursionError.
        # Report the attribute as absent instead.
        if name == "obj":
            raise AttributeError(name)
        return getattr(self.obj, name)

    def getnext(self):
        """Return the wrapper following this one in the root, or ``None``."""
        siblings = self.root_node.children
        idx = siblings.index(self)
        if idx < len(siblings) - 1:
            return siblings[idx + 1]
        else:
            return None

    def __getitem__(self, key):
        return self.obj[key]

    def __bool__(self):
        return bool(self.obj)

    def getparent(self):
        """Wrapped items sit at the top of the fragment: no parent."""
        return None

    def __str__(self):
        return str(self.obj)

    def __unicode__(self):
        return str(self.obj)

    def __len__(self):
        return len(self.obj)
125
126
class TreeWalker(base.NonRecursiveTreeWalker):
    """Tree walker for lxml trees and for fragments given as lists.

    Text is not a node of its own in this model, so text positions are
    represented as ``(node, "text")`` and ``(node, "tail")`` tuples.
    """

    def __init__(self, tree):
        # pylint:disable=redefined-variable-type
        is_fragment = isinstance(tree, list)
        # Remember the raw top-level fragment items so getParentNode can
        # stop climbing when it reaches one of them.
        self.fragmentChildren = set(tree) if is_fragment else set()
        root = FragmentRoot(tree) if is_fragment else Root(tree)
        base.NonRecursiveTreeWalker.__init__(self, root)
        self.filter = _ihatexml.InfosetFilter()

    def getNodeDetails(self, node):
        """Classify ``node`` and return its ``base.*`` details tuple."""
        if isinstance(node, tuple):  # Text node
            owner, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            return base.TEXT, ensure_str(getattr(owner, key))

        if isinstance(node, Root):
            return (base.DOCUMENT,)

        if isinstance(node, Doctype):
            return base.DOCTYPE, node.name, node.public_id, node.system_id

        # A wrapped fragment item without a ``tag`` is treated as plain text.
        if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
            return base.TEXT, ensure_str(node.obj)

        node_tag = node.tag
        if node_tag == etree.Comment:
            return base.COMMENT, ensure_str(node.text)

        if node_tag == etree.Entity:
            return base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;

        # Anything left is assumed to be an ordinary element.
        qualified = ensure_str(node_tag)
        tag_match = tag_regexp.match(qualified)
        if tag_match:
            namespace, local_name = tag_match.groups()
        else:
            namespace, local_name = None, qualified

        # Attributes are keyed by (namespace, local name), in source order.
        attrs = OrderedDict()
        for raw_name, raw_value in list(node.attrib.items()):
            attr_name = ensure_str(raw_name)
            attr_value = ensure_str(raw_value)
            attr_match = tag_regexp.match(attr_name)
            if attr_match:
                attr_key = (attr_match.group(1), attr_match.group(2))
            else:
                attr_key = (None, attr_name)
            attrs[attr_key] = attr_value

        has_children = len(node) > 0 or node.text
        return (base.ELEMENT, namespace, self.filter.fromXmlName(local_name),
                attrs, has_children)

    def getFirstChild(self, node):
        """Return the leading text position if any, else the first child."""
        assert not isinstance(node, tuple), "Text nodes have no children"

        assert len(node) or node.text, "Node has no children"
        return (node, "text") if node.text else node[0]

    def getNextSibling(self, node):
        """Return what follows ``node`` at the same level, or ``None``."""
        if not isinstance(node, tuple):
            # An element's tail text comes before its next element sibling.
            return (node, "tail") if node.tail else node.getnext()

        # Text node
        owner, key = node
        assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
        if key != "text":  # tail
            return owner.getnext()
        # XXX: decide on len(owner) rather than on the truthiness of
        # owner[0], which might evaluate to False if it has no child element.
        return owner[0] if len(owner) else None

    def getParentNode(self, node):
        """Return the parent of ``node``, or ``None`` at the top level."""
        if isinstance(node, tuple):  # Text node
            owner, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            # Leading text lives inside its owner; tail text is a sibling of
            # the owner and therefore shares the owner's parent.
            return owner if key == "text" else owner.getparent()

        if node in self.fragmentChildren:
            return None

        return node.getparent()