Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/bleach/_vendor/html5lib/treewalkers/etree_lxml.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 from __future__ import absolute_import, division, unicode_literals | |
2 from six import text_type | |
3 | |
4 from collections import OrderedDict | |
5 | |
6 from lxml import etree | |
7 from ..treebuilders.etree import tag_regexp | |
8 | |
9 from . import base | |
10 | |
11 from .. import _ihatexml | |
12 | |
13 | |
def ensure_str(s):
    """Return *s* as a text string, passing ``None`` through untouched.

    Values that are already ``text_type`` instances are returned as-is.
    Anything else is converted by calling its ``decode`` method with the
    ASCII codec in ``"strict"`` error mode.
    """
    # None and text need no conversion; both are handed back unchanged.
    if s is None or isinstance(s, text_type):
        return s
    return s.decode("ascii", "strict")
21 | |
22 | |
class Root(object):
    """Synthetic document node placed above an lxml tree.

    ``children`` lists, in order, an optional :class:`Doctype` (only when
    the tree's ``docinfo`` reports an internal DTD) followed by every
    top-level sibling of the tree's root node.
    """

    def __init__(self, et):
        """Collect the top-level nodes of *et*.

        *et* may expose ``getroot()``; if it does not, *et* itself is used
        as the starting node.
        """
        self.elementtree = et
        self.text = self.tail = None
        self.children = []

        # Any AttributeError raised while probing docinfo or building the
        # doctype node means "no doctype": it is deliberately swallowed.
        try:
            docinfo = et.docinfo
            if docinfo.internalDTD:
                doctype = Doctype(self,
                                  ensure_str(docinfo.root_name),
                                  ensure_str(docinfo.public_id),
                                  ensure_str(docinfo.system_url))
                self.children.append(doctype)
        except AttributeError:
            pass

        try:
            current = et.getroot()
        except AttributeError:
            current = et

        # Rewind to the first top-level sibling ...
        previous = current.getprevious()
        while previous is not None:
            current = previous
            previous = current.getprevious()

        # ... then walk forwards, recording each sibling in turn.
        while current is not None:
            self.children.append(current)
            current = current.getnext()

    def __getitem__(self, key):
        """Index into the collected top-level children."""
        return self.children[key]

    def getnext(self):
        """The document node never has a following sibling."""
        return None

    def __len__(self):
        """Always report a length of 1, whatever ``children`` holds."""
        return 1
59 | |
60 | |
class Doctype(object):
    """Stand-in node carrying a document type declaration.

    Stores the doctype name plus its public and system identifiers, and
    keeps a reference to the root node whose ``children`` list it is
    expected to sit in.
    """

    def __init__(self, root_node, name, public_id, system_id):
        self.root_node = root_node
        self.name, self.public_id, self.system_id = name, public_id, system_id

        # A doctype carries no text content of its own.
        self.text = self.tail = None

    def getnext(self):
        """Return the entry at index 1 of the root node's ``children``."""
        top_level = self.root_node.children
        return top_level[1]
73 | |
74 | |
class FragmentRoot(Root):
    """Root node for a fragment given as a plain sequence of items.

    ``Root.__init__`` is intentionally not invoked: the children come
    straight from the supplied sequence, each wrapped in a
    :class:`FragmentWrapper` that points back at this root.
    """

    def __init__(self, children):
        self.children = [FragmentWrapper(self, item) for item in children]
        self.text = None
        self.tail = None

    def getnext(self):
        """A fragment root has no following sibling."""
        return None
82 | |
83 | |
class FragmentWrapper(object):
    """Proxy giving one fragment item a node-like interface.

    Attribute lookups that the wrapper does not define itself are
    forwarded to the wrapped object, while sibling and parent navigation
    is answered from the owning fragment root.
    """

    def __init__(self, fragment_root, obj):
        self.root_node = fragment_root
        self.obj = obj
        # Items without a ``text``/``tail`` attribute get None for it.
        self.text = ensure_str(obj.text) if hasattr(obj, 'text') else None
        self.tail = ensure_str(obj.tail) if hasattr(obj, 'tail') else None

    def __getattr__(self, name):
        # Only reached for names missing on the wrapper itself.
        return getattr(self.obj, name)

    def getnext(self):
        """Return the wrapper following this one in the root, or None."""
        siblings = self.root_node.children
        following = siblings.index(self) + 1
        return siblings[following] if following < len(siblings) else None

    def getparent(self):
        """Fragment items are top-level, so there is no parent."""
        return None

    def __getitem__(self, key):
        return self.obj[key]

    def __len__(self):
        return len(self.obj)

    def __bool__(self):
        return bool(self.obj)

    def __str__(self):
        return str(self.obj)

    def __unicode__(self):
        return str(self.obj)
125 | |
126 | |
class TreeWalker(base.NonRecursiveTreeWalker):
    """Non-recursive tree walker over lxml trees and fragment lists.

    Text is not a node in its own right here: it is addressed as a
    ``(node, "text")`` or ``(node, "tail")`` tuple, where ``node`` is the
    object whose ``text``/``tail`` attribute holds the string.
    """

    def __init__(self, tree):
        """Wrap *tree* in a synthetic root and initialise the base walker.

        :arg tree: either a list of fragment items, or a single tree/node
            object that is handed to :class:`Root`.
        """
        # pylint:disable=redefined-variable-type
        if isinstance(tree, list):
            # Remember the raw fragment items so getParentNode can stop
            # at them.
            self.fragmentChildren = set(tree)
            tree = FragmentRoot(tree)
        else:
            self.fragmentChildren = set()
            tree = Root(tree)
        base.NonRecursiveTreeWalker.__init__(self, tree)
        self.filter = _ihatexml.InfosetFilter()

    def getNodeDetails(self, node):
        """Describe *node* as a tuple led by one of ``base``'s type constants.

        For ordinary elements the tuple is ``(base.ELEMENT, namespace,
        name, attrs, hasChildren)``: ``attrs`` is an ``OrderedDict`` keyed
        by ``(namespace, local_name)`` and ``hasChildren`` is a ``bool``.
        """
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            return base.TEXT, ensure_str(getattr(node, key))

        elif isinstance(node, Root):
            return (base.DOCUMENT,)

        elif isinstance(node, Doctype):
            return base.DOCTYPE, node.name, node.public_id, node.system_id

        elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
            # A wrapped fragment item without a tag is treated as text.
            return base.TEXT, ensure_str(node.obj)

        elif node.tag == etree.Comment:
            return base.COMMENT, ensure_str(node.text)

        elif node.tag == etree.Entity:
            return base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;

        else:
            # This is assumed to be an ordinary element
            match = tag_regexp.match(ensure_str(node.tag))
            if match:
                namespace, tag = match.groups()
            else:
                namespace = None
                tag = ensure_str(node.tag)
            attrs = OrderedDict()
            # attrib is only read here, so no defensive copy is needed.
            for name, value in node.attrib.items():
                name = ensure_str(name)
                value = ensure_str(value)
                match = tag_regexp.match(name)
                if match:
                    attrs[(match.group(1), match.group(2))] = value
                else:
                    attrs[(None, name)] = value
            # Normalise to a real bool so the element's text string is
            # not leaked as the "has children" flag.
            has_children = bool(len(node) > 0 or node.text)
            return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
                    attrs, has_children)

    def getFirstChild(self, node):
        """Return the first child of *node*: its text if any, else ``node[0]``."""
        assert not isinstance(node, tuple), "Text nodes have no children"

        assert len(node) or node.text, "Node has no children"
        if node.text:
            return (node, "text")
        else:
            return node[0]

    def getNextSibling(self, node):
        """Return what follows *node* at the same level, or ``None``."""
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            if key == "text":
                # XXX: we cannot use a "bool(node) and node[0] or None" construct here
                # because node[0] might evaluate to False if it has no child element
                if len(node):
                    return node[0]
                else:
                    return None
            else:  # tail
                return node.getnext()

        # A node's tail text comes before its next sibling node.
        return (node, "tail") if node.tail else node.getnext()

    def getParentNode(self, node):
        """Return the parent of *node*, or ``None`` for raw fragment items."""
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            if key == "text":
                # Leading text belongs to the node that carries it.
                return node
            # else: fallback to "normal" processing
        elif node in self.fragmentChildren:
            return None

        return node.getparent()