comparison env/lib/python3.9/site-packages/lxml/_elementpath.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # cython: language_level=2
2
3 #
4 # ElementTree
5 # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
6 #
7 # limited xpath support for element trees
8 #
9 # history:
10 # 2003-05-23 fl created
11 # 2003-05-28 fl added support for // etc
12 # 2003-08-27 fl fixed parsing of periods in element names
13 # 2007-09-10 fl new selection engine
14 # 2007-09-12 fl fixed parent selector
15 # 2007-09-13 fl added iterfind; changed findall to return a list
16 # 2007-11-30 fl added namespaces support
17 # 2009-10-30 fl added child element value filter
18 #
19 # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
20 #
21 # fredrik@pythonware.com
22 # http://www.pythonware.com
23 #
24 # --------------------------------------------------------------------
25 # The ElementTree toolkit is
26 #
27 # Copyright (c) 1999-2009 by Fredrik Lundh
28 #
29 # By obtaining, using, and/or copying this software and/or its
30 # associated documentation, you agree that you have read, understood,
31 # and will comply with the following terms and conditions:
32 #
33 # Permission to use, copy, modify, and distribute this software and
34 # its associated documentation for any purpose and without fee is
35 # hereby granted, provided that the above copyright notice appears in
36 # all copies, and that both that copyright notice and this permission
37 # notice appear in supporting documentation, and that the name of
38 # Secret Labs AB or the author not be used in advertising or publicity
39 # pertaining to distribution of the software without specific, written
40 # prior permission.
41 #
42 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
43 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
44 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
45 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
46 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
47 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
48 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
49 # OF THIS SOFTWARE.
50 # --------------------------------------------------------------------
51
52 ##
53 # Implementation module for XPath support. There's usually no reason
54 # to import this module directly; the <b>ElementTree</b> does this for
55 # you, if needed.
56 ##
57
58 from __future__ import absolute_import
59
60 import re
61
# Tokenizer for the supported path syntax.  Each match carries two groups:
#   group 1 - a quoted string literal or one of the operator tokens
#   group 2 - a name, optionally preceded by a "{...}" qualifier
# A run of whitespace matches with both groups empty.
xpath_tokenizer_re = re.compile(
    "(" + "|".join((
        "'[^']*'",              # 'single-quoted' literal
        "\"[^\"]*\"",           # "double-quoted" literal
        "::",
        "//?",                  # "//" or "/"
        r"\.\.",
        r"\(\)",
        r"[/.*:\[\]\(\)@=]",    # any single operator character
    )) + ")"
    r"|((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)"
    r"|\s+"
)
73
def xpath_tokenizer(pattern, namespaces=None):
    """Split *pattern* into (operator, name) tokens, qualifying names.

    Tokens come from ``xpath_tokenizer_re.findall(pattern)``.  A name that
    does not already start with "{" is rewritten as follows:

    * ``prefix:local`` becomes ``{namespaces[prefix]}local``;
    * an unprefixed name becomes ``{default}name`` when a default namespace
      is configured, unless the token immediately before it was "@".

    All other tokens (including whitespace matches) are yielded unchanged.

    Raises SyntaxError when a prefix is not present in *namespaces* (or when
    no namespace mapping was given at all).
    """
    # ElementTree uses '' as the default-namespace key, lxml used None
    # originally; the None entry wins when it is truthy.
    if namespaces:
        default_namespace = namespaces.get(None) or namespaces.get('')
    else:
        default_namespace = None

    after_at_sign = False
    for token in xpath_tokenizer_re.findall(pattern):
        ttype, tag = token

        if not tag or tag[0] == "{":
            # operator, whitespace, or an already qualified name
            yield token
            after_at_sign = ttype == '@'
            continue

        if ":" in tag:
            prefix, local_name = tag.split(":", 1)
            try:
                if not namespaces:
                    raise KeyError
                yield ttype, "{%s}%s" % (namespaces[prefix], local_name)
            except KeyError:
                raise SyntaxError("prefix %r not found in prefix map" % prefix)
        elif default_namespace and not after_at_sign:
            yield ttype, "{%s}%s" % (default_namespace, tag)
        else:
            yield token
        after_at_sign = False
97
98
def prepare_child(next, token):
    """Build the selector for a child-name step.

    Only the name part of *token* is used; *next* is not called.  The
    returned callable produces, for every element of its input, whatever
    ``element.iterchildren(name)`` yields.
    """
    wanted = token[1]

    def select(result):
        return (child
                for parent in result
                for child in parent.iterchildren(wanted))

    return select
106
def prepare_star(next, token):
    """Build the selector for a "*" step.

    Neither argument is used.  The returned callable produces, for every
    element of its input, whatever ``element.iterchildren('*')`` yields.
    """
    def select(result):
        return (child
                for parent in result
                for child in parent.iterchildren('*'))

    return select
113
def prepare_self(next, token):
    """Build the selector for a "." step.

    Neither argument is used.  The returned callable hands its input back
    untouched.
    """
    return lambda result: result
118
def prepare_descendant(next, token):
    """Build the selector for a "//" step.

    The token following "//" is read with *next*: "*" selects every
    descendant, a plain name selects descendants with that name, and any
    other operator raises SyntaxError("invalid descendant").  The returned
    callable produces, for every element of its input, whatever
    ``element.iterdescendants(name)`` yields.
    """
    following = next()
    operator = following[0]
    if operator == "*":
        wanted = "*"
    elif operator:
        raise SyntaxError("invalid descendant")
    else:
        wanted = following[1]

    def select(result):
        for ancestor in result:
            for descendant in ancestor.iterdescendants(wanted):
                yield descendant

    return select
132
def prepare_parent(next, token):
    """Build the selector for a ".." step.

    Neither argument is used.  The returned callable yields
    ``element.getparent()`` for every input element whose parent is not
    None; elements without a parent contribute nothing.
    """
    def select(result):
        for node in result:
            owner = node.getparent()
            if owner is None:
                continue
            yield owner

    return select
140
def prepare_predicate(next, token):
    """Build the selector for a bracketed predicate.

    Tokens are read with *next* up to the closing "]".  Whitespace tokens
    are skipped and quoted literals are unquoted.  The sequence of token
    kinds (operators as-is, "-" for names, "'" for literals) forms a
    signature that picks the predicate type:

    * ``[@attribute]``          - the attribute is present
    * ``[@attribute='value']``  - the attribute equals the value
    * ``[tag]``                 - at least one child with that tag exists
    * ``[.='value']``           - the element's joined text equals the value
    * ``[tag='value']``         - some child's joined text equals the value
    * ``[index]``, ``[last()]``, ``[last()-index]`` - position among the
      parent's children that share the element's tag

    Raises SyntaxError for a zero or negative index, for a function other
    than ``last``, for a non-integer offset after ``last()``, and for any
    signature that matches none of the forms above.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    shape = []
    predicate = []
    while True:
        token = next()
        kind = token[0]
        if kind == "]":
            break
        if token == ('', ''):
            # ignore whitespace
            continue
        text = token[1]
        if kind and kind[:1] in "'\"":
            # quoted literal: drop the quotes, report the kind as "'"
            kind, text = "'", kind[1:-1]
        shape.append(kind or "-")
        predicate.append(text)
    signature = "".join(shape)

    def is_integer(text):
        return re.match(r"-?\d+$", text) is not None

    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        attribute = predicate[1]

        def select(result):
            for elem in result:
                if elem.get(attribute) is None:
                    continue
                yield elem
        return select

    if signature == "@-='":
        # [@attribute='value']
        attribute, expected = predicate[1], predicate[-1]

        def select(result):
            for elem in result:
                if elem.get(attribute) == expected:
                    yield elem
        return select

    if signature == "-" and not is_integer(predicate[0]):
        # [tag]
        child_tag = predicate[0]

        def select(result):
            for elem in result:
                # any() stops at the first child, so one hit is enough
                if any(True for _ in elem.iterchildren(child_tag)):
                    yield elem
        return select

    if signature == ".='" or (
            signature == "-='" and not is_integer(predicate[0])):
        # [.='value'] or [tag='value']
        child_tag, expected = predicate[0], predicate[-1]
        if child_tag:
            def select(result):
                for elem in result:
                    if any("".join(child.itertext()) == expected
                           for child in elem.iterchildren(child_tag)):
                        yield elem
        else:
            def select(result):
                for elem in result:
                    if "".join(elem.itertext()) == expected:
                        yield elem
        return select

    if signature in ("-", "-()", "-()-"):
        # [index] or [last()] or [last()-index]
        if signature == "-":
            # [index]: convert the 1-based position to a list index
            index = int(predicate[0]) - 1
            if index == -1:
                raise SyntaxError(
                    "indices in path predicates are 1-based, not 0-based")
            if index < 0:
                raise SyntaxError("path index >= 1 expected")
        elif predicate[0] != "last":
            raise SyntaxError("unsupported function")
        elif signature == "-()-":
            # the offset token still carries its sign, e.g. "-1"
            try:
                index = int(predicate[2]) - 1
            except ValueError:
                raise SyntaxError("unsupported expression")
        else:
            index = -1

        def select(result):
            for elem in result:
                parent = elem.getparent()
                if parent is None:
                    continue
                try:
                    # FIXME: what if the selector is "*" ?
                    same_tag = list(parent.iterchildren(elem.tag))
                    if same_tag[index] is elem:
                        yield elem
                except IndexError:
                    pass
        return select

    raise SyntaxError("invalid predicate")
238
# Dispatch table: maps the operator part of a path token to the factory
# that builds the selector for that step ("" stands for a plain name).
ops = dict((
    ("", prepare_child),
    ("*", prepare_star),
    (".", prepare_self),
    ("..", prepare_parent),
    ("//", prepare_descendant),
    ("[", prepare_predicate),
))
247
248
249 # --------------------------------------------------------------------
250
251 _cache = {}
252
253
def _build_path_iterator(path, namespaces):
    """Compile *path* into a list of selector callables, with caching.

    A trailing "/" is treated as "/*".  Results are stored in ``_cache``
    under a key built from the path and the namespace mapping; the cache is
    emptied once it holds more than 100 entries.

    Raises ValueError when *namespaces* maps both None and '' to different
    values, and SyntaxError for an absolute path, an empty path expression,
    or a path that ends in the middle of a step.
    """
    if path[-1:] == "/":
        path += "*"  # implicit all (FIXME: keep this?)

    cache_key = (path,)
    if namespaces:
        # lxml originally used None for the default namespace but ElementTree
        # uses the more convenient (all-strings-dict) empty string, so both
        # are supported here as long as they aren't ambiguous.
        if None not in namespaces:
            cache_key += tuple(sorted(namespaces.items()))
        else:
            if '' in namespaces and namespaces[None] != namespaces['']:
                raise ValueError("Ambiguous default namespace provided: %r versus %r" % (
                    namespaces[None], namespaces['']))
            # the None key is kept out of the sort and placed first instead
            named = sorted(
                (prefix, uri) for prefix, uri in namespaces.items()
                if prefix is not None)
            cache_key += (namespaces[None],) + tuple(named)

    compiled = _cache.get(cache_key)
    if compiled is not None:
        return compiled
    if len(_cache) > 100:
        _cache.clear()

    if path[:1] == "/":
        raise SyntaxError("cannot use absolute path on element")

    stream = iter(xpath_tokenizer(path, namespaces))
    # Python 2 iterators expose .next, Python 3 iterators .__next__
    _next = getattr(stream, "next", None)
    if _next is None:
        _next = stream.__next__

    try:
        token = _next()
    except StopIteration:
        raise SyntaxError("empty path expression")

    selector = []
    while True:
        # the factory may consume further tokens of its own step
        try:
            step = ops[token[0]](_next, token)
            selector.append(step)
        except StopIteration:
            raise SyntaxError("invalid path")
        # advance to the next step, skipping one "/" separator
        try:
            token = _next()
            if token[0] == "/":
                token = _next()
        except StopIteration:
            break

    _cache[cache_key] = selector
    return selector
306
307
308 ##
309 # Iterate over the matching nodes
310
def iterfind(elem, path, namespaces=None):
    """Return an iterator over what *path* selects, starting at *elem*.

    The path is compiled by ``_build_path_iterator`` and each resulting
    selector is applied in turn to the output of the previous one, starting
    from an iterator over *elem* alone.
    """
    steps = _build_path_iterator(path, namespaces)
    matches = iter((elem,))
    for step in steps:
        matches = step(matches)
    return matches
317
318
319 ##
320 # Find first matching object.
321
def find(elem, path, namespaces=None):
    """Return the first item produced by ``iterfind``, or None if empty."""
    return next(iterfind(elem, path, namespaces), None)
328
329
330 ##
331 # Find all matching objects.
332
def findall(elem, path, namespaces=None):
    """Return everything produced by ``iterfind`` as a list."""
    matching = iterfind(elem, path, namespaces)
    return list(matching)
335
336
337 ##
338 # Find text for first matching object.
339
def findtext(elem, path, default=None, namespaces=None):
    """Return the ``text`` of the first match of *path*.

    Gives *default* when ``find`` returns None, and '' when the matched
    object's ``text`` is empty or None.
    """
    match = find(elem, path, namespaces)
    if match is not None:
        return match.text or ''
    return default