comparison env/lib/python3.9/site-packages/lxml/sax.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # cython: language_level=2
2
3 """
4 SAX-based adapter to copy trees from/to the Python standard library.
5
6 Use the `ElementTreeContentHandler` class to build an ElementTree from
7 SAX events.
8
9 Use the `ElementTreeProducer` class or the `saxify()` function to fire
10 the SAX events of an ElementTree against a SAX ContentHandler.
11
12 See https://lxml.de/sax.html
13 """
14
15 from __future__ import absolute_import
16
17 from xml.sax.handler import ContentHandler
18 from lxml import etree
19 from lxml.etree import ElementTree, SubElement
20 from lxml.etree import Comment, ProcessingInstruction
21
22
class SaxError(etree.LxmlError):
    """Error raised for problems in the SAX adapter layer.

    Used, for example, when an end-element event names a different element
    than the one that is currently open.
    """
26
27
28 def _getNsTag(tag):
29 if tag[0] == '{':
30 return tuple(tag[1:].split('}', 1))
31 else:
32 return None, tag
33
34
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Feed SAX events to an instance (directly or through a SAX parser) and
    read the result from the `etree` property afterwards.

    The optional ``makeelement`` argument is the factory used to create the
    root element.  It is called as ``makeelement(tag, attrs, nsmap)`` and
    defaults to ``etree.Element``.
    """
    def __init__(self, makeelement=None):
        ContentHandler.__init__(self)
        self._root = None
        # Processing instructions reported before the root element exists.
        self._root_siblings = []
        # Elements that are currently open, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs.  The None (default namespace) entry starts
        # with a None sentinel so endPrefixMapping() can always look at [-1].
        self._ns_mapping = { None : [None] }
        # Mappings declared since the previous startElementNS() call.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        pass

    def startDocument(self):
        pass

    def endDocument(self):
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a prefix declaration for the next element to be started.
        """
        self._new_mappings[prefix] = uri
        try:
            self._ns_mapping[prefix].append(uri)
        except KeyError:
            self._ns_mapping[prefix] = [uri]
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost declaration of ``prefix``.

        Raises KeyError if the prefix was never declared.
        """
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Fall back to the enclosing default namespace (or the sentinel).
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ``{uri}local`` tag for a ``(uri, local_name)`` pair.

        A name without a namespace URI is placed in the current default
        namespace, if one is declared.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            # Format from the unpacked values so that any two-item sequence
            # works, not only a real tuple.
            el_tag = "{%s}%s" % (ns_uri, local_name)
        elif self._default_ns:
            el_tag = "{%s}%s" % (self._default_ns, local_name)
        else:
            el_tag = local_name
        return el_tag

    def startElementNS(self, ns_name, qname, attributes=None):
        """Open a new element and make it the current one.

        The first element becomes the root and is created through the
        ``makeelement`` factory; every later one is created with SubElement()
        below the innermost open element.  ``qname`` is not used.
        """
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            try:
                # Python 2 style mapping
                iter_attributes = attributes.iteritems()
            except AttributeError:
                iter_attributes = attributes.items()

            for name_tuple, value in iter_attributes:
                if name_tuple[0]:
                    attr_name = "{%s}%s" % (name_tuple[0], name_tuple[1])
                else:
                    # Unlike elements, un-namespaced attributes do not get
                    # the default namespace.
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                self._makeelement(el_name, attrs, self._new_mappings)
            if self._root_siblings and hasattr(element, 'addprevious'):
                # Attach the processing instructions that preceded the root.
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position.

        Before the root element exists the PI is queued and attached in
        startElementNS().  While an element is open it is appended to that
        element.  After the root element has been closed it becomes a
        following sibling of the root.
        """
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        elif self._element_stack:
            self._element_stack[-1].append(pi)
        else:
            # The root is already closed, so there is no open element to
            # append to; indexing the empty stack would raise IndexError.
            self._append_root_sibling(pi)

    def _append_root_sibling(self, node):
        """Attach ``node`` after the root and any siblings that follow it.

        Root objects that lack addnext()/getnext() cannot carry root-level
        siblings; the node is dropped for them, mirroring how leading
        processing instructions are handled in startElementNS().
        """
        last = self._root
        if not (hasattr(last, 'addnext') and hasattr(last, 'getnext')):
            return
        # Walk to the last existing sibling to keep document order.
        following = last.getnext()
        while following is not None:
            last = following
            following = last.getnext()
        last.addnext(node)

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises SaxError if ``ns_name`` does not resolve to the tag of that
        element.  ``qname`` is not used.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Non-namespace variant; forwards to startElementNS().
        """
        if attributes:
            attributes = {(None, k): v for k, v in attributes.items()}
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Non-namespace variant; forwards to endElementNS().
        """
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append character data inside the innermost open element.

        Must be called while an element is open.
        """
        last_element = self._element_stack[-1]
        try:
            # if there already is a child element, we must append to its tail
            last_element = last_element[-1]
            last_element.tail = (last_element.tail or '') + data
        except IndexError:
            # otherwise: append to the text
            last_element.text = (last_element.text or '') + data

    ignorableWhitespace = characters
157
158
class ElementTreeProducer(object):
    """Replay an element (or tree) and its descendants as SAX events.

    The events are fired against the SAX ContentHandler given to the
    constructor when `saxify()` is called.
    """
    def __init__(self, element_or_tree, content_handler):
        from xml.sax.xmlreader import AttributesNSImpl

        # Trees offer getroot(); anything else is taken to be an element.
        try:
            root = element_or_tree.getroot()
        except AttributeError:
            root = element_or_tree

        self._element = root
        self._content_handler = content_handler
        self._attr_class = AttributesNSImpl
        # Shared attributes object for elements without attributes.
        self._empty_attributes = AttributesNSImpl({}, {})

    def saxify(self):
        """Fire the complete event sequence for the wrapped element.

        Processing instructions that directly precede or follow the element
        as siblings are reported as well, in document order.
        """
        handler = self._content_handler
        root = self._element

        handler.startDocument()

        if hasattr(root, 'getprevious'):
            # Collected walking backwards, so replay them in reverse.
            leading = []
            node = root.getprevious()
            while getattr(node, 'tag', None) is ProcessingInstruction:
                leading.append(node)
                node = node.getprevious()
            for node in reversed(leading):
                self._recursive_saxify(node, {})

        self._recursive_saxify(root, {})

        if hasattr(root, 'getnext'):
            node = root.getnext()
            while getattr(node, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(node, {})
                node = node.getnext()

        handler.endDocument()

    def _recursive_saxify(self, element, parent_nsmap):
        """Fire the events for ``element``, its subtree and its tail text.

        ``parent_nsmap`` is the namespace map in effect on the parent; only
        prefixes whose URI differs from it are announced through
        startPrefixMapping() / endPrefixMapping().
        """
        handler = self._content_handler
        tag = element.tag

        is_pi = tag is ProcessingInstruction
        if is_pi or tag is Comment:
            # Comments produce no event of their own, but their tail text
            # is still reported.
            if is_pi:
                handler.processingInstruction(element.target, element.text)
            trailing = element.tail
            if trailing:
                handler.characters(trailing)
            return

        nsmap = element.nsmap
        if nsmap != parent_nsmap:
            # Only prefixes that are new or rebound at this element.
            declared = [
                (prefix, ns_uri) for prefix, ns_uri in nsmap.items()
                if parent_nsmap.get(prefix) != ns_uri
            ]
        else:
            declared = []

        attribs = element.items()
        if not attribs:
            sax_attributes = self._empty_attributes
        else:
            values_by_name = {}
            qnames_by_name = {}
            for attr_tag, attr_value in attribs:
                name_pair = _getNsTag(attr_tag)
                values_by_name[name_pair] = attr_value
                qnames_by_name[name_pair] = self._build_qname(
                    name_pair[0], name_pair[1], nsmap,
                    preferred_prefix=None, is_attribute=True)
            sax_attributes = self._attr_class(values_by_name, qnames_by_name)

        ns_uri, local_name = _getNsTag(tag)
        qname = self._build_qname(
            ns_uri, local_name, nsmap, element.prefix, is_attribute=False)
        name_pair = (ns_uri, local_name)

        for prefix, uri in declared:
            handler.startPrefixMapping(prefix, uri)
        handler.startElementNS(name_pair, qname, sax_attributes)

        leading_text = element.text
        if leading_text:
            handler.characters(leading_text)
        for child in element:
            self._recursive_saxify(child, nsmap)

        handler.endElementNS(name_pair, qname)
        for prefix, uri in declared:
            handler.endPrefixMapping(prefix)

        trailing = element.tail
        if trailing:
            handler.characters(trailing)

    def _build_qname(self, ns_uri, local_name, nsmap, preferred_prefix, is_attribute):
        """Return the qualified name (``prefix:local`` or ``local``).

        Elements keep ``preferred_prefix`` when it is bound to ``ns_uri`` in
        ``nsmap``.  Otherwise, and always for attributes, the alphabetically
        first non-default prefix bound to ``ns_uri`` is used.  Without such
        a prefix the bare local name is returned.
        """
        if ns_uri is None:
            return local_name

        if is_attribute or nsmap.get(preferred_prefix) != ns_uri:
            # Pick the first matching prefix, in alphabetical order.
            matching = sorted(
                pfx for pfx, uri in nsmap.items()
                if pfx is not None and uri == ns_uri
            )
            prefix = matching[0] if matching else None
        else:
            prefix = preferred_prefix

        # A None prefix means the default namespace: no prefix to write.
        return local_name if prefix is None else prefix + ':' + local_name
272
273
def saxify(element_or_tree, content_handler):
    """Fire the SAX events of an XML tree against a SAX ContentHandler.

    Convenience wrapper that builds an `ElementTreeProducer` for the given
    element or tree and runs it once.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()