comparison env/lib/python3.9/site-packages/lxml/isoschematron/__init__.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """The ``lxml.isoschematron`` package implements ISO Schematron support on top
2 of the pure-xslt 'skeleton' implementation.
3 """
4
5 import sys
6 import os.path
7 from lxml import etree as _etree # due to validator __init__ signature
8
9
# Python 2/3 compatibility aliases (same approach as lxml.html).
# If the Python 2 names are not defined, bind them to ``str`` so the rest
# of the module can use ``unicode`` / ``basestring`` unconditionally.
try:
    unicode
except NameError:
    unicode = str  # Python 3

try:
    basestring
except NameError:
    basestring = str  # Python 3
21
22
# Public API of the package: the skeleton XSLT steps, the SVRL/schema
# helpers, the parameter conversion function and the validator class.
__all__ = [
    'extract_xsd',
    'extract_rng',
    'iso_dsdl_include',
    'iso_abstract_expand',
    'iso_svrl_for_xslt1',
    'svrl_validation_errors',
    'schematron_schema_valid',
    'stylesheet_params',
    'Schematron',
]
27
28
# Namespace URIs used throughout this module.
# FIXME: Maybe lxml should provide a dedicated place for common namespace
# FIXME: definitions?
XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema"
RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
SVRL_NS = "http://purl.oclc.org/dsdl/svrl"


# Helpers: fully qualified ('{namespace}tag') root element names of the
# supported schema languages, and the directory holding the bundled
# XSLT / RelaxNG resource files (located next to this module).
_schematron_root = '{%s}schema' % SCHEMATRON_NS
_xml_schema_root = '{%s}schema' % XML_SCHEMA_NS
_resources_dir = os.path.join(os.path.dirname(__file__), 'resources')
42
43
def _load_xslt(*path_parts):
    """Parse the stylesheet at ``_resources_dir/xsl/<path_parts>`` and
    return it compiled as an ``etree.XSLT`` object.
    """
    stylesheet_path = os.path.join(_resources_dir, 'xsl', *path_parts)
    return _etree.XSLT(_etree.parse(stylesheet_path))


# the iso-schematron skeleton implementation steps aka xsl transformations
extract_xsd = _load_xslt('XSD2Schtrn.xsl')
extract_rng = _load_xslt('RNG2Schtrn.xsl')
iso_dsdl_include = _load_xslt(
    'iso-schematron-xslt1', 'iso_dsdl_include.xsl')
iso_abstract_expand = _load_xslt(
    'iso-schematron-xslt1', 'iso_abstract_expand.xsl')
iso_svrl_for_xslt1 = _load_xslt(
    'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl')
58
59
# svrl result accessors: XPath selecting every ``failed-assert`` element of
# an SVRL report document
svrl_validation_errors = _etree.XPath(
    '//svrl:failed-assert',
    namespaces={'svrl': SVRL_NS},
)


# RelaxNG validator for schematron schemas, built from the bundled
# 'iso-schematron.rng' grammar
schematron_schema_valid = _etree.RelaxNG(
    file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'),
)
68
69
def stylesheet_params(**kwargs):
    """Turn keyword arguments into a dictionary of stylesheet parameters.

    XSL stylesheet parameters must be XPath expressions, i.e.:

    * string expressions, like "'5'"
    * simple (number) expressions, like "5"
    * valid XPath expressions, like "/a/b/text()"

    Each keyword value is converted according to its Python type:

    * a string is wrapped with ``XSLT.strparam()``
    * an ``XPath`` object is passed through unchanged
    * ``None`` is rejected with a ``TypeError``
    * anything else is converted to its (unicode) string form
    """
    def _as_param(value):
        # One guard clause per supported kind of value.
        if value is None:
            raise TypeError('None not allowed as a stylesheet parameter')
        if isinstance(value, basestring):
            return _etree.XSLT.strparam(value)
        if isinstance(value, _etree.XPath):
            return value
        return unicode(value)

    return {name: _as_param(value) for name, value in kwargs.items()}
95
96
# helper function for use in Schematron __init__
def _stylesheet_param_dict(paramsDict, kwargsDict):
    """Merge ``kwargsDict`` into a copy of ``paramsDict`` and return the
    result converted to stylesheet arguments via ``stylesheet_params()``.

    Entries of ``kwargsDict`` whose value is None are skipped, so they never
    override an entry of ``paramsDict``.
    """
    # Work on a copy: paramsDict may be a shared (mutable default) dict.
    merged = dict(paramsDict)
    merged.update(
        (name, value) for name, value in kwargsDict.items()
        if value is not None)
    return stylesheet_params(**merged)
110
111
class Schematron(_etree._Validator):
    """An ISO Schematron validator.

    Pass a root Element or an ElementTree to turn it into a validator.
    Alternatively, pass a filename as keyword argument 'file' to parse from
    the file system.

    Schematron is a less well known, but very powerful schema language.
    The main idea is to use the capabilities of XPath to put restrictions on
    the structure and the content of XML documents.

    The standard behaviour is to fail on ``failed-assert`` findings only
    (``ASSERTS_ONLY``).  To change this, you can either pass a report filter
    function to the ``error_finder`` parameter (e.g. ``ASSERTS_AND_REPORTS``
    or a custom ``XPath`` object), or subclass isoschematron.Schematron for
    complete control of the validation process.

    Built on the Schematron language 'reference' skeleton pure-xslt
    implementation, the validator is created as an XSLT 1.0 stylesheet using
    these steps:

     0) (Extract from XML Schema or RelaxNG schema)
     1) Process inclusions
     2) Process abstract patterns
     3) Compile the schematron schema to XSLT

    The ``include`` and ``expand`` keyword arguments can be used to switch off
    steps 1) and 2).
    To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
    keyword arguments ``include_params``, ``expand_params`` or
    ``compile_params``.
    For convenience, the compile-step parameter ``phase`` is also exposed as a
    keyword argument ``phase``. This takes precedence if the parameter is also
    given in the parameter dictionary.

    If ``store_schematron`` is set to True, the (included-and-expanded)
    schematron document tree is stored and available through the ``schematron``
    property.
    If ``store_xslt`` is set to True, the validation XSLT document tree will be
    stored and can be retrieved through the ``validator_xslt`` property.
    With ``store_report`` set to True (default: False), the resulting validation
    report document gets stored and can be accessed as the ``validation_report``
    property.

    Here is a usage example::

      >>> from lxml import etree
      >>> from lxml.isoschematron import Schematron

      >>> schematron = Schematron(etree.XML('''
      ... <schema xmlns="http://purl.oclc.org/dsdl/schematron" >
      ...   <pattern id="id_only_attribute">
      ...     <title>id is the only permitted attribute name</title>
      ...     <rule context="*">
      ...       <report test="@*[not(name()='id')]">Attribute
      ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
      ...       </report>
      ...     </rule>
      ...   </pattern>
      ... </schema>'''),
      ... error_finder=Schematron.ASSERTS_AND_REPORTS)

      >>> xml = etree.XML('''
      ... <AAA name="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC color="ccc"/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      False

      >>> xml = etree.XML('''
      ... <AAA id="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      True
    """

    # libxml2 error categorization for validation errors
    _domain = _etree.ErrorDomains.SCHEMATRONV
    _level = _etree.ErrorLevels.ERROR
    _error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT

    # convenience definitions for common behaviours
    ASSERTS_ONLY = svrl_validation_errors  # Default
    ASSERTS_AND_REPORTS = _etree.XPath(
        '//svrl:failed-assert | //svrl:successful-report',
        namespaces={'svrl': SVRL_NS})

    def _extract(self, element):
        """Extract embedded schematron schema from non-schematron host schema.
        This method will only be called by __init__ if the given schema document
        is not a schematron schema by itself.
        Must return a schematron schema document tree or None.
        """
        schematron = None
        if element.tag == _xml_schema_root:
            schematron = self._extract_xsd(element)
        # Use .get(): an element without any namespace has prefix None and
        # no matching nsmap entry, and must yield None here (so that __init__
        # raises SchematronParseError) instead of leaking a KeyError.
        elif element.nsmap.get(element.prefix) == RELAXNG_NS:
            # RelaxNG does not have a single unique root element
            schematron = self._extract_rng(element)
        return schematron

    # customization points
    # etree.XSLT objects that provide the extract, include, expand, compile
    # steps
    _extract_xsd = extract_xsd
    _extract_rng = extract_rng
    _include = iso_dsdl_include
    _expand = iso_abstract_expand
    _compile = iso_svrl_for_xslt1

    # etree.xpath object that determines input document validity when applied to
    # the svrl result report; must return a list of result elements (empty if
    # valid)
    _validation_errors = ASSERTS_ONLY

    # NOTE: the ``{}`` defaults below are only ever read (unpacked or copied),
    # never mutated, so sharing them between calls is harmless.
    def __init__(self, etree=None, file=None, include=True, expand=True,
                 include_params={}, expand_params={}, compile_params={},
                 store_schematron=False, store_xslt=False, store_report=False,
                 phase=None, error_finder=ASSERTS_ONLY):
        """Build the validating XSLT from a schema document.

        ``etree`` is a root Element or an ElementTree; if it is None, the
        schema is parsed from ``file`` instead.  The remaining keyword
        arguments are described in the class docstring.

        Raises ``SchematronParseError`` if obtaining the root element fails,
        if the document neither is nor contains a schematron schema, or if
        the resulting schematron schema is invalid.  Raises ``ValueError``
        if neither ``etree`` nor ``file`` yields a root element.
        """
        super(Schematron, self).__init__()

        self._store_report = store_report
        self._schematron = None
        self._validator_xslt = None
        self._validation_report = None
        # only shadow the class-level default if a different finder is given
        if error_finder is not self.ASSERTS_ONLY:
            self._validation_errors = error_finder

        # parse schema document, may be a schematron schema or an XML Schema or
        # a RelaxNG schema with embedded schematron rules
        root = None
        try:
            if etree is not None:
                if _etree.iselement(etree):
                    root = etree
                else:
                    root = etree.getroot()
            elif file is not None:
                root = _etree.parse(file).getroot()
        except Exception:
            raise _etree.SchematronParseError(
                "No tree or file given: %s" % sys.exc_info()[1])
        if root is None:
            raise ValueError("Empty tree")
        if root.tag == _schematron_root:
            schematron = root
        else:
            schematron = self._extract(root)
        if schematron is None:
            raise _etree.SchematronParseError(
                "Document is not a schematron schema or schematron-extractable")
        # perform the iso-schematron skeleton implementation steps to get a
        # validating xslt
        if include:
            schematron = self._include(schematron, **include_params)
        if expand:
            schematron = self._expand(schematron, **expand_params)
        if not schematron_schema_valid(schematron):
            raise _etree.SchematronParseError(
                "invalid schematron schema: %s" %
                schematron_schema_valid.error_log)
        if store_schematron:
            self._schematron = schematron
        # add new compile keyword args here if exposing them
        compile_kwargs = {'phase': phase}
        compile_params = _stylesheet_param_dict(compile_params, compile_kwargs)
        validator_xslt = self._compile(schematron, **compile_params)
        if store_xslt:
            self._validator_xslt = validator_xslt
        self._validator = _etree.XSLT(validator_xslt)

    def __call__(self, etree):
        """Validate doc using Schematron.

        Returns true if document is valid, false if not.

        Every element selected by the error finder is appended to the error
        log as one message (serialized XML, line number 0).  The report is
        kept for ``validation_report`` if ``store_report`` was set.
        """
        self._clear_error_log()
        result = self._validator(etree)
        if self._store_report:
            self._validation_report = result
        errors = self._validation_errors(result)
        if errors:
            if _etree.iselement(etree):
                fname = etree.getroottree().docinfo.URL or '<file>'
            else:
                fname = etree.docinfo.URL or '<file>'
            for error in errors:
                # Does svrl report the line number, anywhere? Don't think so.
                self._append_log_message(
                    domain=self._domain, type=self._error_type,
                    level=self._level, line=0,
                    message=_etree.tostring(error, encoding='unicode'),
                    filename=fname)
            return False
        return True

    @property
    def schematron(self):
        """ISO-schematron schema document (None if object has been initialized
        with store_schematron=False).
        """
        return self._schematron

    @property
    def validator_xslt(self):
        """ISO-schematron skeleton implementation XSLT validator document (None
        if object has been initialized with store_xslt=False).
        """
        return self._validator_xslt

    @property
    def validation_report(self):
        """ISO-schematron validation result report (None if result-storing has
        been turned off).
        """
        return self._validation_report