Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/prov/tests/test_xml.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 from __future__ import (absolute_import, division, print_function, | |
| 2 unicode_literals) | |
| 3 | |
| 4 import difflib | |
| 5 import glob | |
| 6 import inspect | |
| 7 import io | |
| 8 from lxml import etree | |
| 9 import os | |
| 10 import unittest | |
| 11 import warnings | |
| 12 | |
| 13 from prov.identifier import Namespace, QualifiedName | |
| 14 from prov.constants import PROV | |
| 15 import prov.model as prov | |
| 16 from prov.tests.test_model import AllTestsBase | |
| 17 from prov.tests.utility import RoundTripTestCase | |
| 18 | |
| 19 | |
# (prefix, URI) pairs of the two example namespaces used throughout the tests.
EX_NS = ("ex", "http://example.com/ns/ex#")
EX_TR = ("tr", "http://example.com/ns/tr#")

# Directory of the XML fixtures: the "xml" folder sitting next to this file.
# The file location is taken from the currently executing frame.
DATA_PATH = os.path.join(
    os.path.dirname(
        os.path.abspath(inspect.getfile(inspect.currentframe()))),
    "xml")
| 26 | |
| 27 | |
def remove_empty_tags(tree):
    """
    Recursively reset whitespace-only element text to ``None``.

    The element ``tree`` and all of its descendants are modified in place.
    Only the ``text`` attribute is touched; text containing anything other
    than whitespace is left unchanged.
    """
    text = tree.text
    if text is not None and not text.strip():
        tree.text = None

    for child in tree:
        if not etree.iselement(child):
            continue
        remove_empty_tags(child)
| 34 | |
| 35 | |
def _canonical_xml_lines(doc):
    """
    Parse ``doc`` and return its canonical (C14N) form as a list of lines.

    :param doc: Anything ``etree.parse`` accepts. If the object has a
        ``seek`` method it is rewound to the start before parsing.
    :returns: The canonical XML split into lines, each stripped of
        surrounding whitespace, with empty lines dropped.
    """
    try:
        # Rewind file-like objects so they are parsed from the beginning.
        doc.seek(0, 0)
    except AttributeError:
        # No seek() available (e.g. a file name); parse it as it is.
        pass

    tree = etree.parse(doc)
    root = tree.getroot()

    # Remove comments. "//comment()" also matches comments located outside
    # of the root element; those have no parent to remove them from and are
    # instead left out by ``with_comments=False`` below.
    for comment in root.xpath("//comment()"):
        parent = comment.getparent()
        if parent is not None:
            parent.remove(comment)

    remove_empty_tags(root)

    buf = io.BytesIO()
    tree.write_c14n(buf, with_comments=False)
    # C14N output is UTF-8; name the codec so that decoding does not depend
    # on the interpreter's default codec.
    canonical = buf.getvalue().decode("utf-8")
    return [line.strip() for line in canonical.splitlines() if line.strip()]


def compare_xml(doc1, doc2):
    """
    Helper function to compare two XML files. It will parse both once again
    and write them in a canonical fashion.

    Comments and whitespace-only element text are ignored, and the canonical
    output is compared line by line after stripping each line.

    :param doc1: First document; a file name or a file-like object.
    :param doc2: Second document; a file name or a file-like object.
    :raises AssertionError: If the canonical forms differ. The message
        contains a unified diff of the two canonical forms.
    """
    lines1 = _canonical_xml_lines(doc1)
    lines2 = _canonical_xml_lines(doc2)

    # The compared lines carry no trailing newlines, so the diff control
    # lines must not get one either (lineterm="").
    err_msg = "\n".join(difflib.unified_diff(lines1, lines2, lineterm=""))
    if err_msg:
        msg = "Strings are not equal.\n"
        raise AssertionError(msg + err_msg)
| 82 | |
| 83 | |
class ProvXMLTestCase(unittest.TestCase):
    """
    Serialization and deserialization tests for the PROV-XML format.

    Reference documents are read from the ``DATA_PATH`` directory and
    serialized output is checked with ``compare_xml``.
    """

    def _deserialize_xml_string(self, xml_string):
        """
        Deserialize a PROV-XML document given as a text string.

        :param xml_string: The PROV-XML document as text.
        :returns: The result of ``prov.ProvDocument.deserialize``.
        """
        with io.StringIO(xml_string) as xml:
            return prov.ProvDocument.deserialize(source=xml, format="xml")

    def test_serialization_example_6(self):
        """
        Test the serialization of example 6 which is a simple entity
        description.
        """
        document = prov.ProvDocument()
        ex_ns = document.add_namespace(*EX_NS)
        document.add_namespace(*EX_TR)

        document.entity("tr:WD-prov-dm-20111215", (
            (prov.PROV_TYPE, ex_ns["Document"]),
            ("ex:version", "2")
        ))

        with io.BytesIO() as actual:
            document.serialize(format='xml', destination=actual)
            compare_xml(os.path.join(DATA_PATH, "example_06.xml"), actual)

    def test_serialization_example_7(self):
        """
        Test the serialization of example 7 which is a basic activity.
        """
        document = prov.ProvDocument()
        document.add_namespace(*EX_NS)

        document.activity(
            "ex:a1",
            "2011-11-16T16:05:00",
            "2011-11-16T16:06:00", [
                (prov.PROV_TYPE, prov.Literal("ex:edit", prov.XSD_QNAME)),
                ("ex:host", "server.example.org")])

        with io.BytesIO() as actual:
            document.serialize(format='xml', destination=actual)
            compare_xml(os.path.join(DATA_PATH, "example_07.xml"), actual)

    def test_serialization_example_8(self):
        """
        Test the serialization of example 8 which deals with generation.
        """
        document = prov.ProvDocument()
        document.add_namespace(*EX_NS)

        e1 = document.entity("ex:e1")
        a1 = document.activity("ex:a1")

        document.wasGeneratedBy(entity=e1, activity=a1,
                                time="2001-10-26T21:32:52",
                                other_attributes={"ex:port": "p1"})

        e2 = document.entity("ex:e2")

        document.wasGeneratedBy(entity=e2, activity=a1,
                                time="2001-10-26T10:00:00",
                                other_attributes={"ex:port": "p2"})

        with io.BytesIO() as actual:
            document.serialize(format='xml', destination=actual)
            compare_xml(os.path.join(DATA_PATH, "example_08.xml"), actual)

    def test_deserialization_example_6(self):
        """
        Test the deserialization of example 6 which is a simple entity
        description.
        """
        actual_doc = prov.ProvDocument.deserialize(
            source=os.path.join(DATA_PATH, "example_06.xml"),
            format="xml")

        expected_document = prov.ProvDocument()
        ex_ns = expected_document.add_namespace(*EX_NS)
        expected_document.add_namespace(*EX_TR)

        expected_document.entity("tr:WD-prov-dm-20111215", (
            (prov.PROV_TYPE, ex_ns["Document"]),
            ("ex:version", "2")
        ))

        self.assertEqual(actual_doc, expected_document)

    def test_deserialization_example_7(self):
        """
        Test the deserialization of example 7 which is a simple activity
        description.
        """
        actual_doc = prov.ProvDocument.deserialize(
            source=os.path.join(DATA_PATH, "example_07.xml"),
            format="xml")

        expected_document = prov.ProvDocument()
        ex_ns = Namespace(*EX_NS)
        expected_document.add_namespace(ex_ns)

        expected_document.activity(
            "ex:a1",
            "2011-11-16T16:05:00",
            "2011-11-16T16:06:00", [
                (prov.PROV_TYPE, QualifiedName(ex_ns, "edit")),
                ("ex:host", "server.example.org")])

        self.assertEqual(actual_doc, expected_document)

    def test_deserialization_example_04_and_05(self):
        """
        Example 4 and 5 have a different type specification. They use an
        xsi:type as an attribute on an entity. This can be read but if
        written again it will become an XML child element. This is
        semantically identical but cannot be tested with a round trip.
        """
        # Example 4.
        xml_string = """
        <prov:document
            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xmlns:xsd="http://www.w3.org/2001/XMLSchema"
            xmlns:prov="http://www.w3.org/ns/prov#"
            xmlns:ex="http://example.com/ns/ex#"
            xmlns:tr="http://example.com/ns/tr#">

          <prov:entity prov:id="tr:WD-prov-dm-20111215" xsi:type="prov:Plan">
            <prov:type xsi:type="xsd:QName">ex:Workflow</prov:type>
          </prov:entity>

        </prov:document>
        """
        actual_document = self._deserialize_xml_string(xml_string)

        expected_document = prov.ProvDocument()
        ex_ns = Namespace(*EX_NS)
        expected_document.add_namespace(ex_ns)
        expected_document.add_namespace(*EX_TR)

        # The xsi:type attribute is mapped to a proper PROV attribute.
        expected_document.entity("tr:WD-prov-dm-20111215", (
            (prov.PROV_TYPE, QualifiedName(ex_ns, "Workflow")),
            (prov.PROV_TYPE, PROV["Plan"])))

        self.assertEqual(actual_document, expected_document, "example_04")

        # Example 5.
        xml_string = """
        <prov:document
            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xmlns:xsd="http://www.w3.org/2001/XMLSchema"
            xmlns:prov="http://www.w3.org/ns/prov#"
            xmlns:ex="http://example.com/ns/ex#"
            xmlns:tr="http://example.com/ns/tr#">

          <prov:entity prov:id="tr:WD-prov-dm-20111215" xsi:type="prov:Plan">
            <prov:type xsi:type="xsd:QName">ex:Workflow</prov:type>
            <prov:type xsi:type="xsd:QName">prov:Plan</prov:type> <!-- inferred -->
            <prov:type xsi:type="xsd:QName">prov:Entity</prov:type> <!-- inferred -->
          </prov:entity>

        </prov:document>
        """
        actual_document = self._deserialize_xml_string(xml_string)

        expected_document = prov.ProvDocument()
        # Build the namespace for this section explicitly instead of relying
        # on the variable left over from the example 4 section above.
        ex_ns = Namespace(*EX_NS)
        expected_document.add_namespace(ex_ns)
        expected_document.add_namespace(*EX_TR)

        # The xsi:type attribute is mapped to a proper PROV attribute.
        expected_document.entity("tr:WD-prov-dm-20111215", (
            (prov.PROV_TYPE, QualifiedName(ex_ns, "Workflow")),
            (prov.PROV_TYPE, PROV["Entity"]),
            (prov.PROV_TYPE, PROV["Plan"])
        ))

        self.assertEqual(actual_document, expected_document, "example_05")

    def test_other_elements(self):
        """
        PROV XML uses the <prov:other> element to enable the storage of non
        PROV information in a PROV XML document. It will be ignored by this
        library and a warning will be raised informing the user.
        """
        # This is example 42 from the PROV XML documentation.
        xml_string = """
        <prov:document
            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xmlns:xsd="http://www.w3.org/2001/XMLSchema"
            xmlns:prov="http://www.w3.org/ns/prov#"
            xmlns:ex="http://example.com/ns/ex#">

          <!-- prov statements go here -->

          <prov:other>
            <ex:foo>
              <ex:content>bar</ex:content>
            </ex:foo>
          </prov:other>

          <!-- more prov statements can go here -->

        </prov:document>
        """
        with warnings.catch_warnings(record=True) as w:
            # Record every warning, including ones already shown before.
            warnings.simplefilter("always")
            doc = self._deserialize_xml_string(xml_string)

        self.assertEqual(len(w), 1)
        self.assertIn(
            "Document contains non-PROV information in <prov:other>. It will "
            "be ignored in this package.", str(w[0].message))

        # This document contains nothing else.
        self.assertEqual(len(doc._records), 0)

    def test_nested_default_namespace(self):
        """
        Tests that a default namespace that is defined in a lower level tag is
        written to a bundle.
        """
        filename = os.path.join(DATA_PATH, "nested_default_namespace.xml")
        doc = prov.ProvDocument.deserialize(source=filename, format="xml")

        ns = Namespace("", "http://example.org/0/")

        self.assertEqual(len(doc._records), 1)
        self.assertEqual(doc.get_default_namespace(), ns)
        self.assertEqual(doc._records[0].identifier.namespace, ns)
        self.assertEqual(doc._records[0].identifier.localpart, "e001")

    def test_redefining_namespaces(self):
        """
        Test the behaviour when namespaces are redefined at the element level.
        """
        filename = os.path.join(DATA_PATH,
                                "namespace_redefined_but_does_not_change.xml")
        doc = prov.ProvDocument.deserialize(source=filename, format="xml")
        # This has one record part of the original namespace.
        self.assertEqual(len(doc._records), 1)
        ns = Namespace("ex", "http://example.com/ns/ex#")
        self.assertEqual(doc._records[0].attributes[0][1].namespace, ns)

        # This also has one record but now in a different namespace.
        filename = os.path.join(DATA_PATH, "namespace_redefined.xml")
        doc = prov.ProvDocument.deserialize(source=filename, format="xml")
        new_ns = doc._records[0].attributes[0][1].namespace
        self.assertNotEqual(new_ns, ns)
        self.assertEqual(new_ns.uri, "http://example.com/ns/new_ex#")
| 338 | |
| 339 | |
class ProvXMLRoundTripFromFileTestCase(unittest.TestCase):
    # Defines no test methods of its own, only the shared round-trip helper.

    def _perform_round_trip(self, filename, force_types=False):
        """
        Read ``filename`` as PROV-XML, write it back out and compare both.

        :param filename: Path of the PROV-XML file to round trip.
        :param force_types: Passed through to ``serialize``.
        :raises AssertionError: Raised by ``compare_xml`` if the written XML
            differs from the original file.
        """
        parsed = prov.ProvDocument.deserialize(source=filename, format="xml")

        with io.BytesIO() as written:
            parsed.serialize(
                format='xml',
                destination=written,
                force_types=force_types)
            compare_xml(filename, written)
| 349 | |
| 350 | |
# Build one round-trip test function per XML file found in DATA_PATH. The
# file name is part of the generated function name, so each test says which
# file it covers.
for filename in glob.iglob(
        os.path.join(DATA_PATH, "*" + os.path.extsep + "xml")):
    name = os.path.splitext(os.path.basename(filename))[0]
    test_name = "test_roundtrip_from_xml_%s" % name

    # These files cannot be round tripped: namespaces in the PROV data model
    # are always defined per bundle and not per element.
    if name in ("nested_default_namespace",
                "nested_changing_default_namespace",
                "namespace_redefined_but_does_not_change",
                "namespace_redefined"):
        continue

    # A factory function gives every generated test its own binding of the
    # file name.
    def get_fct(f):
        # Some test files have a lot of type declarations...
        force_types = name in ["pc1"]

        def fct(self):
            self._perform_round_trip(f, force_types=force_types)
        return fct

    fct = get_fct(filename)
    fct.__name__ = str(test_name)

    # Disabled round-trip XML comparisons since deserializing then serializing
    # PROV-XML does not maintain XML equivalence. (For example, prov:entity
    # elements with type prov:Plan become prov:plan elements)
    # TODO: Revisit these tests

    # setattr(ProvXMLRoundTripFromFileTestCase, test_name, fct)
| 388 | |
| 389 | |
class RoundTripXMLTests(RoundTripTestCase, AllTestsBase):
    # Combines the round-trip test case with the shared test collection.
    # NOTE(review): FORMAT is presumably read by RoundTripTestCase to pick
    # the serialization format - confirm in prov.tests.utility.
    FORMAT = "xml"
| 392 | |
| 393 | |
# Run the tests of this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
