Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/prov/model.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d30785e31577 | 1:56ad4e20f292 |
|---|---|
| 1 """Python implementation of the W3C Provenance Data Model (PROV-DM), including | |
| 2 support for PROV-JSON import/export | |
| 3 | |
| 4 References: | |
| 5 | |
| 6 PROV-DM: http://www.w3.org/TR/prov-dm/ | |
| 7 PROV-JSON: https://provenance.ecs.soton.ac.uk/prov-json/ | |
| 8 """ | |
| 9 from __future__ import (absolute_import, division, print_function, | |
| 10 unicode_literals) | |
| 11 | |
| 12 from collections import defaultdict | |
| 13 from copy import deepcopy | |
| 14 import datetime | |
| 15 import io | |
| 16 import itertools | |
| 17 import logging | |
| 18 import os | |
| 19 import shutil | |
| 20 import tempfile | |
| 21 | |
| 22 import dateutil.parser | |
| 23 from prov import Error, serializers | |
| 24 from prov.constants import * | |
| 25 from prov.identifier import Identifier, QualifiedName, Namespace | |
| 26 from six.moves.urllib.parse import urlparse | |
| 27 | |
| 28 | |
# Package metadata.
__author__ = "Trung Dong Huynh"
__email__ = "trungdong@donggiang.com"


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
| 34 | |
| 35 | |
| 36 # Data Types | |
def _ensure_datetime(value):
    """Parse ``value`` with dateutil when it is a string.

    Any non-string value is handed back untouched.
    """
    if not isinstance(value, six.string_types):
        return value
    return dateutil.parser.parse(value)
| 42 | |
| 43 | |
def parse_xsd_datetime(value):
    """Parse ``value`` into a datetime, or return None when it cannot be parsed.

    :param value: String to be parsed by :py:func:`dateutil.parser.parse`.
    :return: The parsed datetime, or None if parsing failed.
    """
    try:
        return dateutil.parser.parse(value)
    except (ValueError, OverflowError, TypeError):
        # dateutil signals an unparsable string with ValueError, an
        # out-of-range date with OverflowError and a non-string input with
        # TypeError; all of them mean "not a datetime" to the callers, which
        # test the result against None.
        return None
| 50 | |
| 51 | |
def parse_boolean(value):
    """Map a boolean lexical form to a Python bool.

    The comparison is case-insensitive: "true"/"1" give True, "false"/"0"
    give False and anything else gives None.
    """
    lowered = value.lower()
    if lowered in ("true", "1"):
        return True
    if lowered in ("false", "0"):
        return False
    return None
| 59 | |
# Parsers keyed by the Python type they produce.
DATATYPE_PARSERS = {datetime.datetime: parse_xsd_datetime}
| 63 | |
| 64 | |
# XSD datatype -> callable turning the lexical form into a Python value
XSD_DATATYPE_PARSERS = {
    XSD_STRING: six.text_type,
    XSD_DOUBLE: float,
    XSD_LONG: six.integer_types[-1],  # long on Python 2, int on Python 3
    XSD_INT: int,
    XSD_BOOLEAN: parse_boolean,
    XSD_DATETIME: parse_xsd_datetime,
    XSD_ANYURI: Identifier,
}
| 76 | |
| 77 | |
def parse_xsd_types(value, datatype):
    """Convert ``value`` with the parser registered for ``datatype``.

    Returns None when ``datatype`` has no entry in XSD_DATATYPE_PARSERS.
    """
    if datatype in XSD_DATATYPE_PARSERS:
        convert = XSD_DATATYPE_PARSERS[datatype]
        return convert(value)
    return None
| 83 | |
| 84 | |
def first(a_set):
    """Return the first item yielded by ``a_set``, or None if it is empty."""
    for item in a_set:
        return item
    return None
| 87 | |
| 88 | |
def _ensure_multiline_string_triple_quoted(value):
    """Quote ``value`` for PROV-N output.

    The value is converted to text and its double quotes are
    backslash-escaped. Text containing a newline is wrapped in triple
    double quotes, anything else in single double quotes.
    """
    escaped = six.text_type(value).replace('"', '\\"')
    if '\n' not in escaped:
        return '"%s"' % escaped
    return '"""%s"""' % escaped
| 98 | |
| 99 | |
def encoding_provn_value(value):
    """Return the PROV-N text for a plain Python ``value``.

    Strings are quoted, datetimes/floats/booleans are written as typed
    literals, and every other value falls back to its text form.
    """
    if isinstance(value, six.string_types):
        return _ensure_multiline_string_triple_quoted(value)
    if isinstance(value, datetime.datetime):
        # str.format leaves '%%' alone, so it is emitted verbatim here
        return u'"{0}" %% xsd:dateTime'.format(value.isoformat())
    if isinstance(value, float):
        # NOTE(review): %g keeps only 6 significant digits - confirm intended
        return u'"%g" %%%% xsd:float' % value
    if isinstance(value, bool):
        return u'"%i" %%%% xsd:boolean' % value
    # TODO: QName export
    return six.text_type(value)
| 112 | |
| 113 | |
@six.python_2_unicode_compatible
class Literal(object):
    """A literal value with an optional datatype and language tag.

    The value and the language tag are always stored as text. When a
    language tag is given, the datatype is forced to
    ``prov:InternationalizedString``.
    """

    def __init__(self, value, datatype=None, langtag=None):
        """
        Constructor.

        :param value: The literal's value; stored as its text form.
        :param datatype: Datatype of the literal (default: None).
        :param langtag: Language tag of the literal (default: None).
        """
        self._value = six.text_type(value)  # value is always a string
        if langtag:
            if datatype is None:
                logger.debug(
                    'Assuming prov:InternationalizedString as the type of '
                    '"%s"@%s', value, langtag
                )
                datatype = PROV["InternationalizedString"]
            # PROV JSON states that the type field must not be set when
            # using the lang attribute and PROV XML requires it to be an
            # internationalized string.
            elif datatype != PROV["InternationalizedString"]:
                # Logger.warn is a deprecated alias of Logger.warning
                logger.warning(
                    'Invalid data type (%s) for "%s"@%s, overridden as '
                    'prov:InternationalizedString.',
                    datatype, value, langtag
                )
                datatype = PROV["InternationalizedString"]
        self._datatype = datatype
        # langtag is always a string
        self._langtag = six.text_type(langtag) if langtag is not None else None

    def __str__(self):
        return self.provn_representation()

    def __repr__(self):
        return u'<Literal: %s>' % self.provn_representation()

    def __eq__(self, other):
        """Literals are equal when value, datatype and langtag all match."""
        return (
            (self._value == other.value and
             self._datatype == other.datatype and
             self._langtag == other.langtag)
            if isinstance(other, Literal) else False
        )

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash((self._value, self._datatype, self._langtag))

    @property
    def value(self):
        """Text form of the literal's value."""
        return self._value

    @property
    def datatype(self):
        """Datatype of the literal (may be None)."""
        return self._datatype

    @property
    def langtag(self):
        """Language tag of the literal as text, or None."""
        return self._langtag

    def has_no_langtag(self):
        """True if no language tag was set on this literal."""
        return self._langtag is None

    def provn_representation(self):
        """
        Returns the PROV-N representation of the literal.

        :return: ``"value"@langtag`` when a language tag is present,
            otherwise ``"value" %% datatype``.
        """
        if self._langtag:
            # a language tag can only go with prov:InternationalizedString
            return '%s@%s' % (
                _ensure_multiline_string_triple_quoted(self._value),
                six.text_type(self._langtag)
            )
        else:
            return '%s %%%% %s' % (
                _ensure_multiline_string_triple_quoted(self._value),
                six.text_type(self._datatype)
            )
| 186 | |
| 187 | |
| 188 # Exceptions and warnings | |
class ProvException(Error):
    """Root of the exception hierarchy raised by the PROV model."""
| 192 | |
| 193 | |
class ProvWarning(Warning):
    """Root of the warning hierarchy issued by the PROV model."""
| 197 | |
| 198 | |
@six.python_2_unicode_compatible
class ProvExceptionInvalidQualifiedName(ProvException):
    """Exception for an invalid qualified identifier name."""

    # Intended qualified name.
    qname = None

    def __init__(self, qname):
        """
        Constructor.

        :param qname: Invalid qualified name.
        """
        # Pass the argument on to the base exception so that ``args`` is
        # populated; without it the exception cannot be re-created from
        # ``args`` (e.g. when pickled or copied).
        # NOTE(review): assumes prov.Error keeps the standard Exception
        # constructor - confirm.
        super(ProvExceptionInvalidQualifiedName, self).__init__(qname)
        self.qname = qname

    def __str__(self):
        return u'Invalid Qualified Name: %s' % self.qname
| 216 | |
| 217 | |
@six.python_2_unicode_compatible
class ProvElementIdentifierRequired(ProvException):
    """Raised when a PROV element is created without an identifier."""

    def __str__(self):
        message = (
            u'An identifier is missing. All PROV elements require a valid '
            u'identifier.'
        )
        return message
| 225 | |
| 226 | |
| 227 # PROV records | |
@six.python_2_unicode_compatible
class ProvRecord(object):
    """Base class for PROV records.

    A record belongs to a bundle, may have an identifier and stores its
    attributes as a mapping from attribute name to a set of values.
    """

    FORMAL_ATTRIBUTES = ()

    # PROV type of record.
    _prov_type = None

    def __init__(self, bundle, identifier, attributes=None):
        """
        Constructor.

        :param bundle: Bundle for the PROV record.
        :param identifier: (Unique) identifier of the record.
        :param attributes: Attributes to associate with the record (default: None).
        """
        self._bundle = bundle
        self._identifier = identifier
        self._attributes = defaultdict(set)
        if attributes:
            self.add_attributes(attributes)

    def __hash__(self):
        return hash(
            (self.get_type(), self._identifier, frozenset(self.attributes))
        )

    def copy(self):
        """
        Return an exact copy of this record.
        """
        return PROV_REC_CLS[self.get_type()](
            self._bundle, self.identifier, self.attributes
        )

    def get_type(self):
        """Returns the PROV type of the record."""
        return self._prov_type

    def get_asserted_types(self):
        """Returns the set of all asserted PROV types of this record."""
        return self._attributes[PROV_TYPE]

    def add_asserted_type(self, type_identifier):
        """
        Adds a PROV type assertion to the record.

        :param type_identifier: PROV namespace identifier to add.
        """
        self._attributes[PROV_TYPE].add(type_identifier)

    def get_attribute(self, attr_name):
        """
        Returns the values of the attribute of the given name.

        :param attr_name: Name of the attribute.
        :return: Set of the values recorded for the attribute (empty if
            there are none).
        """
        attr_name = self._bundle.valid_qualified_name(attr_name)
        return self._attributes[attr_name]

    @property
    def identifier(self):
        """Record's identifier."""
        return self._identifier

    @property
    def attributes(self):
        """
        All record attributes.

        :return: List of tuples (name, value)
        """
        return [
            (attr_name, value)
            for attr_name, values in self._attributes.items()
            for value in values
        ]

    @property
    def args(self):
        """
        All values of the record's formal attributes.

        :return: Tuple
        """
        return tuple(
            first(self._attributes[attr_name])
            for attr_name in self.FORMAL_ATTRIBUTES
        )

    @property
    def formal_attributes(self):
        """
        All names and values of the record's formal attributes.

        :return: Tuple of tuples (name, value)
        """
        return tuple(
            (attr_name, first(self._attributes[attr_name]))
            for attr_name in self.FORMAL_ATTRIBUTES
        )

    @property
    def extra_attributes(self):
        """
        All names and values of the record's attributes that are not formal
        attributes.

        :return: List of tuples (name, value)
        """
        return [
            (attr_name, attr_value) for attr_name, attr_value in self.attributes
            if attr_name not in self.FORMAL_ATTRIBUTES
        ]

    @property
    def bundle(self):
        """
        Bundle of the record.

        :return: :py:class:`ProvBundle`
        """
        return self._bundle

    @property
    def label(self):
        """Identifying label of the record.

        The first ``prov:label`` value if there is one, otherwise the
        record's identifier.
        """
        return first(self._attributes[PROV_LABEL]) \
            if self._attributes[PROV_LABEL] else self._identifier

    @property
    def value(self):
        """Value of the record (the set of its ``prov:value`` values)."""
        return self._attributes[PROV_VALUE]

    # Handling attributes
    def _auto_literal_conversion(self, literal):
        """Normalise the datatype of an attribute value.

        :param literal: Value to normalise.
        :return: The converted value, or the original one when no
            conversion is possible.
        """
        if isinstance(literal, ProvRecord):
            # Use the QName of the record as the literal
            literal = literal.identifier

        if isinstance(literal, str):
            return six.text_type(literal)
        elif isinstance(literal, QualifiedName):
            return self._bundle.valid_qualified_name(literal)
        elif isinstance(literal, Literal) and literal.has_no_langtag():
            if literal.datatype:
                # try convert generic Literal object to Python standard type
                # this is to match JSON decoding's literal conversion
                value = parse_xsd_types(literal.value, literal.datatype)
            else:
                # A literal with no datatype nor langtag defined
                # try auto-converting the value
                value = self._auto_literal_conversion(literal.value)
            if value is not None:
                return value

        # No conversion possible, return the original value
        return literal

    def add_attributes(self, attributes):
        """
        Add attributes to the record.

        :param attributes: Dictionary of attributes, with keys being qualified
            identifiers. Alternatively an iterable of tuples (key, value) with the
            keys satisfying the same condition.
        :raises ProvExceptionInvalidQualifiedName: If an attribute name is
            not a valid qualified name.
        :raises ProvException: If a value is invalid for its attribute, or a
            second, different value is given for a single-valued PROV
            attribute.
        """
        if not attributes:
            return

        if isinstance(attributes, dict):
            # Converting the dictionary into attribute-value pairs
            attributes = attributes.items()

        # The pairs are scanned twice below; materialise them once so that
        # one-shot iterables (generators, iterators) are not silently
        # exhausted by the first scan.
        attributes = list(attributes)

        # Check if one of the attributes specifies that the current type
        # is a collection. In that case multiple attributes of the same
        # type are allowed.
        is_collection = PROV_ATTR_COLLECTION in [_i[0] for _i in attributes]

        for attr_name, original_value in attributes:
            if original_value is None:
                continue

            # make sure the attribute name is valid
            attr = self._bundle.valid_qualified_name(attr_name)
            if attr is None:
                raise ProvExceptionInvalidQualifiedName(attr_name)

            if attr in PROV_ATTRIBUTE_QNAMES:
                # Expecting a qualified name
                qname = original_value.identifier \
                    if isinstance(original_value, ProvRecord) \
                    else original_value
                value = self._bundle.valid_qualified_name(qname)
            elif attr in PROV_ATTRIBUTE_LITERALS:
                value = original_value \
                    if isinstance(original_value, datetime.datetime) \
                    else parse_xsd_datetime(original_value)
            else:
                value = self._auto_literal_conversion(original_value)

            if value is None:
                raise ProvException(
                    'Invalid value for attribute %s: %s' %
                    (attr, original_value)
                )

            if not is_collection and attr in PROV_ATTRIBUTES and \
                    self._attributes[attr]:
                existing_value = first(self._attributes[attr])
                is_not_same_value = True
                try:
                    is_not_same_value = value != existing_value
                except TypeError:
                    # Cannot compare them
                    pass  # consider them different values

                if is_not_same_value:
                    raise ProvException(
                        'Cannot have more than one value for attribute %s'
                        % attr
                    )
                else:
                    # Same value, ignore it
                    continue

            self._attributes[attr].add(value)

    def __eq__(self, other):
        """Records are equal when type, identifier and attributes match.

        The identifier is only compared when this record has one.
        """
        if not isinstance(other, ProvRecord):
            # Comparing with a non-record (e.g. None) is simply unequal
            return False
        if self.get_type() != other.get_type():
            return False
        if self._identifier and not (self._identifier == other._identifier):
            return False

        return set(self.attributes) == set(other.attributes)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__
        return not (self == other)

    def __str__(self):
        return self.get_provn()

    def get_provn(self):
        """
        Returns the PROV-N representation of the record.

        :return: String
        """
        items = []

        # Generating identifier
        relation_id = ''  # default blank
        if self._identifier:
            identifier = six.text_type(self._identifier)  # TODO: QName export
            if self.is_element():
                items.append(identifier)
            else:
                # this is a relation
                # relations use ; to separate identifiers
                relation_id = identifier + '; '

        # Writing out the formal attributes
        for attr in self.FORMAL_ATTRIBUTES:
            if attr in self._attributes and self._attributes[attr]:
                # Formal attributes always have single values
                value = first(self._attributes[attr])
                # TODO: QName export
                items.append(
                    value.isoformat() if isinstance(value, datetime.datetime)
                    else six.text_type(value)
                )
            else:
                # '-' marks a formal attribute without a value
                items.append('-')

        # Writing out the remaining attributes
        extra = []
        for attr in self._attributes:
            if attr not in self.FORMAL_ATTRIBUTES:
                for value in self._attributes[attr]:
                    try:
                        # try if there is a prov-n representation defined
                        provn_representation = value.provn_representation()
                    except AttributeError:
                        provn_representation = encoding_provn_value(value)
                    # TODO: QName export
                    extra.append(
                        '%s=%s' % (six.text_type(attr), provn_representation)
                    )

        if extra:
            items.append('[%s]' % ', '.join(extra))
        prov_n = '%s(%s%s)' % (
            PROV_N_MAP[self.get_type()], relation_id, ', '.join(items)
        )
        return prov_n

    def is_element(self):
        """
        True, if the record is an element, False otherwise.

        :return: bool
        """
        return False

    def is_relation(self):
        """
        True, if the record is a relation, False otherwise.

        :return: bool
        """
        return False
| 543 | |
| 544 | |
| 545 # Abstract classes for elements and relations | |
class ProvElement(ProvRecord):
    """Provenance Element (a node of the provenance graph)."""

    def __init__(self, bundle, identifier, attributes=None):
        """
        Constructor.

        :param bundle: Bundle for the element.
        :param identifier: Identifier of the element; must not be None.
        :param attributes: Attributes to associate with the element
            (default: None).
        :raises ProvElementIdentifierRequired: If ``identifier`` is None.
        """
        # Elements, unlike other records, cannot be anonymous
        if identifier is not None:
            super(ProvElement, self).__init__(bundle, identifier, attributes)
        else:
            raise ProvElementIdentifierRequired()

    def is_element(self):
        """
        Always True: this record is an element.

        :return: bool
        """
        return True

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<%s: %s>' % (class_name, self._identifier)
| 566 | |
| 567 | |
class ProvRelation(ProvRecord):
    """Provenance Relationship (an edge between two nodes)."""

    def is_relation(self):
        """
        Always True: this record is a relation.

        :return: bool
        """
        return True

    def __repr__(self):
        # The identifier is optional for relations
        if self._identifier:
            identifier = ' %s' % self._identifier
        else:
            identifier = ''
        # The first two formal attributes are shown as the relation's ends
        leading_pairs = self.formal_attributes[:2]
        element_1, element_2 = [pair[1] for pair in leading_pairs]
        class_name = self.__class__.__name__
        return '<%s:%s (%s, %s)>' % (
            class_name, identifier, element_1, element_2
        )
| 587 | |
| 588 | |
| 589 # Component 1: Entities and Activities | |
class ProvEntity(ProvElement):
    """Provenance Entity element"""

    _prov_type = PROV_ENTITY

    # Shortcut assertions: each one asks the owning bundle to create a
    # record with this entity as the first argument and returns the entity
    # itself so that calls can be chained.
    def wasGeneratedBy(self, activity, time=None, attributes=None):
        """
        Asserts a generation record for this entity via the bundle.

        :param activity: Activity or string identifier of the activity involved
            in the generation.
        :param time: Optional time for the generation (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can be
            parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This entity.
        """
        bundle = self._bundle
        bundle.generation(self, activity, time, other_attributes=attributes)
        return self

    def wasInvalidatedBy(self, activity, time=None, attributes=None):
        """
        Asserts an invalidation record for this entity via the bundle.

        :param activity: Activity or string identifier of the activity involved
            in the invalidation.
        :param time: Optional time for the invalidation (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can be
            parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This entity.
        """
        bundle = self._bundle
        bundle.invalidation(self, activity, time, other_attributes=attributes)
        return self

    def wasDerivedFrom(self, usedEntity, activity=None, generation=None,
                       usage=None, attributes=None):
        """
        Asserts a derivation record for this entity from a used entity.

        :param usedEntity: Entity or a string identifier for the used entity.
        :param activity: Activity or string identifier of the activity involved
            in the derivation (default: None).
        :param generation: Optional generation to qualify the derivation
            (default: None).
        :param usage: Optional usage to qualify the derivation
            (default: None).
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This entity.
        """
        bundle = self._bundle
        bundle.derivation(
            self, usedEntity, activity, generation, usage,
            other_attributes=attributes
        )
        return self

    def wasAttributedTo(self, agent, attributes=None):
        """
        Asserts an attribution record between this entity and an agent.

        :param agent: Agent or string identifier of the agent involved in the
            attribution.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This entity.
        """
        bundle = self._bundle
        bundle.attribution(self, agent, other_attributes=attributes)
        return self

    def alternateOf(self, alternate2):
        """
        Asserts an alternate record between this and another entity.

        :param alternate2: Entity or a string identifier for the second entity.
        :return: This entity.
        """
        bundle = self._bundle
        bundle.alternate(self, alternate2)
        return self

    def specializationOf(self, generalEntity):
        """
        Asserts a specialisation record for this entity from a general entity.

        :param generalEntity: Entity or a string identifier for the general
            entity.
        :return: This entity.
        """
        bundle = self._bundle
        bundle.specialization(self, generalEntity)
        return self

    def hadMember(self, entity):
        """
        Asserts a membership record with this entity as the collection.

        :param entity: Entity to be added to the collection.
        :return: This entity.
        """
        bundle = self._bundle
        bundle.membership(self, entity)
        return self
| 690 | |
| 691 | |
class ProvActivity(ProvElement):
    """Provenance Activity element."""

    FORMAL_ATTRIBUTES = (PROV_ATTR_STARTTIME, PROV_ATTR_ENDTIME)

    _prov_type = PROV_ACTIVITY

    # Convenient methods
    def set_time(self, startTime=None, endTime=None):
        """
        Sets the time this activity took place.

        A value of None leaves the corresponding time untouched; any other
        value replaces the one currently recorded.

        :param startTime: Start time for the activity.
            Either a :py:class:`datetime.datetime` object or a string that can be
            parsed by :py:func:`dateutil.parser`.
        :param endTime: End time for the activity.
            Either a :py:class:`datetime.datetime` object or a string that can be
            parsed by :py:func:`dateutil.parser`.
        """
        # Strings are parsed here, as documented, so that the stored values
        # (and what get_startTime/get_endTime return) are datetime objects
        # rather than raw strings.
        if startTime is not None:
            self._attributes[PROV_ATTR_STARTTIME] = {
                _ensure_datetime(startTime)
            }
        if endTime is not None:
            self._attributes[PROV_ATTR_ENDTIME] = {_ensure_datetime(endTime)}

    def get_startTime(self):
        """
        Returns the time the activity started.

        :return: :py:class:`datetime.datetime`, or None if not set.
        """
        values = self._attributes[PROV_ATTR_STARTTIME]
        return first(values) if values else None

    def get_endTime(self):
        """
        Returns the time the activity ended.

        :return: :py:class:`datetime.datetime`, or None if not set.
        """
        values = self._attributes[PROV_ATTR_ENDTIME]
        return first(values) if values else None

    # Convenient assertions that take the current ProvActivity as the first
    # (formal) argument; each returns this activity to allow chaining.
    def used(self, entity, time=None, attributes=None):
        """
        Creates a new usage record for this activity.

        :param entity: Entity or string identifier of the entity involved in
            the usage relationship.
        :param time: Optional time for the usage (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can be
            parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record optionally (default: None).
        :return: This activity.
        """
        self._bundle.usage(self, entity, time, other_attributes=attributes)
        return self

    def wasInformedBy(self, informant, attributes=None):
        """
        Creates a new communication record for this activity.

        :param informant: The informing activity (relationship source).
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record optionally (default: None).
        :return: This activity.
        """
        self._bundle.communication(
            self, informant, other_attributes=attributes
        )
        return self

    def wasStartedBy(self, trigger, starter=None, time=None, attributes=None):
        """
        Creates a new start record for this activity. The activity did not exist
        before the start by the trigger.

        :param trigger: Entity triggering the start of this activity.
        :param starter: Optionally extra activity to state a qualified start
            through which the trigger entity for the start is generated
            (default: None).
        :param time: Optional time for the start (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can be
            parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record optionally (default: None).
        :return: This activity.
        """
        self._bundle.start(
            self, trigger, starter, time, other_attributes=attributes
        )
        return self

    def wasEndedBy(self, trigger, ender=None, time=None, attributes=None):
        """
        Creates a new end record for this activity.

        :param trigger: Entity triggering the end of this activity.
        :param ender: Optionally extra activity to state a qualified end through
            which the trigger entity for the end is generated (default: None).
        :param time: Optional time for the end (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can be
            parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record optionally (default: None).
        :return: This activity.
        """
        self._bundle.end(
            self, trigger, ender, time, other_attributes=attributes
        )
        return self

    def wasAssociatedWith(self, agent, plan=None, attributes=None):
        """
        Creates a new association record for this activity.

        :param agent: Agent or string identifier of the agent involved in the
            association.
        :param plan: Optionally extra entity to state qualified association through
            an internal plan (default: None).
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record optionally (default: None).
        :return: This activity.
        """
        self._bundle.association(
            self, agent, plan, other_attributes=attributes
        )
        return self
| 817 | |
| 818 | |
class ProvGeneration(ProvRelation):
    """Generation relation: entity, activity and time, in that order."""

    _prov_type = PROV_GENERATION

    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ENTITY,
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_TIME,
    )
| 825 | |
| 826 | |
class ProvUsage(ProvRelation):
    """Usage relation: activity, entity and time, in that order."""

    _prov_type = PROV_USAGE

    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_ENTITY,
        PROV_ATTR_TIME,
    )
| 833 | |
| 834 | |
class ProvCommunication(ProvRelation):
    """Communication relation: informed and informant, in that order."""

    _prov_type = PROV_COMMUNICATION

    FORMAL_ATTRIBUTES = (
        PROV_ATTR_INFORMED,
        PROV_ATTR_INFORMANT,
    )
| 841 | |
| 842 | |
class ProvStart(ProvRelation):
    """Start relation: activity, trigger, starter and time, in that order."""

    _prov_type = PROV_START

    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_TRIGGER,
        PROV_ATTR_STARTER,
        PROV_ATTR_TIME,
    )
| 850 | |
| 851 | |
class ProvEnd(ProvRelation):
    """End relation: activity, trigger, ender and time, in that order."""

    _prov_type = PROV_END

    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_TRIGGER,
        PROV_ATTR_ENDER,
        PROV_ATTR_TIME,
    )
| 859 | |
| 860 | |
class ProvInvalidation(ProvRelation):
    """Invalidation relation: entity, activity and time, in that order."""

    _prov_type = PROV_INVALIDATION

    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ENTITY,
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_TIME,
    )
| 867 | |
| 868 | |
| 869 # Component 2: Derivations | |
class ProvDerivation(ProvRelation):
    """Derivation relation.

    Formal attributes, in order: generated entity, used entity, activity,
    generation and usage.
    """

    _prov_type = PROV_DERIVATION

    FORMAL_ATTRIBUTES = (
        PROV_ATTR_GENERATED_ENTITY,
        PROV_ATTR_USED_ENTITY,
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_GENERATION,
        PROV_ATTR_USAGE,
    )
| 878 | |
| 879 | |
| 880 # Component 3: Agents, Responsibility, and Influence | |
class ProvAgent(ProvElement):
    """Provenance Agent element."""

    _prov_type = PROV_AGENT

    # Shortcut assertion that passes this agent as the first argument
    def actedOnBehalfOf(self, responsible, activity=None, attributes=None):
        """
        Asserts a delegation record for this agent via the bundle.

        :param responsible: The other agent of the delegation, passed to
            the bundle as the responsible party.
        :param activity: Optional activity to qualify the delegation
            (default: None).
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This agent.
        """
        bundle = self._bundle
        bundle.delegation(
            self, responsible, activity, other_attributes=attributes
        )
        return self
| 902 | |
| 903 | |
class ProvAttribution(ProvRelation):
    """Relation record of type ``PROV_ATTRIBUTION``.

    Formal attributes, in order: entity, agent.
    """

    _prov_type = PROV_ATTRIBUTION

    FORMAL_ATTRIBUTES = (PROV_ATTR_ENTITY, PROV_ATTR_AGENT)
| 910 | |
| 911 | |
class ProvAssociation(ProvRelation):
    """Relation record of type ``PROV_ASSOCIATION``.

    Formal attributes, in order: activity, agent, plan.
    """

    _prov_type = PROV_ASSOCIATION

    FORMAL_ATTRIBUTES = (PROV_ATTR_ACTIVITY, PROV_ATTR_AGENT, PROV_ATTR_PLAN)
| 918 | |
| 919 | |
class ProvDelegation(ProvRelation):
    """Relation record of type ``PROV_DELEGATION``.

    Formal attributes, in order: delegate, responsible, activity.
    """

    _prov_type = PROV_DELEGATION

    FORMAL_ATTRIBUTES = (
        PROV_ATTR_DELEGATE,
        PROV_ATTR_RESPONSIBLE,
        PROV_ATTR_ACTIVITY,
    )
| 927 | |
| 928 | |
class ProvInfluence(ProvRelation):
    """Relation record of type ``PROV_INFLUENCE``.

    Formal attributes, in order: influencee, influencer.
    """

    _prov_type = PROV_INFLUENCE

    FORMAL_ATTRIBUTES = (PROV_ATTR_INFLUENCEE, PROV_ATTR_INFLUENCER)
| 935 | |
| 936 | |
| 937 # Component 5: Alternate Entities | |
class ProvSpecialization(ProvRelation):
    """Relation record of type ``PROV_SPECIALIZATION``.

    Formal attributes, in order: specific entity, general entity.
    """

    _prov_type = PROV_SPECIALIZATION

    FORMAL_ATTRIBUTES = (PROV_ATTR_SPECIFIC_ENTITY, PROV_ATTR_GENERAL_ENTITY)
| 944 | |
| 945 | |
class ProvAlternate(ProvRelation):
    """Relation record of type ``PROV_ALTERNATE``.

    Formal attributes, in order: alternate1, alternate2.
    """

    _prov_type = PROV_ALTERNATE

    FORMAL_ATTRIBUTES = (PROV_ATTR_ALTERNATE1, PROV_ATTR_ALTERNATE2)
| 952 | |
| 953 | |
class ProvMention(ProvSpecialization):
    """Relation record of type ``PROV_MENTION``, a subclass of
    :py:class:`ProvSpecialization`.

    Formal attributes, in order: specific entity, general entity, bundle.
    """

    _prov_type = PROV_MENTION

    FORMAL_ATTRIBUTES = (
        PROV_ATTR_SPECIFIC_ENTITY,
        PROV_ATTR_GENERAL_ENTITY,
        PROV_ATTR_BUNDLE,
    )
| 961 | |
| 962 | |
| 963 # Component 6: Collections | |
class ProvMembership(ProvRelation):
    """Relation record of type ``PROV_MEMBERSHIP``.

    Formal attributes, in order: collection, entity.
    """

    _prov_type = PROV_MEMBERSHIP

    FORMAL_ATTRIBUTES = (PROV_ATTR_COLLECTION, PROV_ATTR_ENTITY)
| 970 | |
| 971 | |
# Class mappings from PROV record type
# ProvBundle.new_record() looks a record type up in this table to find the
# class it has to instantiate.
PROV_REC_CLS = {
    PROV_ENTITY: ProvEntity,
    PROV_ACTIVITY: ProvActivity,
    PROV_GENERATION: ProvGeneration,
    PROV_USAGE: ProvUsage,
    PROV_COMMUNICATION: ProvCommunication,
    PROV_START: ProvStart,
    PROV_END: ProvEnd,
    PROV_INVALIDATION: ProvInvalidation,
    PROV_DERIVATION: ProvDerivation,
    PROV_AGENT: ProvAgent,
    PROV_ATTRIBUTION: ProvAttribution,
    PROV_ASSOCIATION: ProvAssociation,
    PROV_DELEGATION: ProvDelegation,
    PROV_INFLUENCE: ProvInfluence,
    PROV_SPECIALIZATION: ProvSpecialization,
    PROV_ALTERNATE: ProvAlternate,
    PROV_MENTION: ProvMention,
    PROV_MEMBERSHIP: ProvMembership,
}


# Prefix -> namespace entries that every NamespaceManager is pre-populated
# with in its constructor.
DEFAULT_NAMESPACES = {'prov': PROV, 'xsd': XSD, 'xsi': XSI}
| 996 | |
| 997 | |
| 998 # Bundle | |
class NamespaceManager(dict):
    """Manages namespaces for PROV documents and bundles.

    The manager is itself a ``dict`` keyed by prefix. It starts out holding
    ``DEFAULT_NAMESPACES``; namespaces added through
    :py:meth:`add_namespace` are additionally tracked in a separate registry,
    which is what :py:meth:`get_registered_namespaces` reports.
    """

    parent = None
    """Parent :py:class:`NamespaceManager` this manager is a child of."""

    def __init__(self, namespaces=None, default=None, parent=None):
        """
        Constructor.

        :param namespaces: Optional namespaces to add to the manager
            (default: None).
        :param default: Optional default namespace to use (default: None).
        :param parent: Optional parent :py:class:`NamespaceManager` to make this
            namespace manager a child of (default: None).
        """
        dict.__init__(self)
        self._default_namespaces = DEFAULT_NAMESPACES
        self.update(self._default_namespaces)
        # Namespaces explicitly registered via add_namespace(), by prefix
        self._namespaces = {}

        if default is not None:
            self.set_default_namespace(default)
        else:
            self._default = None
        self.parent = parent
        # TODO check if default is in the default namespaces
        self._anon_id_count = 0
        # URI -> registered Namespace
        self._uri_map = dict()
        # Namespace as given by a caller -> Namespace actually registered
        self._rename_map = dict()
        # Prefix as given by a caller -> Namespace actually registered
        self._prefix_renamed_map = dict()
        self.add_namespaces(namespaces)

    def get_namespace(self, uri):
        """
        Returns the namespace held by this manager for the given URI.

        :param uri: Namespace URI.
        :return: :py:class:`~prov.identifier.Namespace`, or None if no held
            namespace has that URI.
        """
        for namespace in self.values():
            # Use the public accessor, like the rest of this class does
            if uri == namespace.uri:
                return namespace
        return None

    def get_registered_namespaces(self):
        """
        Returns all registered namespaces.

        :return: Iterable of :py:class:`~prov.identifier.Namespace`.
        """
        return self._namespaces.values()

    def set_default_namespace(self, uri):
        """
        Sets the default namespace to the one of a given URI.

        The namespace is stored under the empty prefix.

        :param uri: Namespace URI.
        """
        self._default = Namespace('', uri)
        self[''] = self._default

    def get_default_namespace(self):
        """
        Returns the default namespace.

        :return: :py:class:`~prov.identifier.Namespace`, or None if no default
            namespace has been set.
        """
        return self._default

    def add_namespace(self, namespace):
        """
        Adds a namespace (if not available, yet).

        A namespace whose URI is already registered is mapped onto the
        existing one; a namespace whose prefix is already taken is registered
        under a fresh, unused prefix.

        :param namespace: :py:class:`~prov.identifier.Namespace` to add.
        :return: The :py:class:`~prov.identifier.Namespace` that callers
            should use from now on (possibly not the one passed in).
        """
        if namespace in self.values():
            # no need to do anything
            return namespace
        if namespace in self._rename_map:
            # already renamed and added
            return self._rename_map[namespace]

        # Checking if the URI has been defined and use the existing namespace
        # instead
        uri = namespace.uri
        prefix = namespace.prefix

        if uri in self._uri_map:
            existing_ns = self._uri_map[uri]
            self._rename_map[namespace] = existing_ns
            self._prefix_renamed_map[prefix] = existing_ns
            return existing_ns

        if prefix in self:
            # Conflicting prefix
            new_prefix = self._get_unused_prefix(prefix)
            new_namespace = Namespace(new_prefix, namespace.uri)
            self._rename_map[namespace] = new_namespace
            # TODO: What if the prefix is already in the map and point to a
            # different Namespace? Raise an exception?
            self._prefix_renamed_map[prefix] = new_namespace
            prefix = new_prefix
            namespace = new_namespace

        # Only now add the namespace to the registry
        self._namespaces[prefix] = namespace
        self[prefix] = namespace
        self._uri_map[uri] = namespace

        return namespace

    def add_namespaces(self, namespaces):
        """
        Add multiple namespaces into this manager.

        :param namespaces: A collection of namespace(s) to add. ``None`` or an
            empty collection is accepted and adds nothing.
        :type namespaces: List of :py:class:`~prov.identifier.Namespace` or
            dict of {prefix: uri}.
        :returns: None
        """
        if isinstance(namespaces, dict):
            # expecting a dictionary of {prefix: uri},
            # convert it to a list of Namespace
            namespaces = [
                Namespace(prefix, uri) for prefix, uri in namespaces.items()
            ]
        if namespaces:
            for ns in namespaces:
                self.add_namespace(ns)

    def valid_qualified_name(self, qname):
        """
        Resolves an identifier to a valid qualified name.

        :param qname: A :py:class:`~prov.identifier.QualifiedName`, an
            :py:class:`~prov.identifier.Identifier` or a string (either
            ``prefix:local`` or a full URI).
        :return: :py:class:`~prov.identifier.QualifiedName` or None in case of
            failure.
        """
        if not qname:
            return None

        if isinstance(qname, QualifiedName):
            # Register the namespace if it has not been registered before
            namespace = qname.namespace
            prefix = namespace.prefix
            local_part = qname.localpart
            if not prefix:
                # the namespace is a default namespace
                if self._default == namespace:
                    # the same default namespace is defined
                    new_qname = self._default[local_part]
                elif self._default is None:
                    # no default namespace is defined, reused the one given
                    self._default = namespace
                    return qname  # no change, return the original
                else:
                    # different default namespace,
                    # use the 'dn' prefix for the new namespace
                    dn_namespace = Namespace('dn', namespace.uri)
                    dn_namespace = self.add_namespace(dn_namespace)
                    new_qname = dn_namespace[local_part]
            elif prefix in self and self[prefix] == namespace:
                # No need to add the namespace
                existing_ns = self[prefix]
                if existing_ns is namespace:
                    return qname
                else:
                    # reuse the existing namespace
                    new_qname = existing_ns[local_part]
            else:
                # Do not reuse the namespace object
                ns = self.add_namespace(deepcopy(namespace))
                # minting the same Qualified Name from the namespace's copy
                new_qname = ns[qname.localpart]
            # returning the new qname
            return new_qname

        # Trying to guess from here
        if not isinstance(qname, (six.string_types, Identifier)):
            # Only proceed for string or URI values
            return None
        # Try to generate a Qualified Name
        str_value = \
            qname.uri if isinstance(qname, Identifier) else six.text_type(qname)
        if str_value.startswith('_:'):
            # this is a blank node ID
            return None
        elif ':' in str_value:
            # check if the identifier contains a registered prefix
            prefix, local_part = str_value.split(':', 1)
            if prefix in self:
                # return a new QualifiedName
                return self[prefix][local_part]
            if prefix in self._prefix_renamed_map:
                # return a new QualifiedName
                return self._prefix_renamed_map[prefix][local_part]
            else:
                # treat as a URI (with the first part as its scheme)
                # check if the URI can be compacted
                for namespace in self.values():
                    if str_value.startswith(namespace.uri):
                        # create a QName with the namespace; only the leading
                        # URI is stripped (str.replace would also remove any
                        # later occurrence of it inside the local part)
                        return namespace[str_value[len(namespace.uri):]]
        elif self._default:
            # create and return an identifier in the default namespace
            return self._default[qname]

        if self.parent:
            # all attempts have failed so far
            # now delegate this to the parent NamespaceManager
            return self.parent.valid_qualified_name(qname)

        # Default to FAIL
        return None

    def get_anonymous_identifier(self, local_prefix='id'):
        """
        Returns an anonymous identifier (without a namespace prefix).

        Each call increments an internal counter, so successive identifiers
        from the same manager differ.

        :param local_prefix: Optional local namespace prefix as a string
            (default: 'id').
        :return: :py:class:`~prov.identifier.Identifier`
        """
        self._anon_id_count += 1
        return Identifier('_:%s%d' % (local_prefix, self._anon_id_count))

    def _get_unused_prefix(self, original_prefix):
        """
        Returns ``original_prefix`` if it is free, otherwise the first of
        ``<prefix>_1``, ``<prefix>_2``, ... that is not held by this manager.
        """
        if original_prefix not in self:
            return original_prefix
        count = 1
        while True:
            new_prefix = '_'.join((original_prefix, six.text_type(count)))
            if new_prefix in self:
                count += 1
            else:
                return new_prefix
| 1237 | |
| 1238 | |
| 1239 class ProvBundle(object): | |
| 1240 """PROV Bundle""" | |
| 1241 | |
def __init__(self, records=None, identifier=None, namespaces=None,
             document=None):
    """
    Sets up an empty bundle and then adds the given records to it.

    :param records: Optional iterable of records to add to the bundle
        (default: None).
    :param identifier: Optional identifier of the bundle (default: None).
    :param namespaces: Optional namespaces handed to the bundle's
        :py:class:`NamespaceManager` (default: None).
    :param document: Optional document this bundle belongs to; its namespace
        manager becomes the parent of this bundle's one (default: None).
    """
    # Bundle-specific state
    self._identifier = identifier
    self._records = list()
    self._id_map = defaultdict(list)
    self._document = document

    # Unresolved names fall through to the document's namespaces, if any
    parent_namespaces = None if document is None else document._namespaces
    self._namespaces = NamespaceManager(namespaces, parent=parent_namespaces)

    for record in records or ():
        self.add_record(record)
| 1266 | |
def __repr__(self):
    """Returns ``<ClassName: identifier>``."""
    class_name = type(self).__name__
    return '<%s: %s>' % (class_name, self._identifier)
| 1269 | |
@property
def namespaces(self):
    """
    The namespaces registered with this bundle's namespace manager.

    :return: Set of :py:class:`~prov.identifier.Namespace`.
    """
    registered = self._namespaces.get_registered_namespaces()
    return set(registered)
| 1278 | |
@property
def default_ns_uri(self):
    """
    The URI of the default namespace.

    :return: URI as string, or None when no default namespace is set.
    """
    namespace = self._namespaces.get_default_namespace()
    if not namespace:
        return None
    return namespace.uri
| 1288 | |
@property
def document(self):
    """
    The document this bundle was created with.

    :return: :py:class:`ProvDocument`, or None if the bundle has none.
    """
    return self._document
| 1297 | |
@property
def identifier(self):
    """
    The identifier this bundle was created with (may be None).
    """
    return self._identifier
| 1304 | |
@property
def records(self):
    """
    A new list holding all records of the current bundle; changing the
    returned list does not change the bundle.
    """
    snapshot = list(self._records)
    return snapshot
| 1311 | |
| 1312 # Bundle configurations | |
def set_default_namespace(self, uri):
    """
    Makes the namespace with the given URI the default namespace, by
    delegating to the bundle's namespace manager.

    :param uri: Namespace URI.
    """
    manager = self._namespaces
    manager.set_default_namespace(uri)
| 1320 | |
def get_default_namespace(self):
    """
    Returns the default namespace held by the bundle's namespace manager.

    :return: :py:class:`~prov.identifier.Namespace`
    """
    manager = self._namespaces
    return manager.get_default_namespace()
| 1328 | |
def add_namespace(self, namespace_or_prefix, uri=None):
    """
    Adds a namespace (if not available, yet).

    :param namespace_or_prefix: Either a :py:class:`~prov.identifier.Namespace`
        (then ``uri`` is left out) or a prefix string (then ``uri`` is
        required).
    :param uri: Namespace URI (default: None). Must be present if only a
        prefix is given in the previous parameter.
    :return: Whatever the namespace manager's ``add_namespace()`` returns.
    """
    if uri is None:
        namespace = namespace_or_prefix
    else:
        namespace = Namespace(namespace_or_prefix, uri)
    return self._namespaces.add_namespace(namespace)
| 1344 | |
def get_registered_namespaces(self):
    """
    Returns the namespaces registered with the bundle's namespace manager.

    :return: Iterable of :py:class:`~prov.identifier.Namespace`.
    """
    manager = self._namespaces
    return manager.get_registered_namespaces()
| 1352 | |
def valid_qualified_name(self, identifier):
    """
    Resolves ``identifier`` through the bundle's namespace manager.

    :param identifier: Identifier to resolve.
    :return: Whatever the namespace manager's ``valid_qualified_name()``
        returns for it.
    """
    manager = self._namespaces
    return manager.valid_qualified_name(identifier)
| 1355 | |
def get_records(self, class_or_type_or_tuple=None):
    """
    Returns all records. Returned records may be filtered by the optional
    argument.

    :param class_or_type_or_tuple: A filter on the type for which records are
        to be returned (default: None). The filter checks by the type of the
        record using the `isinstance` check on the record.
    :return: A new list of :py:class:`ProvRecord` objects, in insertion order.
    """
    if not class_or_type_or_tuple:
        return list(self._records)
    # Build a real list: filter() is a one-shot iterator on Python 3, which
    # would make the return type depend on whether a filter was given.
    return [
        record for record in self._records
        if isinstance(record, class_or_type_or_tuple)
    ]
| 1373 | |
def get_record(self, identifier):
    """
    Returns the records matching a given identifier.

    The records are looked up in this bundle first; if none is found and the
    bundle belongs to a document, the lookup continues in that document.

    :param identifier: Record identifier.
    :return: List of :py:class:`ProvRecord` sharing the identifier (empty if
        there is none), or None if ``identifier`` is None.
    """
    if identifier is None:
        return None
    valid_id = self.valid_qualified_name(identifier)
    if valid_id is None:
        # the identifier cannot be resolved, so nothing can match it
        return []
    # _id_map is a defaultdict(list): subscripting it with an unknown key
    # never raises KeyError, it silently inserts an empty list instead.
    # .get() leaves the map untouched on a miss.
    found = self._id_map.get(valid_id)
    if found:
        return found
    # looking up the parent document, if there is one
    parent = self.document
    if self.is_bundle() and parent is not None:
        return parent.get_record(valid_id)
    return []
| 1394 | |
| 1395 # Miscellaneous functions | |
def is_document(self):
    """
    Tells whether the object is a document; this implementation always
    answers `False`.

    :return: bool
    """
    return False
| 1403 | |
def is_bundle(self):
    """
    Tells whether the object is a bundle; this implementation always
    answers `True`.

    :return: bool
    """
    return True
| 1411 | |
def has_bundles(self):
    """
    Tells whether the object contains at least one bundle; this
    implementation always answers `False`.

    :return: bool
    """
    return False
| 1419 | |
@property
def bundles(self):
    """
    The bundles contained in this object; this implementation always yields
    an empty ``frozenset``.

    :return: Iterable of :py:class:`ProvBundle`.
    """
    return frozenset()
| 1428 | |
def get_provn(self, _indent_level=0):
    """
    Returns the PROV-N representation of the bundle.

    :param _indent_level: Nesting depth used to indent the output
        (default: 0); contained bundles are rendered one level deeper.
    :return: String
    """
    is_doc = self.is_document()

    # NOTE(review): the indent unit is copied verbatim from the listing this
    # code was taken from; whitespace there may have been collapsed, so
    # confirm the width against the upstream file.
    indent_unit = ' '
    outer_indent = '' + (indent_unit * _indent_level)
    separator = '\n' + (indent_unit * (_indent_level + 1))

    # opening keyword: a document or a (named) bundle
    if is_doc:
        lines = ['document']
    else:
        lines = ['bundle %s' % self._identifier]

    # namespace declarations
    default_namespace = self._namespaces.get_default_namespace()
    if default_namespace:
        lines.append('default <%s>' % default_namespace.uri)

    registered_namespaces = self._namespaces.get_registered_namespaces()
    if registered_namespaces:
        for namespace in registered_namespaces:
            lines.append(
                'prefix %s <%s>' % (namespace.prefix, namespace.uri)
            )

    if default_namespace or registered_namespaces:
        # a blank line between the prefixes and the assertions
        lines.append('')

    # the records, followed by the contained bundles of a document
    for record in self._records:
        lines.append(record.get_provn())
    if is_doc:
        for bundle in self.bundles:
            lines.append(bundle.get_provn(_indent_level + 1))

    # closing keyword sits at the indentation of the opening one
    closing = 'endDocument' if is_doc else 'endBundle'
    return separator.join(lines) + '\n' + outer_indent + closing
| 1472 | |
def __eq__(self, other):
    """
    Two bundles are equal when their record sets have the same size and
    every record of this bundle has an equal counterpart in the other one.
    Anything that is not a :py:class:`ProvBundle` is unequal.
    """
    if not isinstance(other, ProvBundle):
        return False
    unmatched = set(other.get_records())
    own_records = set(self.get_records())
    if len(own_records) != len(unmatched):
        return False
    # pair every record of this bundle with one not-yet-used record of the
    # other bundle
    for own in own_records:
        for candidate in unmatched:
            if own == candidate:
                unmatched.remove(candidate)
                break
        else:
            # the inner loop ran out of candidates without a match
            logger.debug(
                'Equality (ProvBundle): Could not find this record: %s',
                six.text_type(own)
            )
            return False
    return True
| 1496 | |
def __ne__(self, other):
    """Negation of the ``==`` comparison."""
    same = (self == other)
    return not same

# Equality is content-based and records can be added later, so instances are
# explicitly marked unhashable.
__hash__ = None
| 1501 | |
| 1502 # Transformations | |
def _unified_records(self):
    """
    Returns the bundle's records with those sharing an identifier merged.

    For every identifier carried by more than one record, a copy of the first
    such record receives the attributes of the others and takes the place of
    the first occurrence; the later occurrences are dropped.
    """
    # TODO: Check unification rules in the PROV-CONSTRAINTS document
    # This method simply merges the records having the same name
    replacement_for = dict()
    for same_id_records in self._id_map.values():
        if len(same_id_records) < 2:
            continue
        merged = same_id_records[0].copy()
        for duplicate in same_id_records[1:]:
            merged.add_attributes(duplicate.attributes)
        # every record of the group is represented by the merged one
        for original in same_id_records:
            replacement_for[original] = merged

    if not replacement_for:
        # nothing was merged, hand back a copy of the original list
        return list(self._records)

    emitted = set()
    result = list()
    for record in self._records:
        if record not in replacement_for:
            # untouched record, keep as it is
            result.append(record)
            continue
        merged = replacement_for[record]
        if merged not in emitted:
            result.append(merged)
            emitted.add(merged)
    return result
| 1534 | |
def unified(self):
    """
    Builds a new bundle in which records that have the same identifier are
    merged into one.

    :returns: :py:class:`ProvBundle` -- the new unified bundle, carrying this
        bundle's identifier.
    """
    merged_records = self._unified_records()
    return ProvBundle(records=merged_records, identifier=self.identifier)
| 1546 | |
def update(self, other):
    """
    Append all the records of the *other* ProvBundle into this bundle.

    :param other: the other bundle whose records to be appended.
    :type other: :py:class:`ProvBundle`
    :raises ProvException: if *other* is not a :py:class:`ProvBundle`, or is
        a document that has bundles of its own.
    :returns: None.
    """
    if not isinstance(other, ProvBundle):
        raise ProvException(
            'ProvBundle.update(): The other bundle is not a ProvBundle '
            'instance (%s)' % type(other)
        )
    if other.is_document() and other.has_bundles():
        # Cannot add bundles to a bundle
        raise ProvException(
            'ProvBundle.update(): The other bundle is a document with '
            'sub-bundle(s).'
        )
    for record in other.get_records():
        self.add_record(record)
| 1569 | |
| 1570 # Provenance statements | |
def _add_record(self, record):
    """
    Stores ``record`` in the bundle and, when it has an identifier, indexes
    it under that identifier.
    """
    # IMPORTANT: All records need to be added to a bundle/document via this
    # method. Otherwise, the _id_map dict will not be correctly updated
    key = record.identifier
    if key is not None:
        self._id_map[key].append(record)
    self._records.append(record)
| 1578 | |
def new_record(self, record_type, identifier, attributes=None,
               other_attributes=None):
    """
    Creates a new record, adds it to this bundle and returns it.

    :param record_type: Type of record (one of the keys of
        :py:const:`PROV_REC_CLS`).
    :param identifier: Identifier for new record; it is resolved with
        :py:meth:`valid_qualified_name` first.
    :param attributes: Attributes as a dictionary or list of tuples to be added
        to the record optionally (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The newly created record.
    """
    pairs = []
    # both arguments may come as a dict or as a sequence of (name, value)
    for source in (attributes, other_attributes):
        if not source:
            continue
        pairs.extend(source.items() if isinstance(source, dict) else source)

    record_cls = PROV_REC_CLS[record_type]
    qualified_id = self.valid_qualified_name(identifier)
    created = record_cls(self, qualified_id, pairs)
    self._add_record(created)
    return created
| 1610 | |
def add_record(self, record):
    """
    Adds a record to the bundle by creating a new record from the type,
    identifier and attributes of the given one.

    :param record: :py:class:`ProvRecord` to be added.
    :return: The record created in this bundle.
    """
    record_type = record.get_type()
    formal = record.formal_attributes
    extra = record.extra_attributes
    return self.new_record(record_type, record.identifier, formal, extra)
| 1621 | |
def entity(self, identifier, other_attributes=None):
    """
    Creates a new entity record in this bundle.

    :param identifier: Identifier for new entity.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    no_formal_attributes = None
    return self.new_record(
        PROV_ENTITY, identifier, no_formal_attributes, other_attributes
    )
| 1631 | |
def activity(self, identifier, startTime=None, endTime=None,
             other_attributes=None):
    """
    Creates a new activity record in this bundle.

    :param identifier: Identifier for new activity.
    :param startTime: Optional start time for the activity (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param endTime: Optional end time for the activity (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    formal_attributes = {
        PROV_ATTR_STARTTIME: _ensure_datetime(startTime),
        PROV_ATTR_ENDTIME: _ensure_datetime(endTime),
    }
    return self.new_record(
        PROV_ACTIVITY, identifier, formal_attributes, other_attributes
    )
| 1654 | |
def generation(self, entity, activity=None, time=None, identifier=None,
               other_attributes=None):
    """
    Creates a new generation record for an entity.

    :param entity: Entity or a string identifier for the entity.
    :param activity: Activity or string identifier of the activity involved in
        the generation (default: None).
    :param time: Optional time for the generation (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new generation record (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    formal_attributes = {
        PROV_ATTR_ENTITY: entity,
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_GENERATION, identifier, formal_attributes, other_attributes
    )
| 1678 | |
def usage(self, activity, entity=None, time=None, identifier=None,
          other_attributes=None):
    """
    Creates a new usage record for an activity.

    :param activity: Activity or a string identifier for the activity.
    :param entity: Entity or string identifier of the entity involved in
        the usage relationship (default: None).
    :param time: Optional time for the usage (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new usage record (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    formal_attributes = {
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_ENTITY: entity,
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_USAGE, identifier, formal_attributes, other_attributes
    )
| 1701 | |
def start(self, activity, trigger=None, starter=None, time=None,
          identifier=None, other_attributes=None):
    """
    Creates a new start record for an activity.

    :param activity: Activity or a string identifier for the activity.
    :param trigger: Entity triggering the start of this activity
        (default: None).
    :param starter: Optionally extra activity to state a qualified start
        through which the trigger entity for the start is generated
        (default: None).
    :param time: Optional time for the start (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new start record (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    formal_attributes = {
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_TRIGGER: trigger,
        PROV_ATTR_STARTER: starter,
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_START, identifier, formal_attributes, other_attributes
    )
| 1728 | |
def end(self, activity, trigger=None, ender=None, time=None,
        identifier=None, other_attributes=None):
    """
    Creates a new end record for an activity.

    :param activity: Activity or a string identifier for the activity.
    :param trigger: Entity triggering the end of this activity
        (default: None).
    :param ender: Optionally extra activity to state a qualified end
        through which the trigger entity for the end is generated
        (default: None).
    :param time: Optional time for the end (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new end record (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    formal_attributes = {
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_TRIGGER: trigger,
        PROV_ATTR_ENDER: ender,
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_END, identifier, formal_attributes, other_attributes
    )
| 1755 | |
def invalidation(self, entity, activity=None, time=None, identifier=None,
                 other_attributes=None):
    """
    Creates a new invalidation record for an entity.

    :param entity: Entity or a string identifier for the entity that is
        invalidated.
    :param activity: Activity or string identifier of the activity involved in
        the invalidation (default: None).
    :param time: Optional time for the invalidation (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new invalidation record
        (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_ENTITY: entity,
        PROV_ATTR_ACTIVITY: activity,
        # Strings are parsed into datetime objects; other values pass through.
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_INVALIDATION, identifier, formal_attributes, other_attributes
    )
| 1779 | |
def communication(self, informed, informant, identifier=None,
                  other_attributes=None):
    """
    Creates a new communication record between two activities.

    :param informed: The informed activity (relationship destination).
    :param informant: The informing activity (relationship source).
    :param identifier: Identifier for new communication record
        (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_INFORMED: informed,
        PROV_ATTR_INFORMANT: informant,
    }
    return self.new_record(
        PROV_COMMUNICATION, identifier, formal_attributes, other_attributes
    )
| 1798 | |
def agent(self, identifier, other_attributes=None):
    """
    Creates a new agent record.

    An agent has no formal attributes, so only the optional extra
    attributes are passed on.

    :param identifier: Identifier for new agent.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = None
    agent_record = self.new_record(
        PROV_AGENT, identifier, formal_attributes, other_attributes
    )
    return agent_record
| 1808 | |
def attribution(self, entity, agent, identifier=None,
                other_attributes=None):
    """
    Creates a new attribution record between an entity and an agent.

    :param entity: Entity or a string identifier for the entity (relationship
        source).
    :param agent: Agent or string identifier of the agent the entity is
        attributed to (relationship destination).
    :param identifier: Identifier for new attribution record
        (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_ENTITY: entity,
        PROV_ATTR_AGENT: agent,
    }
    return self.new_record(
        PROV_ATTRIBUTION, identifier, formal_attributes, other_attributes
    )
| 1829 | |
def association(self, activity, agent=None, plan=None, identifier=None,
                other_attributes=None):
    """
    Creates a new association record for an activity.

    :param activity: Activity or a string identifier for the activity.
    :param agent: Agent or string identifier of the agent associated with
        the activity (default: None).
    :param plan: Optional entity stating the plan the agent followed, for a
        qualified association (default: None).
    :param identifier: Identifier for new association record
        (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_AGENT: agent,
        PROV_ATTR_PLAN: plan,
    }
    return self.new_record(
        PROV_ASSOCIATION, identifier, formal_attributes, other_attributes
    )
| 1852 | |
def delegation(self, delegate, responsible, activity=None, identifier=None,
               other_attributes=None):
    """
    Creates a new delegation record stating that one agent acted on behalf
    of another.

    :param delegate: Agent (or its identifier) acting on behalf of the
        responsible agent (relationship source).
    :param responsible: Agent (or its identifier) on whose behalf the
        delegate acted (relationship destination).
    :param activity: Optional activity for which the delegation holds, for a
        qualified delegation (default: None).
    :param identifier: Identifier for new delegation record (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_DELEGATE: delegate,
        PROV_ATTR_RESPONSIBLE: responsible,
        PROV_ATTR_ACTIVITY: activity,
    }
    return self.new_record(
        PROV_DELEGATION, identifier, formal_attributes, other_attributes
    )
| 1875 | |
def influence(self, influencee, influencer, identifier=None,
              other_attributes=None):
    """
    Creates a new influence record between two entities, activities or agents.

    :param influencee: Influenced entity, activity or agent (relationship
        source).
    :param influencer: Influencing entity, activity or agent (relationship
        destination).
    :param identifier: Identifier for new influence record (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_INFLUENCEE: influencee,
        PROV_ATTR_INFLUENCER: influencer,
    }
    return self.new_record(
        PROV_INFLUENCE, identifier, formal_attributes, other_attributes
    )
| 1896 | |
def derivation(self, generatedEntity, usedEntity, activity=None,
               generation=None, usage=None,
               identifier=None, other_attributes=None):
    """
    Creates a new derivation record for a generated entity from a used entity.

    :param generatedEntity: Entity or a string identifier for the generated
        entity (relationship source).
    :param usedEntity: Entity or a string identifier for the used entity
        (relationship destination).
    :param activity: Activity or string identifier of the activity involved in
        the derivation (default: None).
    :param generation: Optional generation record (or its identifier) linking
        the generated entity to the activity, as described by PROV-DM
        (default: None).
    :param usage: Optional usage record (or its identifier) linking the used
        entity to the activity, as described by PROV-DM (default: None).
    :param identifier: Identifier for new derivation record (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The record created by :py:meth:`new_record`.
    """
    return self.new_record(
        PROV_DERIVATION,
        identifier,
        {
            PROV_ATTR_GENERATED_ENTITY: generatedEntity,
            PROV_ATTR_USED_ENTITY: usedEntity,
            PROV_ATTR_ACTIVITY: activity,
            PROV_ATTR_GENERATION: generation,
            PROV_ATTR_USAGE: usage,
        },
        other_attributes
    )
| 1924 | |
def revision(self, generatedEntity, usedEntity, activity=None,
             generation=None, usage=None,
             identifier=None, other_attributes=None):
    """
    Creates a new revision record for a generated entity from a used entity.

    The record is a derivation (see :py:meth:`derivation`) that additionally
    asserts the type ``prov:Revision``.

    :param generatedEntity: Entity or a string identifier for the generated
        entity (relationship source).
    :param usedEntity: Entity or a string identifier for the used entity
        (relationship destination).
    :param activity: Activity or string identifier of the activity involved in
        the revision (default: None).
    :param generation: Optional generation record (or its identifier) linking
        the generated entity to the activity (default: None).
    :param usage: Optional usage record (or its identifier) linking the used
        entity to the activity (default: None).
    :param identifier: Identifier for new revision record (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The derivation record with the revision type asserted.
    """
    # Build the plain derivation first, then mark it as a revision.
    derivation_record = self.derivation(
        generatedEntity,
        usedEntity,
        activity,
        generation,
        usage,
        identifier,
        other_attributes
    )
    revision_type = PROV['Revision']
    derivation_record.add_asserted_type(revision_type)
    return derivation_record
| 1950 | |
def quotation(self, generatedEntity, usedEntity, activity=None,
              generation=None, usage=None,
              identifier=None, other_attributes=None):
    """
    Creates a new quotation record for a generated entity from a used entity.

    The record is a derivation (see :py:meth:`derivation`) that additionally
    asserts the type ``prov:Quotation``.

    :param generatedEntity: Entity or a string identifier for the generated
        entity (relationship source).
    :param usedEntity: Entity or a string identifier for the used entity
        (relationship destination).
    :param activity: Activity or string identifier of the activity involved in
        the quotation (default: None).
    :param generation: Optional generation record (or its identifier) linking
        the generated entity to the activity (default: None).
    :param usage: Optional usage record (or its identifier) linking the used
        entity to the activity (default: None).
    :param identifier: Identifier for new quotation record (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The derivation record with the quotation type asserted.
    """
    # Build the plain derivation first, then mark it as a quotation.
    derivation_record = self.derivation(
        generatedEntity,
        usedEntity,
        activity,
        generation,
        usage,
        identifier,
        other_attributes
    )
    quotation_type = PROV['Quotation']
    derivation_record.add_asserted_type(quotation_type)
    return derivation_record
| 1976 | |
def primary_source(self, generatedEntity, usedEntity, activity=None,
                   generation=None, usage=None,
                   identifier=None, other_attributes=None):
    """
    Creates a new primary source record for a generated entity from a used
    entity.

    The record is a derivation (see :py:meth:`derivation`) that additionally
    asserts the type ``prov:PrimarySource``.

    :param generatedEntity: Entity or a string identifier for the generated
        entity (relationship source).
    :param usedEntity: Entity or a string identifier for the used entity
        (relationship destination).
    :param activity: Activity or string identifier of the activity involved in
        the primary source (default: None).
    :param generation: Optional generation record (or its identifier) linking
        the generated entity to the activity (default: None).
    :param usage: Optional usage record (or its identifier) linking the used
        entity to the activity (default: None).
    :param identifier: Identifier for new primary source record
        (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The derivation record with the primary source type asserted.
    """
    # Build the plain derivation first, then mark it as a primary source.
    derivation_record = self.derivation(
        generatedEntity,
        usedEntity,
        activity,
        generation,
        usage,
        identifier,
        other_attributes
    )
    primary_source_type = PROV['PrimarySource']
    derivation_record.add_asserted_type(primary_source_type)
    return derivation_record
| 2003 | |
def specialization(self, specificEntity, generalEntity):
    """
    Creates a new specialisation record relating a specific entity to a
    general entity.

    The record is created without an identifier and without extra
    attributes.

    :param specificEntity: Entity or a string identifier for the specific
        entity (relationship source).
    :param generalEntity: Entity or a string identifier for the general entity
        (relationship destination).
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_SPECIFIC_ENTITY: specificEntity,
        PROV_ATTR_GENERAL_ENTITY: generalEntity,
    }
    return self.new_record(PROV_SPECIALIZATION, None, formal_attributes)
| 2019 | |
def alternate(self, alternate1, alternate2):
    """
    Creates a new alternate record between two entities.

    The record is created without an identifier and without extra
    attributes.

    :param alternate1: Entity or a string identifier for the first entity
        (relationship source).
    :param alternate2: Entity or a string identifier for the second entity
        (relationship destination).
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_ALTERNATE1: alternate1,
        PROV_ATTR_ALTERNATE2: alternate2,
    }
    return self.new_record(PROV_ALTERNATE, None, formal_attributes)
| 2035 | |
def mention(self, specificEntity, generalEntity, bundle):
    """
    Creates a new mention record relating a specific entity to a general
    entity described in a bundle.

    The record is created without an identifier and without extra
    attributes.

    :param specificEntity: Entity or a string identifier for the specific
        entity (relationship source).
    :param generalEntity: Entity or a string identifier for the general entity
        (relationship destination).
    :param bundle: Bundle (or its identifier) in which the general entity is
        described.
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_SPECIFIC_ENTITY: specificEntity,
        PROV_ATTR_GENERAL_ENTITY: generalEntity,
        PROV_ATTR_BUNDLE: bundle,
    }
    return self.new_record(PROV_MENTION, None, formal_attributes)
| 2053 | |
def collection(self, identifier, other_attributes=None):
    """
    Creates a new collection: an entity record with the type
    ``prov:Collection`` asserted.

    :param identifier: Identifier for new collection record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The entity record with the collection type asserted.
    """
    # A collection has no formal attributes of its own; it is a typed entity.
    collection_record = self.new_record(
        PROV_ENTITY, identifier, None, other_attributes
    )
    collection_type = PROV['Collection']
    collection_record.add_asserted_type(collection_type)
    return collection_record
| 2067 | |
def membership(self, collection, entity):
    """
    Creates a new membership record stating that an entity belongs to a
    collection.

    The record is created without an identifier and without extra
    attributes.

    :param collection: Collection the entity is to be added to.
    :param entity: Entity to be added to the collection.
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_COLLECTION: collection,
        PROV_ATTR_ENTITY: entity,
    }
    return self.new_record(PROV_MEMBERSHIP, None, formal_attributes)
| 2081 | |
def plot(self, filename=None, show_nary=True, use_labels=False,
         show_element_attributes=True, show_relation_attributes=True):
    """
    Convenience function to plot a PROV document.

    :param filename: The filename to save to. If not given, it will open
        an interactive matplotlib plot. The filetype is determined from
        the filename ending.
    :type filename: String
    :param show_nary: Shows all elements in n-ary relations.
    :type show_nary: bool
    :param use_labels: Uses the `prov:label` property of an element as its
        name (instead of its identifier).
    :type use_labels: bool
    :param show_element_attributes: Shows attributes of elements.
    :type show_element_attributes: bool
    :param show_relation_attributes: Shows attributes of relations.
    :type show_relation_attributes: bool
    :raises ValueError: If the dot graph has no ``create_<format>`` method
        for the requested format.
    """
    # Lazy imports to have soft dependencies on pydot and matplotlib
    # (imported even later).
    from prov import dot

    # The output format is the lower-cased file extension without the
    # separator; an in-memory PNG is rendered for interactive display.
    if filename:
        extension = os.path.splitext(filename)[-1]
        image_format = extension.lower().strip(os.path.extsep)
    else:
        image_format = "png"

    graph = dot.prov_to_dot(
        self, show_nary=show_nary, use_labels=use_labels,
        show_element_attributes=show_element_attributes,
        show_relation_attributes=show_relation_attributes
    )
    creator_name = "create_%s" % image_format
    if not hasattr(graph, creator_name):
        raise ValueError("Format '%s' cannot be saved." % image_format)
    rendered = getattr(graph, creator_name)()

    if filename:
        with open(filename, "wb") as fh:
            fh.write(rendered)
        return

    # Use matplotlib to show the image as it likely is more
    # widespread then PIL and works nicely in the ipython notebook.
    import matplotlib.pylab as plt
    import matplotlib.image as mpimg

    max_size = 30

    with io.BytesIO(rendered) as buf:
        img = mpimg.imread(buf)
        # pydot makes a border around the image. remove it.
        img = img[1:-1, 1:-1]
        width, height = img.shape[1] / 100.0, img.shape[0] / 100.0
        longest_side = max(width, height)
        # Shrink proportionally so that neither side exceeds max_size.
        scale = max_size / longest_side if longest_side > max_size else 1.0
        size = (scale * width, scale * height)

        plt.figure(figsize=size)
        plt.subplots_adjust(bottom=0, top=1, left=0, right=1)
        plt.xticks([])
        plt.yticks([])
        plt.imshow(img)
        plt.axis("off")
        plt.show()
| 2149 | |
# Aliases
# Each name below is bound to the same function object as the method on its
# right-hand side, so both spellings accept the same arguments. The alias
# names follow the relation names used by PROV (e.g. wasGeneratedBy).
wasGeneratedBy = generation
used = usage
wasStartedBy = start
wasEndedBy = end
wasInvalidatedBy = invalidation
wasInformedBy = communication
wasAttributedTo = attribution
wasAssociatedWith = association
actedOnBehalfOf = delegation
wasInfluencedBy = influence
wasDerivedFrom = derivation
wasRevisionOf = revision
wasQuotedFrom = quotation
hadPrimarySource = primary_source
alternateOf = alternate
specializationOf = specialization
mentionOf = mention
hadMember = membership
| 2169 | |
| 2170 | |
class ProvDocument(ProvBundle):
    """Provenance Document.

    A document is a bundle without an identifier that can additionally hold
    named bundles, stored by their (validated) identifiers.
    """

    def __init__(self, records=None, namespaces=None):
        """
        Constructor.

        :param records: Optional records to add to the document (default: None).
        :param namespaces: Optional iterable of :py:class:`~prov.identifier.Namespace`s
            to set the document up with (default: None).
        """
        ProvBundle.__init__(
            self, records=records, identifier=None, namespaces=namespaces
        )
        # Maps each bundle's validated identifier to the bundle itself.
        self._bundles = dict()

    def __repr__(self):
        return '<ProvDocument>'

    def __eq__(self, other):
        """
        Two documents are equal when their document-level content is equal
        and they contain equal bundles under the same identifiers.

        :param other: The object to compare with.
        :return: bool
        """
        if not isinstance(other, ProvDocument):
            return False
        # Comparing the documents' content
        if not super(ProvDocument, self).__eq__(other):
            return False

        # Comparing the documents' bundles. The size check makes the
        # comparison symmetric: without it, a document would compare equal
        # to another one that merely holds additional bundles.
        if len(self._bundles) != len(other._bundles):
            return False
        for b_id, bundle in self._bundles.items():
            if b_id not in other._bundles:
                return False
            other_bundle = other._bundles[b_id]
            if bundle != other_bundle:
                return False

        # Everything is the same
        return True

    def is_document(self):
        """
        `True` if the object is a document, `False` otherwise.

        :return: bool
        """
        return True

    def is_bundle(self):
        """
        `True` if the object is a bundle, `False` otherwise.

        :return: bool
        """
        return False

    def has_bundles(self):
        """
        `True` if the object has at least one bundle, `False` otherwise.

        :return: bool
        """
        return len(self._bundles) > 0

    @property
    def bundles(self):
        """
        Returns bundles contained in the document

        :return: Iterable of :py:class:`ProvBundle`.
        """
        return self._bundles.values()

    # Transformations
    def flattened(self):
        """
        Flattens the document by moving all the records in its bundles up
        to the document level.

        If the document has no bundles, the document itself is returned
        rather than a copy.

        :returns: :py:class:`ProvDocument` -- the (new) flattened document.
        """
        if self._bundles:
            # Creating a new document for all the records
            new_doc = ProvDocument()
            bundled_records = itertools.chain(
                *[b.get_records() for b in self._bundles.values()]
            )
            for record in itertools.chain(self._records, bundled_records):
                new_doc.add_record(record)
            return new_doc
        else:
            # returning the same document
            return self

    def unified(self):
        """
        Returns a new document containing all records having same identifiers
        unified (including those inside bundles).

        :return: :py:class:`ProvDocument`
        """
        document = ProvDocument(self._unified_records())
        # NOTE: the new document shares this document's namespace manager
        # object; it is not copied.
        document._namespaces = self._namespaces
        for bundle in self.bundles:
            unified_bundle = bundle.unified()
            document.add_bundle(unified_bundle)
        return document

    def update(self, other):
        """
        Append all the records of the *other* document/bundle into this document.
        Bundles having same identifiers will be merged.

        :param other: The other document/bundle whose records to be appended.
        :type other: :py:class:`ProvDocument` or :py:class:`ProvBundle`
        :raises ProvException: If *other* is not a :py:class:`ProvBundle`
            (or subclass) instance.
        :returns: None.
        """
        if isinstance(other, ProvBundle):
            for record in other.get_records():
                self.add_record(record)
            if other.has_bundles():
                for bundle in other.bundles:
                    if bundle.identifier in self._bundles:
                        self._bundles[bundle.identifier].update(bundle)
                    else:
                        new_bundle = self.bundle(bundle.identifier)
                        new_bundle.update(bundle)
        else:
            raise ProvException(
                'ProvDocument.update(): The other is not a ProvDocument or '
                'ProvBundle instance (%s)' % type(other)
            )

    # Bundle operations
    def add_bundle(self, bundle, identifier=None):
        """
        Add a bundle to the current document.

        If the bundle is rejected (invalid or duplicate identifier), its
        identifier and namespace parent are left as they were before the
        call.

        :param bundle: The bundle to add to the document.
        :type bundle: :py:class:`ProvBundle`
        :param identifier: The (optional) identifier to use for the bundle
            (default: None). If none given, use the identifier from the bundle
            itself.
        :raises ProvException: If *bundle* is not a :py:class:`ProvBundle`,
            is a document with nested bundles, has no identifier, has an
            identifier that is not a valid qualified name, or a bundle with
            the same identifier already exists in this document.
        """
        if not isinstance(bundle, ProvBundle):
            raise ProvException(
                'Only a ProvBundle instance can be added as a bundle in a '
                'ProvDocument.'
            )

        if bundle.is_document():
            if bundle.has_bundles():
                raise ProvException(
                    'Cannot add a document with nested bundles as a bundle.'
                )
            # Make it a new ProvBundle
            new_bundle = ProvBundle(namespaces=bundle.namespaces)
            new_bundle.update(bundle)
            bundle = new_bundle

        if identifier is None:
            identifier = bundle.identifier

        if not identifier:
            raise ProvException('The provided bundle has no identifier')

        # Link the bundle namespace manager to the document's. This happens
        # before validation, as in the original code; the previous parent is
        # remembered so a rejected bundle can be restored.
        namespaces = bundle._namespaces
        previous_parent = getattr(namespaces, 'parent', None)
        namespaces.parent = self._namespaces

        valid_id = bundle.valid_qualified_name(identifier)
        if valid_id is None:
            # Same check (and message) as in bundle()
            error = 'The provided identifier "%s" is not valid' % identifier
        elif valid_id in self._bundles:
            error = 'A bundle with that identifier already exists'
        else:
            error = None

        if error is not None:
            # Undo the namespace link so the rejected bundle is unchanged.
            namespaces.parent = previous_parent
            raise ProvException(error)

        # IMPORTANT: Rewriting the bundle identifier for consistency
        bundle._identifier = valid_id
        self._bundles[valid_id] = bundle
        bundle._document = self

    def bundle(self, identifier):
        """
        Creates a new, empty bundle in the current document and returns it.

        :param identifier: The identifier to use for the bundle.
        :raises ProvException: If the identifier is None, is not a valid
            qualified name, or is already used by another bundle.
        :return: :py:class:`ProvBundle`
        """
        if identifier is None:
            raise ProvException(
                'An identifier is required. Cannot create an unnamed bundle.'
            )
        valid_id = self.valid_qualified_name(identifier)
        if valid_id is None:
            raise ProvException(
                'The provided identifier "%s" is not valid' % identifier
            )
        if valid_id in self._bundles:
            raise ProvException('A bundle with that identifier already exists')
        b = ProvBundle(identifier=valid_id, document=self)
        self._bundles[valid_id] = b
        return b

    # Serializing and deserializing
    def serialize(self, destination=None, format='json', **args):
        """
        Serialize the :py:class:`ProvDocument` to the destination.

        Available serializers can be queried by the value of
        `:py:attr:~prov.serializers.Registry.serializers` after loading them via
        `:py:func:~prov.serializers.Registry.load_serializers()`.

        :param destination: Where to write the output. Either a stream
            object (anything with a ``write`` attribute) or a local file
            location given as a string. Default is `None`, which serializes
            as a string.
        :param format: Serialization format (default: 'json'), defaulting to
            PROV-JSON.
        :param args: Extra keyword arguments passed on to the serializer.
        :return: Serialization in a string if no destination was given,
            None otherwise.
        """
        serializer = serializers.get(format)(self)
        if destination is None:
            stream = io.StringIO()
            serializer.serialize(stream, **args)
            return stream.getvalue()
        if hasattr(destination, "write"):
            stream = destination
            serializer.serialize(stream, **args)
        else:
            location = destination
            _scheme, netloc, path, _params, _query, _fragment = \
                urlparse(location)
            if netloc != "":
                print("WARNING: not saving as location " +
                      "is not a local file reference")
                return
            # Serialize into a temporary file first and move it into place
            # afterwards, so a failing serializer never leaves a partially
            # written file at the destination.
            fd, name = tempfile.mkstemp()
            try:
                with os.fdopen(fd, "wb") as stream:
                    serializer.serialize(stream, **args)
                shutil.move(name, path)
            finally:
                # After a successful move the temporary file is gone; it
                # only still exists when serializing or moving failed.
                if os.path.exists(name):
                    os.remove(name)

    @staticmethod
    def deserialize(source=None, content=None, format='json', **args):
        """
        Deserialize the :py:class:`ProvDocument` from source (a stream or a
        file path) or directly from a string content.

        Available serializers can be queried by the value of
        `:py:attr:~prov.serializers.Registry.serializers` after loading them via
        `:py:func:~prov.serializers.Registry.load_serializers()`.

        Note: Not all serializers support deserialization.

        If both *content* and *source* are given, *content* takes precedence.
        If neither is given, None is returned.

        :param source: Stream object (anything with a ``read`` attribute) or
            file path to deserialize the PROV document from (default: None).
        :param content: String to deserialize the PROV document from
            (default: None).
        :param format: Serialization format (default: 'json'), defaulting to
            PROV-JSON.
        :param args: Extra keyword arguments passed on to the serializer.
        :return: :py:class:`ProvDocument`
        """
        serializer = serializers.get(format)()

        if content is not None:
            # io.StringIO only accepts unicode strings
            stream = io.StringIO(
                content if not isinstance(content, six.binary_type)
                else content.decode()
            )
            return serializer.deserialize(stream, **args)

        if source is not None:
            if hasattr(source, "read"):
                return serializer.deserialize(source, **args)
            else:
                with open(source) as f:
                    return serializer.deserialize(f, **args)
| 2446 | |
| 2447 | |
def sorted_attributes(element, attributes):
    """
    Helper function sorting attributes into the order required by PROV-XML.

    Attributes are emitted in groups: first the formal attributes of the
    element's record class, then label, location, role, type and value,
    and finally every remaining attribute. Each group is sorted by the text
    of the attribute name and then the text of its value.

    :param element: The prov element used to derive the type and the
        attribute order for the type.
    :param attributes: The attributes to sort, as (name, value) pairs.
    :return: A new list with the attributes in PROV-XML order.
    """
    remaining = list(attributes)

    # Formal attributes come first, followed by the attributes that are
    # universal amongst all elements.
    ordering = list(PROV_REC_CLS[element].FORMAL_ATTRIBUTES)
    ordering += [PROV_LABEL, PROV_LOCATION, PROV_ROLE, PROV_TYPE, PROV_VALUE]

    # Sort key. The PROV XML specification talks about alphabetical
    # sorting. We interpret it as sorting by tag including the prefix
    # first and then sorting by the text, also including the namespace
    # prefix if given.
    def sort_key(pair):
        value = pair[1]
        if hasattr(value, "value"):
            value = value.value
        return six.text_type(pair[0]), six.text_type(value)

    result = []
    for attr_name in ordering:
        # Split the remaining pairs into those carrying this attribute
        # name and the rest, keeping their relative order.
        matching, others = [], []
        for pair in remaining:
            if pair[0] != attr_name:
                others.append(pair)
            else:
                matching.append(pair)
        remaining = others
        matching.sort(key=sort_key)
        result.extend(matching)

    # Add remaining attributes. According to the spec, the other attributes
    # have a fixed alphabetical order.
    remaining.sort(key=sort_key)
    result.extend(remaining)

    return result
