Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/prov/model.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 """Python implementation of the W3C Provenance Data Model (PROV-DM), including | |
2 support for PROV-JSON import/export | |
3 | |
4 References: | |
5 | |
6 PROV-DM: http://www.w3.org/TR/prov-dm/ | |
7 PROV-JSON: https://provenance.ecs.soton.ac.uk/prov-json/ | |
8 """ | |
9 from __future__ import (absolute_import, division, print_function, | |
10 unicode_literals) | |
11 | |
12 from collections import defaultdict | |
13 from copy import deepcopy | |
14 import datetime | |
15 import io | |
16 import itertools | |
17 import logging | |
18 import os | |
19 import shutil | |
20 import tempfile | |
21 | |
22 import dateutil.parser | |
23 from prov import Error, serializers | |
24 from prov.constants import * | |
25 from prov.identifier import Identifier, QualifiedName, Namespace | |
26 from six.moves.urllib.parse import urlparse | |
27 | |
28 | |
# Package metadata.
__author__ = "Trung Dong Huynh"
__email__ = "trungdong@donggiang.com"


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
34 | |
35 | |
36 # Data Types | |
def _ensure_datetime(value):
    """Parse ``value`` with dateutil when it is a string.

    :param value: A string, or any other object.
    :return: The result of :py:func:`dateutil.parser.parse` for string
        input; any non-string input is returned unchanged.
    """
    is_text = isinstance(value, six.string_types)
    return dateutil.parser.parse(value) if is_text else value
42 | |
43 | |
def parse_xsd_datetime(value):
    """Parse a date/time string, returning None when it cannot be parsed.

    :param value: Text to hand to :py:func:`dateutil.parser.parse`.
    :return: The parsed value, or None if ``value`` is not a parsable
        date/time.
    """
    try:
        return dateutil.parser.parse(value)
    except (ValueError, TypeError, OverflowError):
        # ValueError: text that is not a date/time; TypeError: non-string
        # input; OverflowError: a date outside the platform's range.
        # None is the "could not parse" signal, so none of these escape.
        return None
50 | |
51 | |
def parse_boolean(value):
    """Interpret the text of a boolean, ignoring case.

    :param value: String to interpret.
    :return: False for "false"/"0", True for "true"/"1", None for any
        other text.
    """
    if value.lower() in ("false", "0"):
        return False
    if value.lower() in ("true", "1"):
        return True
    return None
59 | |
# Parsers keyed by Python type.
DATATYPE_PARSERS = {datetime.datetime: parse_xsd_datetime}


# Mappings for XSD datatypes to Python standard types: each entry is the
# callable applied to a literal's text by parse_xsd_types().
XSD_DATATYPE_PARSERS = {
    XSD_STRING: six.text_type,
    XSD_DOUBLE: float,
    # long on Python 2, int on Python 3
    XSD_LONG: six.integer_types[-1],
    XSD_INT: int,
    XSD_BOOLEAN: parse_boolean,
    XSD_DATETIME: parse_xsd_datetime,
    XSD_ANYURI: Identifier,
}
76 | |
77 | |
def parse_xsd_types(value, datatype):
    """Convert ``value`` with the parser registered for ``datatype``.

    :param value: The value to convert.
    :param datatype: Key looked up in ``XSD_DATATYPE_PARSERS``.
    :return: The parser's result, or None when no parser is registered for
        ``datatype``.
    """
    if datatype not in XSD_DATATYPE_PARSERS:
        return None
    convert = XSD_DATATYPE_PARSERS[datatype]
    return convert(value)
83 | |
84 | |
def first(a_set):
    """Return the first item yielded by ``a_set``, or None if it is empty.

    :param a_set: Any iterable.
    """
    for item in a_set:
        return item
    return None
87 | |
88 | |
def _ensure_multiline_string_triple_quoted(value):
    """Render ``value`` as a double-quoted string.

    The value is converted to text and every double quote in it is
    backslash-escaped. Text containing a newline is wrapped in triple
    double quotes, anything else in single double quotes.

    :param value: Object to render.
    :return: The quoted text.
    """
    text = six.text_type(value).replace('"', '\\"')
    template = '"""%s"""' if '\n' in text else '"%s"'
    return template % text
98 | |
99 | |
def encoding_provn_value(value):
    """Encode a Python value as text for a PROV-N attribute value.

    :param value: The value to encode.
    :return: Strings are quoted; ``datetime``, ``float`` and ``bool`` values
        are quoted and suffixed with ``%% xsd:<type>``; anything else is
        simply converted to text.
    """
    if isinstance(value, six.string_types):
        return _ensure_multiline_string_triple_quoted(value)
    if isinstance(value, datetime.datetime):
        # str.format() leaves '%%' untouched, so the marker stays doubled.
        return u'"{0}" %% xsd:dateTime'.format(value.isoformat())
    if isinstance(value, float):
        # %-formatting halves '%%%%' into the same doubled marker.
        return u'"%g" %%%% xsd:float' % value
    if isinstance(value, bool):
        return u'"%i" %%%% xsd:boolean' % value
    # TODO: QName export
    return six.text_type(value)
112 | |
113 | |
@six.python_2_unicode_compatible
class Literal(object):
    """A literal value with an optional datatype and language tag.

    The value (and the language tag, when given) is always stored as text.
    """

    def __init__(self, value, datatype=None, langtag=None):
        """
        Constructor.

        :param value: The literal's value; stored as text.
        :param datatype: Datatype of the literal (default: None).
        :param langtag: Language tag (default: None). When given, the
            datatype is forced to ``prov:InternationalizedString``.
        """
        self._value = six.text_type(value)  # value is always a string
        if langtag:
            if datatype is None:
                logger.debug(
                    'Assuming prov:InternationalizedString as the type of '
                    '"%s"@%s', value, langtag
                )
                datatype = PROV["InternationalizedString"]
            # PROV JSON states that the type field must not be set when
            # using the lang attribute and PROV XML requires it to be an
            # internationalized string.
            elif datatype != PROV["InternationalizedString"]:
                # Logger.warn() is a deprecated alias of Logger.warning().
                logger.warning(
                    'Invalid data type (%s) for "%s"@%s, overridden as '
                    'prov:InternationalizedString.',
                    datatype, value, langtag
                )
                datatype = PROV["InternationalizedString"]
        self._datatype = datatype
        # langtag is always a string
        self._langtag = six.text_type(langtag) if langtag is not None else None

    def __str__(self):
        return self.provn_representation()

    def __repr__(self):
        return u'<Literal: %s>' % self.provn_representation()

    def __eq__(self, other):
        # Literals are equal only to other Literals with the same value,
        # datatype and language tag.
        if not isinstance(other, Literal):
            return False
        return (
            self._value == other.value and
            self._datatype == other.datatype and
            self._langtag == other.langtag
        )

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash((self._value, self._datatype, self._langtag))

    @property
    def value(self):
        """The literal's value, as text."""
        return self._value

    @property
    def datatype(self):
        """The literal's datatype (may be None)."""
        return self._datatype

    @property
    def langtag(self):
        """The literal's language tag (may be None)."""
        return self._langtag

    def has_no_langtag(self):
        """True if no language tag is stored (it is None)."""
        return self._langtag is None

    def provn_representation(self):
        """
        Returns the PROV-N text for this literal.

        :return: ``"value"@langtag`` when a language tag is set, otherwise
            ``"value" %% datatype``.
        """
        if self._langtag:
            # a language tag can only go with prov:InternationalizedString
            return '%s@%s' % (
                _ensure_multiline_string_triple_quoted(self._value),
                six.text_type(self._langtag)
            )
        else:
            return '%s %%%% %s' % (
                _ensure_multiline_string_triple_quoted(self._value),
                six.text_type(self._datatype)
            )
186 | |
187 | |
188 # Exceptions and warnings | |
class ProvException(Error):
    """Root of the exceptions raised by the PROV model."""
192 | |
193 | |
class ProvWarning(Warning):
    """Root of the warnings issued by the PROV model."""
197 | |
198 | |
@six.python_2_unicode_compatible
class ProvExceptionInvalidQualifiedName(ProvException):
    """Raised when a value cannot be used as a qualified name."""

    # The intended (invalid) qualified name; set per instance.
    qname = None

    def __init__(self, qname):
        """
        Constructor.

        :param qname: The invalid qualified name.
        """
        self.qname = qname

    def __str__(self):
        message = u'Invalid Qualified Name: %s' % self.qname
        return message
216 | |
217 | |
@six.python_2_unicode_compatible
class ProvElementIdentifierRequired(ProvException):
    """Raised when a PROV element is created without an identifier."""

    def __str__(self):
        return (
            u'An identifier is missing. All PROV elements require a valid '
            u'identifier.'
        )
225 | |
226 | |
227 # PROV records | |
@six.python_2_unicode_compatible
class ProvRecord(object):
    """Base class for PROV records.

    A record belongs to a bundle, may have an identifier, and stores its
    attributes as a mapping from attribute name to a set of values.
    """

    # Names of the formal (positional) attributes, in PROV-N order.
    # Subclasses override this.
    FORMAL_ATTRIBUTES = ()

    # PROV type of record. Subclasses override this.
    _prov_type = None

    def __init__(self, bundle, identifier, attributes=None):
        """
        Constructor.

        :param bundle: Bundle for the PROV record.
        :param identifier: (Unique) identifier of the record.
        :param attributes: Attributes to associate with the record
            (default: None).
        """
        self._bundle = bundle
        self._identifier = identifier
        self._attributes = defaultdict(set)
        if attributes:
            self.add_attributes(attributes)

    def __hash__(self):
        return hash(
            (self.get_type(), self._identifier, frozenset(self.attributes))
        )

    def copy(self):
        """
        Return a new record of the same PROV type, with the same bundle,
        identifier and attributes.
        """
        return PROV_REC_CLS[self.get_type()](
            self._bundle, self.identifier, self.attributes
        )

    def get_type(self):
        """Returns the PROV type of the record."""
        return self._prov_type

    def get_asserted_types(self):
        """Returns the set of all asserted PROV types of this record."""
        return self._attributes[PROV_TYPE]

    def add_asserted_type(self, type_identifier):
        """
        Adds a PROV type assertion to the record.

        :param type_identifier: PROV namespace identifier to add.
        """
        self._attributes[PROV_TYPE].add(type_identifier)

    def get_attribute(self, attr_name):
        """
        Returns the values of the attribute of the given name.

        :param attr_name: Name of the attribute.
        :return: Set of the values stored for the attribute (empty if there
            are none).
        """
        attr_name = self._bundle.valid_qualified_name(attr_name)
        return self._attributes[attr_name]

    @property
    def identifier(self):
        """Record's identifier."""
        return self._identifier

    @property
    def attributes(self):
        """
        All record attributes.

        :return: List of tuples (name, value)
        """
        return [
            (attr_name, value)
            for attr_name, values in self._attributes.items()
            for value in values
        ]

    @property
    def args(self):
        """
        All values of the record's formal attributes.

        :return: Tuple, with None for a formal attribute that has no value.
        """
        return tuple(
            first(self._attributes[attr_name])
            for attr_name in self.FORMAL_ATTRIBUTES
        )

    @property
    def formal_attributes(self):
        """
        All names and values of the record's formal attributes.

        :return: Tuple of tuples (name, value)
        """
        return tuple(
            (attr_name, first(self._attributes[attr_name]))
            for attr_name in self.FORMAL_ATTRIBUTES
        )

    @property
    def extra_attributes(self):
        """
        All names and values of the record's attributes that are not formal
        attributes.

        :return: List of tuples (name, value)
        """
        return [
            (attr_name, attr_value) for attr_name, attr_value in self.attributes
            if attr_name not in self.FORMAL_ATTRIBUTES
        ]

    @property
    def bundle(self):
        """
        Bundle of the record.

        :return: :py:class:`ProvBundle`
        """
        return self._bundle

    @property
    def label(self):
        """Identifying label of the record: one of its ``prov:label``
        values if any is set, otherwise its identifier."""
        return first(self._attributes[PROV_LABEL]) \
            if self._attributes[PROV_LABEL] else self._identifier

    @property
    def value(self):
        """Set of the record's ``prov:value`` values."""
        return self._attributes[PROV_VALUE]

    # Handling attributes
    def _auto_literal_conversion(self, literal):
        """Normalise an attribute value to a standard representation.

        :param literal: A record, string, qualified name, Literal or any
            other value.
        :return: The converted value, or ``literal`` itself when no
            conversion applies.
        """
        if isinstance(literal, ProvRecord):
            # Use the QName of the record as the literal
            literal = literal.identifier

        # six.string_types matches the string checks used by the rest of
        # this module.
        if isinstance(literal, six.string_types):
            return six.text_type(literal)
        elif isinstance(literal, QualifiedName):
            return self._bundle.valid_qualified_name(literal)
        elif isinstance(literal, Literal) and literal.has_no_langtag():
            if literal.datatype:
                # try convert generic Literal object to Python standard type
                # this is to match JSON decoding's literal conversion
                value = parse_xsd_types(literal.value, literal.datatype)
            else:
                # A literal with no datatype nor langtag defined
                # try auto-converting the value
                value = self._auto_literal_conversion(literal.value)
            if value is not None:
                return value

        # No conversion possible, return the original value
        return literal

    def add_attributes(self, attributes):
        """
        Add attributes to the record.

        :param attributes: Dictionary of attributes, with keys being qualified
            identifiers. Alternatively an iterable of tuples (key, value) with
            the keys satisfying the same condition. Pairs whose value is None
            are skipped.
        :raises ProvExceptionInvalidQualifiedName: If an attribute name is
            not a valid qualified name.
        :raises ProvException: If a value is invalid for its attribute, or if
            a second, different value is given for a single-valued attribute.
        """
        if not attributes:
            return

        if isinstance(attributes, dict):
            # Converting the dictionary into (attribute, value) pairs
            attributes = attributes.items()

        # The pairs are scanned twice (collection check, then the main
        # loop). Materialise them so one-shot iterables such as generators
        # are not exhausted by the first scan.
        attributes = list(attributes)

        # Check if one of the attributes specifies that the current type
        # is a collection. In that case multiple attributes of the same
        # type are allowed.
        is_collection = PROV_ATTR_COLLECTION in [_i[0] for _i in attributes]

        for attr_name, original_value in attributes:
            if original_value is None:
                continue

            # make sure the attribute name is valid
            attr = self._bundle.valid_qualified_name(attr_name)
            if attr is None:
                raise ProvExceptionInvalidQualifiedName(attr_name)

            if attr in PROV_ATTRIBUTE_QNAMES:
                # Expecting a qualified name
                qname = original_value.identifier \
                    if isinstance(original_value, ProvRecord) \
                    else original_value
                value = self._bundle.valid_qualified_name(qname)
            elif attr in PROV_ATTRIBUTE_LITERALS:
                value = original_value \
                    if isinstance(original_value, datetime.datetime) \
                    else parse_xsd_datetime(original_value)
            else:
                value = self._auto_literal_conversion(original_value)

            if value is None:
                raise ProvException(
                    'Invalid value for attribute %s: %s' %
                    (attr, original_value)
                )

            if not is_collection and attr in PROV_ATTRIBUTES and \
                    self._attributes[attr]:
                existing_value = first(self._attributes[attr])
                is_not_same_value = True
                try:
                    is_not_same_value = value != existing_value
                except TypeError:
                    # Cannot compare them
                    pass  # consider them different values

                if is_not_same_value:
                    raise ProvException(
                        'Cannot have more than one value for attribute %s'
                        % attr
                    )
                else:
                    # Same value, ignore it
                    continue

            self._attributes[attr].add(value)

    def __eq__(self, other):
        # Anything that is not a record can never be equal. Checking first
        # avoids an AttributeError on objects without get_type().
        if not isinstance(other, ProvRecord):
            return False
        if self.get_type() != other.get_type():
            return False
        if self._identifier and not (self._identifier == other._identifier):
            return False

        return set(self.attributes) == set(other.attributes)

    def __str__(self):
        return self.get_provn()

    def get_provn(self):
        """
        Returns the PROV-N representation of the record.

        :return: String
        """
        items = []

        # Generating identifier
        relation_id = ''  # default blank
        if self._identifier:
            identifier = six.text_type(self._identifier)  # TODO: QName export
            if self.is_element():
                items.append(identifier)
            else:
                # this is a relation
                # relations use ; to separate identifiers
                relation_id = identifier + '; '

        # Writing out the formal attributes
        for attr in self.FORMAL_ATTRIBUTES:
            # The membership test comes first so the defaultdict does not
            # gain an empty entry for an attribute that was never set.
            if attr in self._attributes and self._attributes[attr]:
                # Formal attributes always have single values
                value = first(self._attributes[attr])
                # TODO: QName export
                items.append(
                    value.isoformat() if isinstance(value, datetime.datetime)
                    else six.text_type(value)
                )
            else:
                items.append('-')

        # Writing out the remaining attributes
        extra = []
        for attr in self._attributes:
            if attr not in self.FORMAL_ATTRIBUTES:
                for value in self._attributes[attr]:
                    try:
                        # try if there is a prov-n representation defined
                        provn_representation = value.provn_representation()
                    except AttributeError:
                        provn_representation = encoding_provn_value(value)
                    # TODO: QName export
                    extra.append(
                        '%s=%s' % (six.text_type(attr), provn_representation)
                    )

        if extra:
            items.append('[%s]' % ', '.join(extra))
        prov_n = '%s(%s%s)' % (
            PROV_N_MAP[self.get_type()], relation_id, ', '.join(items)
        )
        return prov_n

    def is_element(self):
        """
        True, if the record is an element, False otherwise.

        :return: bool
        """
        return False

    def is_relation(self):
        """
        True, if the record is a relation, False otherwise.

        :return: bool
        """
        return False
543 | |
544 | |
545 # Abstract classes for elements and relations | |
class ProvElement(ProvRecord):
    """Provenance Element (nodes in the provenance graph)."""

    def __init__(self, bundle, identifier, attributes=None):
        """
        Constructor.

        :param bundle: Bundle for the element.
        :param identifier: Identifier of the element; must not be None.
        :param attributes: Attributes to associate with the element
            (default: None).
        :raises ProvElementIdentifierRequired: If ``identifier`` is None.
        """
        # Every PROV element must carry an identifier.
        if identifier is None:
            raise ProvElementIdentifierRequired()
        super(ProvElement, self).__init__(bundle, identifier, attributes)

    def is_element(self):
        """
        Always True: this record is an element.

        :return: bool
        """
        return True

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<%s: %s>' % (class_name, self._identifier)
566 | |
567 | |
class ProvRelation(ProvRecord):
    """Provenance Relationship (edge between nodes)."""

    def is_relation(self):
        """
        Always True: this record is a relation.

        :return: bool
        """
        return True

    def __repr__(self):
        # The identifier part is shown only when an identifier is set.
        id_part = ''
        if self._identifier:
            id_part = ' %s' % self._identifier
        # Only the values of the first two formal attributes are shown.
        (_, element_1), (_, element_2) = self.formal_attributes[:2]
        class_name = self.__class__.__name__
        return '<%s:%s (%s, %s)>' % (class_name, id_part, element_1, element_2)
587 | |
588 | |
589 # Component 1: Entities and Activities | |
class ProvEntity(ProvElement):
    """Provenance Entity element."""

    _prov_type = PROV_ENTITY

    # Convenience assertions: each one passes this entity as the first
    # (formal) argument to a method of its bundle and then returns the
    # entity itself, so calls can be chained.
    def wasGeneratedBy(self, activity, time=None, attributes=None):
        """
        Asserts a generation of this entity via the bundle's ``generation``.

        :param activity: Activity or string identifier of the activity
            involved in the generation.
        :param time: Optional time for the generation (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can
            be parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This entity.
        """
        bundle = self._bundle
        bundle.generation(self, activity, time, other_attributes=attributes)
        return self

    def wasInvalidatedBy(self, activity, time=None, attributes=None):
        """
        Asserts an invalidation of this entity via the bundle's
        ``invalidation``.

        :param activity: Activity or string identifier of the activity
            involved in the invalidation.
        :param time: Optional time for the invalidation (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can
            be parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This entity.
        """
        bundle = self._bundle
        bundle.invalidation(self, activity, time, other_attributes=attributes)
        return self

    def wasDerivedFrom(self, usedEntity, activity=None, generation=None,
                       usage=None, attributes=None):
        """
        Asserts a derivation of this entity from a used entity via the
        bundle's ``derivation``.

        :param usedEntity: Entity or a string identifier for the used entity.
        :param activity: Activity or string identifier of the activity
            involved in the derivation (default: None).
        :param generation: Optional generation to state a qualified
            derivation (default: None).
        :param usage: Optional usage to state a qualified derivation
            (default: None).
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This entity.
        """
        bundle = self._bundle
        bundle.derivation(
            self, usedEntity, activity, generation, usage,
            other_attributes=attributes
        )
        return self

    def wasAttributedTo(self, agent, attributes=None):
        """
        Asserts an attribution of this entity to an agent via the bundle's
        ``attribution``.

        :param agent: Agent or string identifier of the agent involved in the
            attribution.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This entity.
        """
        bundle = self._bundle
        bundle.attribution(self, agent, other_attributes=attributes)
        return self

    def alternateOf(self, alternate2):
        """
        Asserts that this entity is an alternate of another entity via the
        bundle's ``alternate``.

        :param alternate2: Entity or a string identifier for the second
            entity.
        :return: This entity.
        """
        bundle = self._bundle
        bundle.alternate(self, alternate2)
        return self

    def specializationOf(self, generalEntity):
        """
        Asserts that this entity specialises a general entity via the
        bundle's ``specialization``.

        :param generalEntity: Entity or a string identifier for the general
            entity.
        :return: This entity.
        """
        bundle = self._bundle
        bundle.specialization(self, generalEntity)
        return self

    def hadMember(self, entity):
        """
        Asserts that an entity is a member of this entity (as a collection)
        via the bundle's ``membership``.

        :param entity: Entity to be added to the collection.
        :return: This entity.
        """
        bundle = self._bundle
        bundle.membership(self, entity)
        return self
690 | |
691 | |
class ProvActivity(ProvElement):
    """Provenance Activity element."""

    FORMAL_ATTRIBUTES = (PROV_ATTR_STARTTIME, PROV_ATTR_ENDTIME)

    _prov_type = PROV_ACTIVITY

    # Convenient methods
    def set_time(self, startTime=None, endTime=None):
        """
        Sets the time this activity took place.

        A time that is given replaces any value stored before; a time left
        as None is not touched.

        :param startTime: Start time for the activity.
            Either a :py:class:`datetime.datetime` object or a string that can
            be parsed by :py:func:`dateutil.parser`.
        :param endTime: End time for the activity.
            Either a :py:class:`datetime.datetime` object or a string that can
            be parsed by :py:func:`dateutil.parser`.
        """
        # Strings are parsed so the stored value is a datetime, as the
        # getters below document and as add_attributes() does for
        # date/time attributes.
        if startTime is not None:
            self._attributes[PROV_ATTR_STARTTIME] = {
                _ensure_datetime(startTime)
            }
        if endTime is not None:
            self._attributes[PROV_ATTR_ENDTIME] = {_ensure_datetime(endTime)}

    def get_startTime(self):
        """
        Returns the time the activity started, or None if it is not set.

        :return: :py:class:`datetime.datetime`
        """
        values = self._attributes[PROV_ATTR_STARTTIME]
        return first(values) if values else None

    def get_endTime(self):
        """
        Returns the time the activity ended, or None if it is not set.

        :return: :py:class:`datetime.datetime`
        """
        values = self._attributes[PROV_ATTR_ENDTIME]
        return first(values) if values else None

    # Convenient assertions that take the current ProvActivity as the first
    # (formal) argument; each returns the activity so calls can be chained.
    def used(self, entity, time=None, attributes=None):
        """
        Creates a new usage record for this activity.

        :param entity: Entity or string identifier of the entity involved in
            the usage relationship.
        :param time: Optional time for the usage (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can
            be parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This activity.
        """
        self._bundle.usage(self, entity, time, other_attributes=attributes)
        return self

    def wasInformedBy(self, informant, attributes=None):
        """
        Creates a new communication record for this activity.

        :param informant: The informing activity (relationship source).
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This activity.
        """
        self._bundle.communication(
            self, informant, other_attributes=attributes
        )
        return self

    def wasStartedBy(self, trigger, starter=None, time=None, attributes=None):
        """
        Creates a new start record for this activity. The activity did not
        exist before the start by the trigger.

        :param trigger: Entity triggering the start of this activity.
        :param starter: Optionally extra activity to state a qualified start
            through which the trigger entity for the start is generated
            (default: None).
        :param time: Optional time for the start (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can
            be parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This activity.
        """
        self._bundle.start(
            self, trigger, starter, time, other_attributes=attributes
        )
        return self

    def wasEndedBy(self, trigger, ender=None, time=None, attributes=None):
        """
        Creates a new end record for this activity.

        :param trigger: Entity triggering the end of this activity.
        :param ender: Optionally extra activity to state a qualified end
            through which the trigger entity for the end is generated
            (default: None).
        :param time: Optional time for the end (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can
            be parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This activity.
        """
        self._bundle.end(
            self, trigger, ender, time, other_attributes=attributes
        )
        return self

    def wasAssociatedWith(self, agent, plan=None, attributes=None):
        """
        Creates a new association record for this activity.

        :param agent: Agent or string identifier of the agent involved in the
            association.
        :param plan: Optionally extra entity to state qualified association
            through an internal plan (default: None).
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This activity.
        """
        self._bundle.association(
            self, agent, plan, other_attributes=attributes
        )
        return self
817 | |
818 | |
class ProvGeneration(ProvRelation):
    """Provenance Generation relationship."""

    _prov_type = PROV_GENERATION

    # Formal arguments, in order: entity, activity, time.
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ENTITY,
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_TIME,
    )
825 | |
826 | |
class ProvUsage(ProvRelation):
    """Provenance Usage relationship."""

    _prov_type = PROV_USAGE

    # Formal arguments, in order: activity, entity, time.
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_ENTITY,
        PROV_ATTR_TIME,
    )
833 | |
834 | |
class ProvCommunication(ProvRelation):
    """Provenance Communication relationship."""

    _prov_type = PROV_COMMUNICATION

    # Formal arguments, in order: informed, informant.
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_INFORMED,
        PROV_ATTR_INFORMANT,
    )
841 | |
842 | |
class ProvStart(ProvRelation):
    """Provenance Start relationship."""

    _prov_type = PROV_START

    # Formal arguments, in order: activity, trigger, starter, time.
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_TRIGGER,
        PROV_ATTR_STARTER,
        PROV_ATTR_TIME,
    )
850 | |
851 | |
class ProvEnd(ProvRelation):
    """Provenance End relationship."""

    _prov_type = PROV_END

    # Formal arguments, in order: activity, trigger, ender, time.
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_TRIGGER,
        PROV_ATTR_ENDER,
        PROV_ATTR_TIME,
    )
859 | |
860 | |
class ProvInvalidation(ProvRelation):
    """Provenance Invalidation relationship."""

    _prov_type = PROV_INVALIDATION

    # Formal arguments, in order: entity, activity, time.
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ENTITY,
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_TIME,
    )
867 | |
868 | |
869 # Component 2: Derivations | |
class ProvDerivation(ProvRelation):
    """Provenance Derivation relationship."""

    _prov_type = PROV_DERIVATION

    # Formal arguments, in order: generated entity, used entity, activity,
    # generation, usage.
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_GENERATED_ENTITY,
        PROV_ATTR_USED_ENTITY,
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_GENERATION,
        PROV_ATTR_USAGE,
    )
878 | |
879 | |
880 # Component 3: Agents, Responsibility, and Influence | |
class ProvAgent(ProvElement):
    """Provenance Agent element."""

    _prov_type = PROV_AGENT

    # Convenience assertion: passes this agent as the first (formal)
    # argument to the bundle and returns the agent so calls can be chained.
    def actedOnBehalfOf(self, responsible, activity=None, attributes=None):
        """
        Asserts a delegation via the bundle's ``delegation``, with this
        agent as the first argument.

        :param responsible: The responsible agent.
        :param activity: Optionally extra activity to state qualified
            delegation internally (default: None).
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This agent.
        """
        bundle = self._bundle
        bundle.delegation(
            self, responsible, activity, other_attributes=attributes
        )
        return self
902 | |
903 | |
class ProvAttribution(ProvRelation):
    """Provenance Attribution relationship."""

    _prov_type = PROV_ATTRIBUTION

    # Formal arguments, in order: entity, agent.
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ENTITY,
        PROV_ATTR_AGENT,
    )
910 | |
911 | |
class ProvAssociation(ProvRelation):
    """Provenance Association relationship."""

    _prov_type = PROV_ASSOCIATION

    # Formal arguments, in order: activity, agent, plan.
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_AGENT,
        PROV_ATTR_PLAN,
    )
918 | |
919 | |
class ProvDelegation(ProvRelation):
    """Provenance Delegation relationship."""

    _prov_type = PROV_DELEGATION

    # Formal arguments, in order: delegate, responsible, activity.
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_DELEGATE,
        PROV_ATTR_RESPONSIBLE,
        PROV_ATTR_ACTIVITY,
    )
927 | |
928 | |
class ProvInfluence(ProvRelation):
    """Provenance Influence relationship."""

    _prov_type = PROV_INFLUENCE

    # Formal arguments, in order: influencee, influencer.
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_INFLUENCEE,
        PROV_ATTR_INFLUENCER,
    )
935 | |
936 | |
937 # Component 5: Alternate Entities | |
class ProvSpecialization(ProvRelation):
    """Provenance Specialization relationship."""

    _prov_type = PROV_SPECIALIZATION

    # Formal arguments, in order: specific entity, general entity.
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_SPECIFIC_ENTITY,
        PROV_ATTR_GENERAL_ENTITY,
    )
944 | |
945 | |
class ProvAlternate(ProvRelation):
    """Provenance Alternate relationship."""

    _prov_type = PROV_ALTERNATE

    # Formal arguments, in order: alternate1, alternate2.
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ALTERNATE1,
        PROV_ATTR_ALTERNATE2,
    )
952 | |
953 | |
class ProvMention(ProvSpecialization):
    """Provenance Mention relationship (specific Specialization).

    Formal attributes, in order: the specific entity, the general entity
    and the bundle.
    """

    _prov_type = PROV_MENTION

    FORMAL_ATTRIBUTES = (
        PROV_ATTR_SPECIFIC_ENTITY,
        PROV_ATTR_GENERAL_ENTITY,
        PROV_ATTR_BUNDLE,
    )
961 | |
962 | |
963 # Component 6: Collections | |
class ProvMembership(ProvRelation):
    """Provenance Membership relationship.

    Formal attributes, in order: the collection and the member entity.
    """

    _prov_type = PROV_MEMBERSHIP

    FORMAL_ATTRIBUTES = (
        PROV_ATTR_COLLECTION,
        PROV_ATTR_ENTITY,
    )
970 | |
971 | |
# Class mappings from PROV record type
# ProvBundle.new_record() looks up the class to instantiate in this table,
# keyed by the record-type constant passed as its ``record_type`` argument.
PROV_REC_CLS = {
    PROV_ENTITY: ProvEntity,
    PROV_ACTIVITY: ProvActivity,
    PROV_GENERATION: ProvGeneration,
    PROV_USAGE: ProvUsage,
    PROV_COMMUNICATION: ProvCommunication,
    PROV_START: ProvStart,
    PROV_END: ProvEnd,
    PROV_INVALIDATION: ProvInvalidation,
    PROV_DERIVATION: ProvDerivation,
    PROV_AGENT: ProvAgent,
    PROV_ATTRIBUTION: ProvAttribution,
    PROV_ASSOCIATION: ProvAssociation,
    PROV_DELEGATION: ProvDelegation,
    PROV_INFLUENCE: ProvInfluence,
    PROV_SPECIALIZATION: ProvSpecialization,
    PROV_ALTERNATE: ProvAlternate,
    PROV_MENTION: ProvMention,
    PROV_MEMBERSHIP: ProvMembership,
}


# Prefix -> namespace entries that every NamespaceManager is pre-populated
# with when it is constructed.
DEFAULT_NAMESPACES = {'prov': PROV, 'xsd': XSD, 'xsi': XSI}
996 | |
997 | |
998 # Bundle | |
class NamespaceManager(dict):
    """Manages namespaces for PROV documents and bundles.

    The manager is itself a ``dict`` keyed by prefix whose values are
    :py:class:`~prov.identifier.Namespace` objects. It is pre-populated with
    ``DEFAULT_NAMESPACES``; a default namespace set through
    :py:meth:`set_default_namespace` is stored under the empty prefix ``''``.
    Namespaces added through :py:meth:`add_namespace` are additionally kept in
    a separate registry, which is what :py:meth:`get_registered_namespaces`
    reports.
    """

    parent = None
    """Parent :py:class:`NamespaceManager` this manager is a child of."""

    def __init__(self, namespaces=None, default=None, parent=None):
        """
        Constructor.

        :param namespaces: Optional namespaces to add to the manager
            (default: None).
        :param default: Optional default namespace to use (default: None).
        :param parent: Optional parent :py:class:`NamespaceManager` to make this
            namespace manager a child of (default: None).
        """
        dict.__init__(self)
        self._default_namespaces = DEFAULT_NAMESPACES
        self.update(self._default_namespaces)
        # registry of the namespaces added via add_namespace(), by prefix
        self._namespaces = {}

        if default is not None:
            self.set_default_namespace(default)
        else:
            self._default = None
        self.parent = parent
        # TODO check if default is in the default namespaces
        self._anon_id_count = 0
        # URI -> registered namespace, used to detect re-declared URIs
        self._uri_map = dict()
        # original namespace -> the namespace actually used in its place
        self._rename_map = dict()
        # original prefix -> the namespace actually used in its place
        self._prefix_renamed_map = dict()
        self.add_namespaces(namespaces)

    def get_namespace(self, uri):
        """
        Returns the namespace known to this manager for the given URI.

        :param uri: Namespace URI.
        :return: :py:class:`~prov.identifier.Namespace`, or None when no
            namespace held by this manager has that URI.
        """
        for namespace in self.values():
            if uri == namespace.uri:
                return namespace
        return None

    def get_registered_namespaces(self):
        """
        Returns all registered namespaces.

        :return: Iterable of :py:class:`~prov.identifier.Namespace`.
        """
        return self._namespaces.values()

    def set_default_namespace(self, uri):
        """
        Sets the default namespace to the one of a given URI.

        :param uri: Namespace URI.
        """
        self._default = Namespace('', uri)
        self[''] = self._default

    def get_default_namespace(self):
        """
        Returns the default namespace.

        :return: :py:class:`~prov.identifier.Namespace`
        """
        return self._default

    def add_namespace(self, namespace):
        """
        Adds a namespace (if not available, yet).

        A namespace whose URI is already registered is mapped to the existing
        namespace; a namespace whose prefix is already taken is re-created
        under an unused prefix.

        :param namespace: :py:class:`~prov.identifier.Namespace` to add.
        :return: The :py:class:`~prov.identifier.Namespace` that is in effect
            for the given one (possibly an existing or renamed namespace).
        """
        if namespace in self.values():
            # no need to do anything
            return namespace
        if namespace in self._rename_map:
            # already renamed and added
            return self._rename_map[namespace]

        # Checking if the URI has been defined and use the existing namespace
        # instead
        uri = namespace.uri
        prefix = namespace.prefix

        if uri in self._uri_map:
            existing_ns = self._uri_map[uri]
            self._rename_map[namespace] = existing_ns
            self._prefix_renamed_map[prefix] = existing_ns
            return existing_ns

        if prefix in self:
            # Conflicting prefix
            new_prefix = self._get_unused_prefix(prefix)
            new_namespace = Namespace(new_prefix, namespace.uri)
            self._rename_map[namespace] = new_namespace
            # TODO: What if the prefix is already in the map and point to a
            # different Namespace? Raise an exception?
            self._prefix_renamed_map[prefix] = new_namespace
            prefix = new_prefix
            namespace = new_namespace

        # Only now add the namespace to the registry
        self._namespaces[prefix] = namespace
        self[prefix] = namespace
        self._uri_map[uri] = namespace

        return namespace

    def add_namespaces(self, namespaces):
        """
        Add multiple namespaces into this manager.

        :param namespaces: A collection of namespace(s) to add.
        :type namespaces: List of :py:class:`~prov.identifier.Namespace` or
            dict of {prefix: uri}.
        :returns: None
        """
        if isinstance(namespaces, dict):
            # expecting a dictionary of {prefix: uri},
            # convert it to a list of Namespace
            namespaces = [
                Namespace(prefix, uri) for prefix, uri in namespaces.items()
            ]
        if namespaces:
            for ns in namespaces:
                self.add_namespace(ns)

    def valid_qualified_name(self, qname):
        """
        Resolves an identifier to a valid qualified name.

        :param qname: Qualified name as :py:class:`~prov.identifier.QualifiedName`
            or a tuple (namespace, identifier).
        :return: :py:class:`~prov.identifier.QualifiedName` or None in case of
            failure.
        """
        if not qname:
            return None

        if isinstance(qname, QualifiedName):
            # Register the namespace if it has not been registered before
            namespace = qname.namespace
            prefix = namespace.prefix
            local_part = qname.localpart
            if not prefix:
                # the namespace is a default namespace
                if self._default == namespace:
                    # the same default namespace is defined
                    new_qname = self._default[local_part]
                elif self._default is None:
                    # no default namespace is defined, reused the one given
                    self._default = namespace
                    return qname  # no change, return the original
                else:
                    # different default namespace,
                    # use the 'dn' prefix for the new namespace
                    dn_namespace = Namespace('dn', namespace.uri)
                    dn_namespace = self.add_namespace(dn_namespace)
                    new_qname = dn_namespace[local_part]
            elif prefix in self and self[prefix] == namespace:
                # No need to add the namespace
                existing_ns = self[prefix]
                if existing_ns is namespace:
                    return qname
                else:
                    # reuse the existing namespace
                    new_qname = existing_ns[local_part]
            else:
                # Do not reuse the namespace object
                ns = self.add_namespace(deepcopy(namespace))
                # minting the same Qualified Name from the namespace's copy
                new_qname = ns[qname.localpart]
            # returning the new qname
            return new_qname

        # Trying to guess from here
        if not isinstance(qname, (six.string_types, Identifier)):
            # Only proceed for string or URI values
            return None
        # Try to generate a Qualified Name
        str_value = \
            qname.uri if isinstance(qname, Identifier) else six.text_type(qname)
        if str_value.startswith('_:'):
            # this is a blank node ID
            return None
        elif ':' in str_value:
            # check if the identifier contains a registered prefix
            prefix, local_part = str_value.split(':', 1)
            if prefix in self:
                # return a new QualifiedName
                return self[prefix][local_part]
            if prefix in self._prefix_renamed_map:
                # return a new QualifiedName
                return self._prefix_renamed_map[prefix][local_part]
            else:
                # treat as a URI (with the first part as its scheme)
                # check if the URI can be compacted
                for namespace in self.values():
                    ns_uri = namespace.uri
                    if str_value.startswith(ns_uri):
                        # create a QName with the namespace; strip only the
                        # leading URI, as str.replace() would also delete any
                        # later occurrence of it inside the local part
                        return namespace[str_value[len(ns_uri):]]
        elif self._default:
            # create and return an identifier in the default namespace,
            # using the normalised text so the local part is always a string
            return self._default[str_value]

        if self.parent is not None:
            # all attempts have failed so far
            # now delegate this to the parent NamespaceManager
            return self.parent.valid_qualified_name(qname)

        # Default to FAIL
        return None

    def get_anonymous_identifier(self, local_prefix='id'):
        """
        Returns an anonymous identifier (without a namespace prefix).

        Each call increments an internal counter, so successive identifiers
        from the same manager differ.

        :param local_prefix: Optional local namespace prefix as a string
            (default: 'id').
        :return: :py:class:`~prov.identifier.Identifier`
        """
        self._anon_id_count += 1
        return Identifier('_:%s%d' % (local_prefix, self._anon_id_count))

    def _get_unused_prefix(self, original_prefix):
        """
        Returns a prefix that is not yet a key of this manager.

        :param original_prefix: The preferred prefix.
        :return: ``original_prefix`` itself when it is free, otherwise the
            first free ``<original_prefix>_<n>`` with n counting up from 1.
        """
        if original_prefix not in self:
            return original_prefix
        count = 1
        while True:
            new_prefix = '_'.join((original_prefix, six.text_type(count)))
            if new_prefix in self:
                count += 1
            else:
                return new_prefix
1237 | |
1238 | |
1239 class ProvBundle(object): | |
1240 """PROV Bundle""" | |
1241 | |
def __init__(self, records=None, identifier=None, namespaces=None,
             document=None):
    """
    Constructor.

    :param records: Optional iterable of records to add to the bundle
        (default: None).
    :param identifier: Optional identifier of the bundle (default: None).
    :param namespaces: Optional iterable of :py:class:`~prov.identifier.Namespace`s
        to set the document up with (default: None).
    :param document: Optional document this bundle belongs to; its namespace
        manager becomes the parent of this bundle's one (default: None).
    """
    # Bundle-specific state
    self._identifier = identifier
    self._records = []
    self._id_map = defaultdict(list)
    self._document = document

    # Unresolved names are delegated to the document's namespaces, if any
    parent_namespaces = None if document is None else document._namespaces
    self._namespaces = NamespaceManager(namespaces, parent=parent_namespaces)

    for record in records or ():
        self.add_record(record)
1266 | |
def __repr__(self):
    """Returns ``<ClassName: identifier>`` for debugging output."""
    class_name = self.__class__.__name__
    return '<%s: %s>' % (class_name, self._identifier)
1269 | |
@property
def namespaces(self):
    """
    Returns the registered namespaces of this bundle as a new set.

    :return: Set of :py:class:`~prov.identifier.Namespace`.
    """
    registered = self._namespaces.get_registered_namespaces()
    return set(registered)
1278 | |
@property
def default_ns_uri(self):
    """
    Returns the default namespace's URI, if any.

    :return: URI as string, or None when no default namespace is set.
    """
    namespace = self._namespaces.get_default_namespace()
    if namespace:
        return namespace.uri
    return None
1288 | |
@property
def document(self):
    """
    Returns the document given to this bundle at construction, if any.

    :return: :py:class:`ProvDocument`, or None.
    """
    parent_document = self._document
    return parent_document
1297 | |
@property
def identifier(self):
    """
    Returns the bundle's identifier.

    :return: The identifier given at construction (may be None).
    """
    bundle_id = self._identifier
    return bundle_id
1304 | |
@property
def records(self):
    """
    Returns the records in the current bundle.

    :return: A new list, so changing it does not alter the bundle.
    """
    snapshot = list(self._records)
    return snapshot
1311 | |
1312 # Bundle configurations | |
def set_default_namespace(self, uri):
    """
    Sets the default namespace of this bundle through a given URI.

    The call is forwarded to the bundle's namespace manager.

    :param uri: Namespace URI.
    """
    manager = self._namespaces
    manager.set_default_namespace(uri)
1320 | |
def get_default_namespace(self):
    """
    Returns the default namespace held by the bundle's namespace manager.

    :return: :py:class:`~prov.identifier.Namespace`
    """
    manager = self._namespaces
    return manager.get_default_namespace()
1328 | |
def add_namespace(self, namespace_or_prefix, uri=None):
    """
    Adds a namespace (if not available, yet).

    :param namespace_or_prefix: :py:class:`~prov.identifier.Namespace` or its
        prefix as a string to add.
    :param uri: Namespace URI (default: None). Must be present if only a
        prefix is given in the previous parameter.
    :return: Whatever the namespace manager's ``add_namespace`` returns for
        the namespace.
    """
    if uri is None:
        # a ready-made namespace object was given
        namespace = namespace_or_prefix
    else:
        # build the namespace from the prefix/URI pair
        namespace = Namespace(namespace_or_prefix, uri)
    return self._namespaces.add_namespace(namespace)
1344 | |
def get_registered_namespaces(self):
    """
    Returns all namespaces registered with the bundle's namespace manager.

    :return: Iterable of :py:class:`~prov.identifier.Namespace`.
    """
    manager = self._namespaces
    return manager.get_registered_namespaces()
1352 | |
def valid_qualified_name(self, identifier):
    """
    Resolves an identifier through the bundle's namespace manager.

    :param identifier: The identifier to resolve.
    :return: The result of the namespace manager's ``valid_qualified_name``.
    """
    manager = self._namespaces
    return manager.valid_qualified_name(identifier)
1355 | |
def get_records(self, class_or_type_or_tuple=None):
    """
    Returns all records. Returned records may be filtered by the optional
    argument.

    A list is returned in both cases; it is a new list, so changing it does
    not alter the bundle.

    :param class_or_type_or_tuple: A filter on the type for which records are
        to be returned (default: None). The filter checks by the type of the
        record using the `isinstance` check on the record.
    :return: List of :py:class:`ProvRecord` objects.
    """
    if not class_or_type_or_tuple:
        return list(self._records)
    # Build a real list: the built-in filter() is lazy on Python 3 and would
    # hand callers a one-shot iterator instead of the documented list.
    return [
        record for record in self._records
        if isinstance(record, class_or_type_or_tuple)
    ]
1373 | |
def get_record(self, identifier):
    """
    Returns the records matching a given identifier.

    The bundle's own records are searched first. When none match and the
    bundle belongs to a document, the lookup is repeated on that document.

    :param identifier: Record identifier.
    :return: List of :py:class:`ProvRecord` sharing the identifier (empty
        when nothing is found), or None when ``identifier`` is None.
    """
    if identifier is None:
        return None
    valid_id = self.valid_qualified_name(identifier)
    # _id_map is a defaultdict(list): indexing it never raises KeyError and
    # silently inserts an empty list for every miss, so use get() instead.
    records = self._id_map.get(valid_id)
    if records:
        return records
    # looking up the parent document (a standalone bundle has none)
    parent_document = self.document
    if (valid_id is not None and parent_document is not None
            and self.is_bundle()):
        return parent_document.get_record(valid_id)
    return []
1394 | |
1395 # Miscellaneous functions | |
def is_document(self):
    """
    Tells whether the object is a document; this implementation always
    answers `False`.

    :return: bool
    """
    return False
1403 | |
def is_bundle(self):
    """
    Tells whether the object is a bundle; this implementation always
    answers `True`.

    :return: bool
    """
    return True
1411 | |
def has_bundles(self):
    """
    Tells whether the object has at least one bundle; this implementation
    always answers `False`.

    :return: bool
    """
    return False
1419 | |
@property
def bundles(self):
    """
    Returns the contained bundles; this implementation always returns an
    empty, immutable set.

    :return: Iterable of :py:class:`ProvBundle`.
    """
    no_bundles = frozenset()
    return no_bundles
1428 | |
def get_provn(self, _indent_level=0):
    """
    Returns the PROV-N representation of the bundle.

    The output starts with ``document`` or ``bundle <identifier>``, lists the
    default namespace and the registered prefixes, then every record (and,
    for a document, every contained bundle one level deeper), and ends with
    ``endDocument`` or ``endBundle``.

    :param _indent_level: Nesting depth used to indent the output
        (default: 0).
    :return: String
    """
    # NOTE(review): the indentation unit below is reproduced as it appears in
    # this view, which collapses whitespace; confirm against the pristine file.
    is_doc = self.is_document()
    closing_indent = ' ' * _indent_level
    line_break = '\n' + ' ' * (_indent_level + 1)

    # opening keyword: a document or a named bundle
    header = 'document' if is_doc else 'bundle %s' % self._identifier
    lines = [header]

    default_ns = self._namespaces.get_default_namespace()
    if default_ns:
        lines.append('default <%s>' % default_ns.uri)

    registered = self._namespaces.get_registered_namespaces()
    if registered:
        for ns in registered:
            lines.append('prefix %s <%s>' % (ns.prefix, ns.uri))

    if default_ns or registered:
        # a blank line between the prefixes and the assertions
        lines.append('')

    # the records themselves
    for record in self._records:
        lines.append(record.get_provn())
    if is_doc:
        # nested bundles are rendered one indentation level deeper
        for bundle in self.bundles:
            lines.append(bundle.get_provn(_indent_level + 1))

    # closing the structure
    closing = 'endDocument' if is_doc else 'endBundle'
    return line_break.join(lines) + '\n' + closing_indent + closing
1472 | |
def __eq__(self, other):
    """
    Compares two bundles by their records.

    Both record collections are turned into sets; the bundles are equal when
    the sets have the same size and every record of this bundle has an equal
    counterpart in the other one.

    :param other: Object to compare with.
    :return: bool
    """
    if not isinstance(other, ProvBundle):
        return False
    remaining = set(other.get_records())
    own_records = set(self.get_records())
    if len(own_records) != len(remaining):
        return False
    # pair every record of this bundle with one not yet matched in the other
    for record in own_records:
        for candidate in remaining:
            if record == candidate:
                # a counterpart may be matched only once
                remaining.remove(candidate)
                break
        else:
            logger.debug(
                'Equality (ProvBundle): Could not find this record: %s',
                six.text_type(record)
            )
            return False
    return True
1496 | |
def __ne__(self, other):
    """Returns the negation of the ``==`` comparison."""
    is_equal = self == other
    return not is_equal

# Setting __hash__ to None makes instances unhashable
__hash__ = None
1501 | |
1502 # Transformations | |
def _unified_records(self):
    """Returns a list of unified records.

    Records sharing an identifier are replaced by one merged record: a copy
    of the first of them with the attributes of the others added. The merged
    record takes the position of the first of its originals.
    """
    # TODO: Check unification rules in the PROV-CONSTRAINTS document
    # This method simply merges the records having the same name
    replacement = {}
    for same_id_records in self._id_map.values():
        if len(same_id_records) <= 1:
            continue
        # more than one record having the same identifier: merge them
        merged = same_id_records[0].copy()
        for extra in same_id_records[1:]:
            merged.add_attributes(extra.attributes)
        # map all of them to the merged record
        for original in same_id_records:
            replacement[original] = merged

    if not replacement:
        # No merging done, just return the list of original records
        return list(self._records)

    already_emitted = set()
    unified = []
    for record in self._records:
        if record not in replacement:
            # untouched record, keep the original
            unified.append(record)
            continue
        merged = replacement[record]
        if merged not in already_emitted:
            unified.append(merged)
            already_emitted.add(merged)
    return unified
1534 | |
def unified(self):
    """
    Unifies all records in the bundle that have the same identifier.

    The result is a new bundle built from the unified records and this
    bundle's identifier.

    :returns: :py:class:`ProvBundle` -- the new unified bundle.
    """
    return ProvBundle(
        records=self._unified_records(), identifier=self.identifier
    )
1546 | |
def update(self, other):
    """
    Append all the records of the *other* ProvBundle into this bundle.

    :param other: the other bundle whose records to be appended.
    :type other: :py:class:`ProvBundle`
    :returns: None.
    :raises ProvException: if *other* is not a :py:class:`ProvBundle`, or is
        a document that has sub-bundles.
    """
    if not isinstance(other, ProvBundle):
        raise ProvException(
            'ProvBundle.update(): The other bundle is not a ProvBundle '
            'instance (%s)' % type(other)
        )
    if other.is_document() and other.has_bundles():
        # Cannot add bundles to a bundle
        raise ProvException(
            'ProvBundle.update(): The other bundle is a document with '
            'sub-bundle(s).'
        )
    for record in other.get_records():
        self.add_record(record)
1569 | |
1570 # Provenance statements | |
def _add_record(self, record):
    """
    Stores a record in the bundle and indexes it by its identifier.

    Records without an identifier are stored but not indexed.

    :param record: The record to store.
    """
    # IMPORTANT: All records need to be added to a bundle/document via this
    # method. Otherwise, the _id_map dict will not be correctly updated
    record_id = record.identifier
    if record_id is not None:
        same_id_records = self._id_map[record_id]
        same_id_records.append(record)
    self._records.append(record)
1578 | |
def new_record(self, record_type, identifier, attributes=None,
               other_attributes=None):
    """
    Creates a new record and adds it to this bundle.

    :param record_type: Type of record (one of :py:const:`PROV_REC_CLS`).
    :param identifier: Identifier for new record.
    :param attributes: Attributes as a dictionary or list of tuples to be added
        to the record optionally (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The newly created record.
    """
    # Flatten both attribute sources into one list of (name, value) pairs,
    # the formal attributes first
    attr_list = []
    for source in (attributes, other_attributes):
        if not source:
            continue
        pairs = source.items() if isinstance(source, dict) else source
        attr_list.extend(pairs)

    record_class = PROV_REC_CLS[record_type]
    qualified_id = self.valid_qualified_name(identifier)
    record = record_class(self, qualified_id, attr_list)
    self._add_record(record)
    return record
1610 | |
def add_record(self, record):
    """
    Adds a record to the bundle.

    The given object is not stored itself: a new record is created in this
    bundle from its type, identifier and attributes.

    :param record: :py:class:`ProvRecord` to be added.
    :return: The record created in this bundle.
    """
    record_type = record.get_type()
    return self.new_record(
        record_type,
        record.identifier,
        record.formal_attributes,
        record.extra_attributes,
    )
1621 | |
def entity(self, identifier, other_attributes=None):
    """
    Creates a new entity record in this bundle.

    :param identifier: Identifier for new entity.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    # an entity has no formal attributes of its own
    formal_attributes = None
    return self.new_record(
        PROV_ENTITY, identifier, formal_attributes, other_attributes
    )
1631 | |
def activity(self, identifier, startTime=None, endTime=None,
             other_attributes=None):
    """
    Creates a new activity record in this bundle.

    :param identifier: Identifier for new activity.
    :param startTime: Optional start time for the activity (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param endTime: Optional end time for the activity (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    formal_attributes = {
        PROV_ATTR_STARTTIME: _ensure_datetime(startTime),
        PROV_ATTR_ENDTIME: _ensure_datetime(endTime),
    }
    return self.new_record(
        PROV_ACTIVITY, identifier, formal_attributes, other_attributes
    )
1654 | |
def generation(self, entity, activity=None, time=None, identifier=None,
               other_attributes=None):
    """
    Creates a new generation record for an entity.

    :param entity: Entity or a string identifier for the entity.
    :param activity: Activity or string identifier of the activity involved in
        the generation (default: None).
    :param time: Optional time for the generation (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new generation record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    formal_attributes = {
        PROV_ATTR_ENTITY: entity,
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_GENERATION, identifier, formal_attributes, other_attributes
    )
1678 | |
def usage(self, activity, entity=None, time=None, identifier=None,
          other_attributes=None):
    """
    Creates a new usage record for an activity.

    :param activity: Activity or a string identifier for the activity.
    :param entity: Entity or string identifier of the entity involved in
        the usage relationship (default: None).
    :param time: Optional time for the usage (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new usage record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    formal_attributes = {
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_ENTITY: entity,
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_USAGE, identifier, formal_attributes, other_attributes
    )
1701 | |
def start(self, activity, trigger=None, starter=None, time=None,
          identifier=None, other_attributes=None):
    """
    Creates a new start record for an activity.

    :param activity: Activity or a string identifier for the activity.
    :param trigger: Entity triggering the start of this activity.
    :param starter: Optionally extra activity to state a qualified start
        through which the trigger entity for the start is generated
        (default: None).
    :param time: Optional time for the start (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new start record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    formal_attributes = {
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_TRIGGER: trigger,
        PROV_ATTR_STARTER: starter,
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_START, identifier, formal_attributes, other_attributes
    )
1728 | |
def end(self, activity, trigger=None, ender=None, time=None,
        identifier=None, other_attributes=None):
    """
    Creates a new end record for an activity.

    :param activity: Activity or a string identifier for the activity.
    :param trigger: Entity triggering the end of this activity.
    :param ender: Optionally extra activity to state a qualified end
        through which the trigger entity for the end is generated
        (default: None).
    :param time: Optional time for the end (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new end record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    formal_attributes = {
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_TRIGGER: trigger,
        PROV_ATTR_ENDER: ender,
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_END, identifier, formal_attributes, other_attributes
    )
1755 | |
def invalidation(self, entity, activity=None, time=None, identifier=None,
                 other_attributes=None):
    """
    Creates a new invalidation record for an entity.

    :param entity: Entity or a string identifier for the entity.
    :param activity: Activity or string identifier of the activity involved in
        the invalidation (default: None).
    :param time: Optional time for the invalidation (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new invalidation record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    formal_attributes = {
        PROV_ATTR_ENTITY: entity,
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_INVALIDATION, identifier, formal_attributes, other_attributes
    )
1779 | |
def communication(self, informed, informant, identifier=None,
                  other_attributes=None):
    """
    Creates a new communication record between two activities.

    :param informed: The informed activity (relationship destination).
    :param informant: The informing activity (relationship source).
    :param identifier: Identifier for new communication record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    formal_attributes = {
        PROV_ATTR_INFORMED: informed,
        PROV_ATTR_INFORMANT: informant,
    }
    return self.new_record(
        PROV_COMMUNICATION, identifier, formal_attributes, other_attributes
    )
1798 | |
def agent(self, identifier, other_attributes=None):
    """
    Creates a new agent record in this bundle.

    :param identifier: Identifier for new agent.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    # an agent has no formal attributes of its own
    formal_attributes = None
    return self.new_record(
        PROV_AGENT, identifier, formal_attributes, other_attributes
    )
1808 | |
def attribution(self, entity, agent, identifier=None,
                other_attributes=None):
    """
    Creates a new attribution record between an entity and an agent.

    :param entity: Entity or a string identifier for the entity (relationship
        source).
    :param agent: Agent or string identifier of the agent involved in the
        attribution (relationship destination).
    :param identifier: Identifier for new attribution record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record.
    """
    formal_attributes = {
        PROV_ATTR_ENTITY: entity,
        PROV_ATTR_AGENT: agent,
    }
    return self.new_record(
        PROV_ATTRIBUTION, identifier, formal_attributes, other_attributes
    )
1829 | |
def association(self, activity, agent=None, plan=None, identifier=None,
                other_attributes=None):
    """
    Creates a new association record for an activity.

    :param activity: Activity or a string identifier for the activity.
    :param agent: Agent or string identifier of the agent involved in the
        association (default: None).
    :param plan: Optional extra entity stating the plan of a qualified
        association (default: None).
    :param identifier: Identifier for new association record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: Whatever :py:meth:`new_record` returns for the new record.
    """
    # Unset optional parts (agent, plan) are passed through as None.
    formal_attributes = {
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_AGENT: agent,
        PROV_ATTR_PLAN: plan,
    }
    return self.new_record(
        PROV_ASSOCIATION, identifier, formal_attributes, other_attributes
    )
1852 | |
def delegation(self, delegate, responsible, activity=None, identifier=None,
               other_attributes=None):
    """
    Creates a new delegation record stating that one agent acted on behalf
    of another (this method is also exposed as ``actedOnBehalfOf``).

    :param delegate: Agent (or its identifier) stored as the delegate of the
        relation (relationship source).
    :param responsible: Agent (or its identifier) stored as the responsible
        party of the relation (relationship destination).
    :param activity: Optional extra activity to state a qualified delegation
        (default: None).
    :param identifier: Identifier for new delegation record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: Whatever :py:meth:`new_record` returns for the new record.
    """
    formal_attributes = {
        PROV_ATTR_DELEGATE: delegate,
        PROV_ATTR_RESPONSIBLE: responsible,
        PROV_ATTR_ACTIVITY: activity,
    }
    return self.new_record(
        PROV_DELEGATION, identifier, formal_attributes, other_attributes
    )
1875 | |
def influence(self, influencee, influencer, identifier=None,
              other_attributes=None):
    """
    Creates a new influence record between two entities, activities or agents.

    :param influencee: Influenced entity, activity or agent (relationship
        source).
    :param influencer: Influencing entity, activity or agent (relationship
        destination).
    :param identifier: Identifier for new influence record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: Whatever :py:meth:`new_record` returns for the new record.
    """
    formal_attributes = {
        PROV_ATTR_INFLUENCEE: influencee,
        PROV_ATTR_INFLUENCER: influencer,
    }
    return self.new_record(
        PROV_INFLUENCE, identifier, formal_attributes, other_attributes
    )
1896 | |
def derivation(self, generatedEntity, usedEntity, activity=None,
               generation=None, usage=None,
               identifier=None, other_attributes=None):
    """
    Creates a new derivation record for a generated entity from a used entity.

    :param generatedEntity: Entity or a string identifier for the generated
        entity (relationship source).
    :param usedEntity: Entity or a string identifier for the used entity
        (relationship destination).
    :param activity: Activity or string identifier of the activity involved in
        the derivation (default: None).
    :param generation: Optional generation to state a qualified derivation
        (default: None).
    :param usage: Optional usage to state a qualified derivation; stored
        under the ``PROV_ATTR_USAGE`` attribute (default: None).
    :param identifier: Identifier for new derivation record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: Whatever :py:meth:`new_record` returns for the new record.
    """
    return self.new_record(
        PROV_DERIVATION,
        identifier,
        {
            PROV_ATTR_GENERATED_ENTITY: generatedEntity,
            PROV_ATTR_USED_ENTITY: usedEntity,
            PROV_ATTR_ACTIVITY: activity,
            PROV_ATTR_GENERATION: generation,
            PROV_ATTR_USAGE: usage,
        },
        other_attributes,
    )
1924 | |
def revision(self, generatedEntity, usedEntity, activity=None,
             generation=None, usage=None,
             identifier=None, other_attributes=None):
    """
    Creates a new revision record for a generated entity from a used entity.

    The record is built by :py:meth:`derivation` and then given the
    additional asserted type ``PROV['Revision']``.

    :param generatedEntity: Entity or a string identifier for the generated
        entity (relationship source).
    :param usedEntity: Entity or a string identifier for the used entity
        (relationship destination).
    :param activity: Activity or string identifier of the activity involved in
        the revision (default: None).
    :param generation: Optional generation to state a qualified revision
        (default: None).
    :param usage: Optional usage, passed through to :py:meth:`derivation`
        (default: None).
    :param identifier: Identifier for new revision record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The derivation record with the revision type asserted.
    """
    revision_record = self.derivation(
        generatedEntity, usedEntity, activity,
        generation, usage, identifier, other_attributes
    )
    # Mark the plain derivation as a revision.
    revision_record.add_asserted_type(PROV['Revision'])
    return revision_record
1950 | |
def quotation(self, generatedEntity, usedEntity, activity=None,
              generation=None, usage=None,
              identifier=None, other_attributes=None):
    """
    Creates a new quotation record for a generated entity from a used entity.

    The record is built by :py:meth:`derivation` and then given the
    additional asserted type ``PROV['Quotation']``.

    :param generatedEntity: Entity or a string identifier for the generated
        entity (relationship source).
    :param usedEntity: Entity or a string identifier for the used entity
        (relationship destination).
    :param activity: Activity or string identifier of the activity involved in
        the quotation (default: None).
    :param generation: Optional generation to state a qualified quotation
        (default: None).
    :param usage: Optional usage, passed through to :py:meth:`derivation`
        (default: None).
    :param identifier: Identifier for new quotation record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The derivation record with the quotation type asserted.
    """
    quotation_record = self.derivation(
        generatedEntity, usedEntity, activity,
        generation, usage, identifier, other_attributes
    )
    # Mark the plain derivation as a quotation.
    quotation_record.add_asserted_type(PROV['Quotation'])
    return quotation_record
1976 | |
def primary_source(self, generatedEntity, usedEntity, activity=None,
                   generation=None, usage=None,
                   identifier=None, other_attributes=None):
    """
    Creates a new primary source record for a generated entity from a used
    entity.

    The record is built by :py:meth:`derivation` and then given the
    additional asserted type ``PROV['PrimarySource']``.

    :param generatedEntity: Entity or a string identifier for the generated
        entity (relationship source).
    :param usedEntity: Entity or a string identifier for the used entity
        (relationship destination).
    :param activity: Activity or string identifier of the activity involved in
        the primary source (default: None).
    :param generation: Optional generation to state a qualified primary
        source (default: None).
    :param usage: Optional usage, passed through to :py:meth:`derivation`
        (default: None).
    :param identifier: Identifier for new primary source record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The derivation record with the primary source type asserted.
    """
    source_record = self.derivation(
        generatedEntity, usedEntity, activity,
        generation, usage, identifier, other_attributes
    )
    # Mark the plain derivation as a primary source relation.
    source_record.add_asserted_type(PROV['PrimarySource'])
    return source_record
2003 | |
def specialization(self, specificEntity, generalEntity):
    """
    Creates a new specialisation record for a specific from a general entity.

    The record is created without an identifier and without extra attributes.

    :param specificEntity: Entity or a string identifier for the specific
        entity (relationship source).
    :param generalEntity: Entity or a string identifier for the general entity
        (relationship destination).
    :return: Whatever :py:meth:`new_record` returns for the new record.
    """
    formal_attributes = {
        PROV_ATTR_SPECIFIC_ENTITY: specificEntity,
        PROV_ATTR_GENERAL_ENTITY: generalEntity,
    }
    return self.new_record(PROV_SPECIALIZATION, None, formal_attributes)
2019 | |
def alternate(self, alternate1, alternate2):
    """
    Creates a new alternate record between two entities.

    The record is created without an identifier and without extra attributes.

    :param alternate1: Entity or a string identifier for the first entity
        (relationship source).
    :param alternate2: Entity or a string identifier for the second entity
        (relationship destination).
    :return: Whatever :py:meth:`new_record` returns for the new record.
    """
    formal_attributes = {
        PROV_ATTR_ALTERNATE1: alternate1,
        PROV_ATTR_ALTERNATE2: alternate2,
    }
    return self.new_record(PROV_ALTERNATE, None, formal_attributes)
2035 | |
def mention(self, specificEntity, generalEntity, bundle):
    """
    Creates a new mention record for a specific from a general entity.

    The record is created without an identifier and without extra attributes.

    :param specificEntity: Entity or a string identifier for the specific
        entity (relationship source).
    :param generalEntity: Entity or a string identifier for the general entity
        (relationship destination).
    :param bundle: Value stored under the ``PROV_ATTR_BUNDLE`` attribute of
        the record (presumably the bundle, or its identifier, in which the
        general entity is described -- confirm against PROV-Links).
    :return: Whatever :py:meth:`new_record` returns for the new record.
    """
    formal_attributes = {
        PROV_ATTR_SPECIFIC_ENTITY: specificEntity,
        PROV_ATTR_GENERAL_ENTITY: generalEntity,
        PROV_ATTR_BUNDLE: bundle,
    }
    return self.new_record(PROV_MENTION, None, formal_attributes)
2053 | |
def collection(self, identifier, other_attributes=None):
    """
    Creates a new collection: an entity record that additionally carries the
    asserted type ``PROV['Collection']``.

    :param identifier: Identifier for new collection record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new entity record with the collection type asserted.
    """
    collection_record = self.new_record(
        PROV_ENTITY, identifier, None, other_attributes
    )
    # A collection is modelled as an entity with an extra asserted type.
    collection_record.add_asserted_type(PROV['Collection'])
    return collection_record
2067 | |
def membership(self, collection, entity):
    """
    Creates a new membership record for an entity to a collection.

    The record is created without an identifier and without extra attributes.

    :param collection: Collection the entity is to be added to.
    :param entity: Entity to be added to the collection.
    :return: Whatever :py:meth:`new_record` returns for the new record.
    """
    formal_attributes = {
        PROV_ATTR_COLLECTION: collection,
        PROV_ATTR_ENTITY: entity,
    }
    return self.new_record(PROV_MEMBERSHIP, None, formal_attributes)
2081 | |
def plot(self, filename=None, show_nary=True, use_labels=False,
         show_element_attributes=True, show_relation_attributes=True):
    """
    Convenience function to plot a PROV document.

    The document is converted with :py:func:`prov.dot.prov_to_dot` and
    rendered through the matching ``create_<format>`` method of the
    resulting object.

    :param filename: The filename to save to. If not given, it will open
        an interactive matplotlib plot. The filetype is determined from
        the filename ending.
    :type filename: String
    :param show_nary: Shows all elements in n-ary relations.
    :type show_nary: bool
    :param use_labels: Uses the `prov:label` property of an element as its
        name (instead of its identifier).
    :type use_labels: bool
    :param show_element_attributes: Shows attributes of elements.
    :type show_element_attributes: bool
    :param show_relation_attributes: Shows attributes of relations.
    :type show_relation_attributes: bool
    :raises ValueError: If no ``create_<format>`` method exists for the
        format derived from *filename*.
    """
    # Lazy imports to have soft dependencies on pydot and matplotlib
    # (imported even later).
    from prov import dot

    # The output format is the lower-cased file extension without the
    # separator; PNG is used for the interactive display.
    if filename:
        extension = os.path.splitext(filename)[-1]
        image_format = extension.lower().strip(os.path.extsep)
    else:
        image_format = "png"

    graph = dot.prov_to_dot(
        self,
        show_nary=show_nary,
        use_labels=use_labels,
        show_element_attributes=show_element_attributes,
        show_relation_attributes=show_relation_attributes
    )
    creator_name = "create_%s" % image_format
    if not hasattr(graph, creator_name):
        raise ValueError("Format '%s' cannot be saved." % image_format)

    with io.BytesIO() as buf:
        buf.write(getattr(graph, creator_name)())
        buf.seek(0, 0)

        if filename:
            with open(filename, "wb") as fh:
                fh.write(buf.read())
            return

        # Use matplotlib to show the image as it likely is more
        # widespread than PIL and works nicely in the ipython notebook.
        import matplotlib.pylab as plt
        import matplotlib.image as mpimg

        max_size = 30

        img = mpimg.imread(buf)
        # pydot makes a border around the image. remove it.
        img = img[1:-1, 1:-1]
        width = img.shape[1] / 100.0
        height = img.shape[0] / 100.0
        # Shrink the figure uniformly when its longer side exceeds max_size.
        longest = max(width, height)
        scale = max_size / longest if longest > max_size else 1.0

        plt.figure(figsize=(scale * width, scale * height))
        plt.subplots_adjust(bottom=0, top=1, left=0, right=1)
        plt.xticks([])
        plt.yticks([])
        plt.imshow(img)
        plt.axis("off")
        plt.show()
2149 | |
# Aliases: bind each record-creating method above to a second name that
# mirrors the PROV relation name (e.g. ``wasGeneratedBy`` for ``generation``),
# so both spellings call the very same function object.
# NOTE(review): ``generation``, ``usage``, ``start`` and ``end`` are defined
# earlier in the enclosing scope, outside this excerpt.
wasGeneratedBy = generation
used = usage
wasStartedBy = start
wasEndedBy = end
wasInvalidatedBy = invalidation
wasInformedBy = communication
wasAttributedTo = attribution
wasAssociatedWith = association
actedOnBehalfOf = delegation
wasInfluencedBy = influence
wasDerivedFrom = derivation
wasRevisionOf = revision
wasQuotedFrom = quotation
hadPrimarySource = primary_source
alternateOf = alternate
specializationOf = specialization
mentionOf = mention
hadMember = membership
2169 | |
2170 | |
class ProvDocument(ProvBundle):
    """Provenance Document.

    A document is a bundle without an identifier that can additionally hold
    named bundles, kept in ``self._bundles`` keyed by their identifier.
    """

    def __init__(self, records=None, namespaces=None):
        """
        Constructor.

        :param records: Optional records to add to the document (default: None).
        :param namespaces: Optional iterable of :py:class:`~prov.identifier.Namespace`s
            to set the document up with (default: None).
        """
        ProvBundle.__init__(
            self, records=records, identifier=None, namespaces=namespaces
        )
        self._bundles = dict()

    def __repr__(self):
        return '<ProvDocument>'

    def __eq__(self, other):
        """
        Two documents are equal when their document-level content is equal
        (as decided by :py:class:`ProvBundle`) and they hold exactly the
        same set of bundles, each comparing equal to its counterpart.
        """
        if not isinstance(other, ProvDocument):
            return False
        # Comparing the documents' content
        if not super(ProvDocument, self).__eq__(other):
            return False

        # Comparing the documents' bundles. The size check makes the
        # comparison symmetric: without it, a document would compare equal
        # to another one that merely contains *additional* bundles.
        if len(self._bundles) != len(other._bundles):
            return False
        for b_id, bundle in self._bundles.items():
            if b_id not in other._bundles:
                return False
            other_bundle = other._bundles[b_id]
            if bundle != other_bundle:
                return False

        # Everything is the same
        return True

    def is_document(self):
        """
        `True` if the object is a document, `False` otherwise.

        :return: bool
        """
        return True

    def is_bundle(self):
        """
        `True` if the object is a bundle, `False` otherwise.

        :return: bool
        """
        return False

    def has_bundles(self):
        """
        `True` if the object has at least one bundle, `False` otherwise.

        :return: bool
        """
        return len(self._bundles) > 0

    @property
    def bundles(self):
        """
        Returns bundles contained in the document

        :return: Iterable of :py:class:`ProvBundle`.
        """
        return self._bundles.values()

    # Transformations
    def flattened(self):
        """
        Flattens the document by moving all the records in its bundles up
        to the document level.

        :returns: :py:class:`ProvDocument` -- the (new) flattened document,
            or this very document when it contains no bundles.
        """
        if not self._bundles:
            # Nothing to flatten: returning the same document
            return self

        # Creating a new document for all the records
        new_doc = ProvDocument()
        bundled_records = itertools.chain.from_iterable(
            b.get_records() for b in self._bundles.values()
        )
        for record in itertools.chain(self._records, bundled_records):
            new_doc.add_record(record)
        return new_doc

    def unified(self):
        """
        Returns a new document containing all records having same identifiers
        unified (including those inside bundles).

        :return: :py:class:`ProvDocument`
        """
        document = ProvDocument(self._unified_records())
        # NOTE: the namespace manager is shared with this document, not copied.
        document._namespaces = self._namespaces
        for bundle in self.bundles:
            unified_bundle = bundle.unified()
            document.add_bundle(unified_bundle)
        return document

    def update(self, other):
        """
        Append all the records of the *other* document/bundle into this document.
        Bundles having same identifiers will be merged.

        :param other: The other document/bundle whose records to be appended.
        :type other: :py:class:`ProvDocument` or :py:class:`ProvBundle`
        :raises ProvException: If *other* is not a :py:class:`ProvBundle`
            (or subclass) instance.
        :returns: None.
        """
        if not isinstance(other, ProvBundle):
            raise ProvException(
                'ProvDocument.update(): The other is not a ProvDocument or '
                'ProvBundle instance (%s)' % type(other)
            )

        for record in other.get_records():
            self.add_record(record)
        if other.has_bundles():
            for bundle in other.bundles:
                if bundle.identifier in self._bundles:
                    # Same identifier: merge into the existing bundle
                    self._bundles[bundle.identifier].update(bundle)
                else:
                    new_bundle = self.bundle(bundle.identifier)
                    new_bundle.update(bundle)

    # Bundle operations
    def add_bundle(self, bundle, identifier=None):
        """
        Add a bundle to the current document.

        :param bundle: The bundle to add to the document.
        :type bundle: :py:class:`ProvBundle`
        :param identifier: The (optional) identifier to use for the bundle
            (default: None). If none given, use the identifier from the bundle
            itself.
        :raises ProvException: If *bundle* is not a :py:class:`ProvBundle`,
            is a document with nested bundles, has no (valid) identifier, or
            if a bundle with the same identifier already exists.
        """
        if not isinstance(bundle, ProvBundle):
            raise ProvException(
                'Only a ProvBundle instance can be added as a bundle in a '
                'ProvDocument.'
            )

        if bundle.is_document():
            if bundle.has_bundles():
                raise ProvException(
                    'Cannot add a document with nested bundles as a bundle.'
                )
            # Make it a new ProvBundle
            new_bundle = ProvBundle(namespaces=bundle.namespaces)
            new_bundle.update(bundle)
            bundle = new_bundle

        if identifier is None:
            identifier = bundle.identifier

        if not identifier:
            raise ProvException('The provided bundle has no identifier')

        # Link the bundle namespace manager to the document's. This happens
        # before resolving the identifier, as in the original ordering.
        bundle._namespaces.parent = self._namespaces

        valid_id = bundle.valid_qualified_name(identifier)
        # Validate everything *before* touching the bundle's identifier or
        # the registry, so a rejected call does not leave the bundle renamed
        # or stored under a ``None`` key (mirrors the checks in ``bundle()``).
        if valid_id is None:
            raise ProvException(
                'The provided identifier "%s" is not valid' % identifier
            )
        if valid_id in self._bundles:
            raise ProvException('A bundle with that identifier already exists')

        # IMPORTANT: Rewriting the bundle identifier for consistency
        bundle._identifier = valid_id
        self._bundles[valid_id] = bundle
        bundle._document = self

    def bundle(self, identifier):
        """
        Creates a new, empty bundle in the current document and returns it.

        :param identifier: The identifier to use for the bundle.
        :raises ProvException: If *identifier* is None or not valid, or if a
            bundle with the same identifier already exists.
        :return: :py:class:`ProvBundle`
        """
        if identifier is None:
            raise ProvException(
                'An identifier is required. Cannot create an unnamed bundle.'
            )
        valid_id = self.valid_qualified_name(identifier)
        if valid_id is None:
            raise ProvException(
                'The provided identifier "%s" is not valid' % identifier
            )
        if valid_id in self._bundles:
            raise ProvException('A bundle with that identifier already exists')
        b = ProvBundle(identifier=valid_id, document=self)
        self._bundles[valid_id] = b
        return b

    # Serializing and deserializing
    def serialize(self, destination=None, format='json', **args):
        """
        Serialize the :py:class:`ProvDocument` to the destination.

        Available serializers can be queried by the value of
        `:py:attr:~prov.serializers.Registry.serializers` after loading them via
        `:py:func:~prov.serializers.Registry.load_serializers()`.

        :param destination: Where to write the output. `None` (default)
            returns the serialization as a string; an object with a `write`
            attribute is used as the output stream; anything else is parsed
            as a location whose path component names a local file.
        :param format: Serialization format (default: 'json'), defaulting to
            PROV-JSON.
        :return: Serialization in a string if no destination was given,
            None otherwise.
        """
        serializer = serializers.get(format)(self)
        if destination is None:
            stream = io.StringIO()
            serializer.serialize(stream, **args)
            return stream.getvalue()
        if hasattr(destination, "write"):
            serializer.serialize(destination, **args)
            return

        # Only the network location and the path of the parsed result are
        # needed here.
        parsed_location = urlparse(destination)
        netloc, path = parsed_location[1], parsed_location[2]
        if netloc != "":
            print("WARNING: not saving as location " +
                  "is not a local file reference")
            return

        # Write to a temporary file first and move it into place afterwards,
        # so a failed serialization never leaves a truncated target file.
        fd, name = tempfile.mkstemp()
        try:
            # The context manager closes the descriptor even if the
            # serializer raises.
            with os.fdopen(fd, "wb") as stream:
                serializer.serialize(stream, **args)
            shutil.move(name, path)
        except Exception:
            # Do not leak the temporary file when anything above failed.
            if os.path.exists(name):
                os.remove(name)
            raise

    @staticmethod
    def deserialize(source=None, content=None, format='json', **args):
        """
        Deserialize the :py:class:`ProvDocument` from source (a stream or a
        file path) or directly from a string content.

        Available serializers can be queried by the value of
        `:py:attr:~prov.serializers.Registry.serializers` after loading them via
        `:py:func:~prov.serializers.Registry.load_serializers()`.

        Note: Not all serializers support deserialization.

        :param source: Stream object (anything with a `read` attribute) or
            file path to deserialize the PROV document from (default: None).
        :param content: String to deserialize the PROV document from
            (default: None). Takes precedence over *source* when both are
            given.
        :param format: Serialization format (default: 'json'), defaulting to
            PROV-JSON.
        :return: :py:class:`ProvDocument`, or None when neither *source* nor
            *content* is given.
        """
        serializer = serializers.get(format)()

        if content is not None:
            # io.StringIO only accepts unicode strings
            stream = io.StringIO(
                content if not isinstance(content, six.binary_type)
                else content.decode()
            )
            return serializer.deserialize(stream, **args)

        if source is not None:
            if hasattr(source, "read"):
                return serializer.deserialize(source, **args)
            else:
                with open(source) as f:
                    return serializer.deserialize(f, **args)
2446 | |
2447 | |
def sorted_attributes(element, attributes):
    """
    Helper function sorting attributes into the order required by PROV-XML.

    Attributes whose name appears in the ordering for *element* come first,
    grouped in that ordering and sorted within each group; every other
    attribute follows, sorted by the same key.

    :param element: The prov element used to derive the type and the
        attribute order for the type.
    :param attributes: The attributes to sort, as (name, value) pairs.
    :return: A new list holding the sorted (name, value) pairs.
    """
    remaining = list(attributes)

    # Formal attributes of the record type come first, followed by label,
    # location, role, type, and value attributes. This is universal amongst
    # all elements.
    ordering = list(PROV_REC_CLS[element].FORMAL_ATTRIBUTES)
    ordering += [PROV_LABEL, PROV_LOCATION, PROV_ROLE, PROV_TYPE, PROV_VALUE]

    # Sort key. The PROV XML specification talks about alphabetical
    # sorting. We now interpret it as sorting by tag including the prefix
    # first and then sorting by the text, also including the namespace
    # prefix if given.
    def sort_key(pair):
        value = pair[1]
        if hasattr(value, "value"):
            value = value.value
        return six.text_type(pair[0]), six.text_type(value)

    ordered = []
    for attr_name in ordering:
        # Split the remaining pairs into those carrying this attribute name
        # and those still waiting for a later name.
        matching, leftover = [], []
        for pair in remaining:
            if pair[0] != attr_name:
                leftover.append(pair)
            else:
                matching.append(pair)
        matching.sort(key=sort_key)
        ordered.extend(matching)
        remaining = leftover

    # Add remaining attributes. According to the spec, the other attributes
    # have a fixed alphabetical order.
    remaining.sort(key=sort_key)
    ordered.extend(remaining)

    return ordered