comparison env/lib/python3.9/site-packages/prov/model.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """Python implementation of the W3C Provenance Data Model (PROV-DM), including
2 support for PROV-JSON import/export
3
4 References:
5
6 PROV-DM: http://www.w3.org/TR/prov-dm/
7 PROV-JSON: https://provenance.ecs.soton.ac.uk/prov-json/
8 """
9 from __future__ import (absolute_import, division, print_function,
10 unicode_literals)
11
12 from collections import defaultdict
13 from copy import deepcopy
14 import datetime
15 import io
16 import itertools
17 import logging
18 import os
19 import shutil
20 import tempfile
21
22 import dateutil.parser
23 from prov import Error, serializers
24 from prov.constants import *
25 from prov.identifier import Identifier, QualifiedName, Namespace
26 from six.moves.urllib.parse import urlparse
27
28
29 __author__ = 'Trung Dong Huynh'
30 __email__ = 'trungdong@donggiang.com'
31
32
33 logger = logging.getLogger(__name__)
34
35
36 # Data Types
def _ensure_datetime(value):
    """Coerce a textual timestamp into a datetime.

    :param value: A string to be parsed, or any other object.
    :return: The result of :py:func:`dateutil.parser.parse` for string
        input; every other value is handed back untouched.
    """
    if not isinstance(value, six.string_types):
        return value
    return dateutil.parser.parse(value)
42
43
def parse_xsd_datetime(value):
    """Parse a date/time string, yielding None when it cannot be parsed.

    :param value: Textual date/time to hand to :py:func:`dateutil.parser.parse`.
    :return: The parsed :py:class:`datetime.datetime`, or None on failure.
    """
    try:
        return dateutil.parser.parse(value)
    except (ValueError, OverflowError):
        # dateutil reports an unparseable string with ValueError and a date
        # that is out of range with OverflowError; both mean "no datetime".
        return None
50
51
def parse_boolean(value):
    """Translate the textual form of a boolean into a Python bool.

    The comparison ignores case.

    :param value: String to interpret.
    :return: False for "false"/"0", True for "true"/"1", otherwise None.
    """
    lowered = value.lower()
    if lowered in ("false", "0"):
        return False
    if lowered in ("true", "1"):
        return True
    return None
59
# Parsers keyed by the Python type a value is expected to end up as
DATATYPE_PARSERS = {datetime.datetime: parse_xsd_datetime}


# Parsers keyed by XSD datatype; each one turns the lexical form of a
# literal into the matching Python standard type
XSD_DATATYPE_PARSERS = dict((
    (XSD_STRING, six.text_type),
    (XSD_DOUBLE, float),
    # the last integer type: long on Python 2, int on Python 3
    (XSD_LONG, six.integer_types[-1]),
    (XSD_INT, int),
    (XSD_BOOLEAN, parse_boolean),
    (XSD_DATETIME, parse_xsd_datetime),
    (XSD_ANYURI, Identifier),
))
76
77
def parse_xsd_types(value, datatype):
    """Convert a literal's lexical value according to its XSD datatype.

    :param value: Lexical form of the literal.
    :param datatype: Datatype to look up in ``XSD_DATATYPE_PARSERS``.
    :return: Whatever the registered parser returns, or None when no
        parser is registered for ``datatype``.
    """
    if datatype not in XSD_DATATYPE_PARSERS:
        return None
    convert = XSD_DATATYPE_PARSERS[datatype]
    return convert(value)
83
84
def first(a_set):
    """Return the first item produced by iterating ``a_set``.

    :param a_set: Any iterable.
    :return: The first iterated item, or None if the iterable is empty.
    """
    for item in a_set:
        return item
    return None
87
88
def _ensure_multiline_string_triple_quoted(value):
    """Render ``value`` as a quoted string.

    The value is converted to text and every double quote in it is
    backslash-escaped. Text containing a newline is wrapped in triple
    double quotes; anything else is wrapped in single double quotes.

    :param value: Object to render.
    :return: The quoted text.
    """
    text = six.text_type(value).replace('"', '\\"')
    template = '"""%s"""' if '\n' in text else '"%s"'
    return template % text
98
99
def encoding_provn_value(value):
    """Encode a Python value for use as a PROV-N attribute value.

    Strings are quoted, datetimes/floats/booleans are written as a quoted
    lexical form followed by ``%%`` and an XSD type name, and every other
    value is simply converted to text.

    :param value: Value to encode.
    :return: The encoded text.
    """
    if isinstance(value, six.string_types):
        return _ensure_multiline_string_triple_quoted(value)
    if isinstance(value, datetime.datetime):
        return u'"{0}" %% xsd:dateTime'.format(value.isoformat())
    if isinstance(value, float):
        return u'"%g" %%%% xsd:float' % value
    if isinstance(value, bool):
        return u'"%i" %%%% xsd:boolean' % value
    # TODO: QName export
    return six.text_type(value)
112
113
@six.python_2_unicode_compatible
class Literal(object):
    """A literal value with an optional datatype and language tag.

    The value and the language tag are always stored as text. Two literals
    are equal when value, datatype and language tag are all equal.
    """

    def __init__(self, value, datatype=None, langtag=None):
        """
        Constructor.

        :param value: Value of the literal; stored as text.
        :param datatype: Optional datatype of the literal (default: None).
        :param langtag: Optional language tag (default: None). When given,
            the datatype is forced to ``prov:InternationalizedString``.
        """
        self._value = six.text_type(value)  # value is always a string
        if langtag:
            if datatype is None:
                logger.debug(
                    'Assuming prov:InternationalizedString as the type of '
                    '"%s"@%s', value, langtag
                )
                datatype = PROV["InternationalizedString"]
            # PROV JSON states that the type field must not be set when
            # using the lang attribute and PROV XML requires it to be an
            # internationalized string.
            elif datatype != PROV["InternationalizedString"]:
                # Logger.warn is a deprecated alias (gone in Python 3.13);
                # Logger.warning is the supported spelling.
                logger.warning(
                    'Invalid data type (%s) for "%s"@%s, overridden as '
                    'prov:InternationalizedString.',
                    datatype, value, langtag
                )
                datatype = PROV["InternationalizedString"]
        self._datatype = datatype
        # langtag is always a string
        self._langtag = six.text_type(langtag) if langtag is not None else None

    def __str__(self):
        return self.provn_representation()

    def __repr__(self):
        return u'<Literal: %s>' % self.provn_representation()

    def __eq__(self, other):
        # Anything that is not a Literal is never equal to one
        return (
            (self._value == other.value and
             self._datatype == other.datatype and
             self._langtag == other.langtag)
            if isinstance(other, Literal) else False
        )

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        # Hash over the same triple that __eq__ compares
        return hash((self._value, self._datatype, self._langtag))

    @property
    def value(self):
        """Value of the literal, as text."""
        return self._value

    @property
    def datatype(self):
        """Datatype of the literal, or None."""
        return self._datatype

    @property
    def langtag(self):
        """Language tag of the literal as text, or None."""
        return self._langtag

    def has_no_langtag(self):
        """
        True, if no language tag is set, False otherwise.

        :return: bool
        """
        return self._langtag is None

    def provn_representation(self):
        """
        Returns the PROV-N representation of the literal.

        :return: The quoted value followed by ``@langtag`` when a language
            tag is set, otherwise followed by ``%%`` and the datatype.
        """
        if self._langtag:
            # a language tag can only go with prov:InternationalizedString
            return '%s@%s' % (
                _ensure_multiline_string_triple_quoted(self._value),
                six.text_type(self._langtag)
            )
        else:
            return '%s %%%% %s' % (
                _ensure_multiline_string_triple_quoted(self._value),
                six.text_type(self._datatype)
            )
186
187
188 # Exceptions and warnings
class ProvException(Error):
    """Root of the exception hierarchy raised by the PROV model."""
192
193
class ProvWarning(Warning):
    """Root of the warning hierarchy issued by the PROV model."""
197
198
@six.python_2_unicode_compatible
class ProvExceptionInvalidQualifiedName(ProvException):
    """Raised when a name is not a valid qualified identifier name."""

    # Intended qualified name; replaced per instance by the constructor.
    qname = None

    def __init__(self, qname):
        """
        Constructor.

        :param qname: The qualified name that was found to be invalid.
        """
        self.qname = qname

    def __str__(self):
        message = u'Invalid Qualified Name: %s' % self.qname
        return message
216
217
@six.python_2_unicode_compatible
class ProvElementIdentifierRequired(ProvException):
    """Raised when a PROV element is created without an identifier."""

    def __str__(self):
        return (
            u'An identifier is missing. All PROV elements require a valid '
            u'identifier.'
        )
225
226
227 # PROV records
@six.python_2_unicode_compatible
class ProvRecord(object):
    """Base class for PROV records.

    A record belongs to a bundle, may carry an identifier, and keeps its
    attributes as a mapping from attribute name to a set of values.
    """

    # Names of the record type's formal attributes, in the order they are
    # written out by get_provn(); overridden by subclasses.
    FORMAL_ATTRIBUTES = ()

    # PROV type of record; overridden by concrete subclasses.
    _prov_type = None

    def __init__(self, bundle, identifier, attributes=None):
        """
        Constructor.

        :param bundle: Bundle for the PROV record.
        :param identifier: (Unique) identifier of the record.
        :param attributes: Attributes to associate with the record (default: None).
        """
        self._bundle = bundle
        self._identifier = identifier
        self._attributes = defaultdict(set)
        if attributes:
            self.add_attributes(attributes)

    def __hash__(self):
        return hash(
            (self.get_type(), self._identifier, frozenset(self.attributes))
        )

    def copy(self):
        """
        Return a new record of the same type, built from this record's
        bundle, identifier and attributes.
        """
        return PROV_REC_CLS[self.get_type()](
            self._bundle, self.identifier, self.attributes
        )

    def get_type(self):
        """Returns the PROV type of the record."""
        return self._prov_type

    def get_asserted_types(self):
        """Returns the set of all asserted PROV types of this record."""
        return self._attributes[PROV_TYPE]

    def add_asserted_type(self, type_identifier):
        """
        Adds a PROV type assertion to the record.

        :param type_identifier: PROV namespace identifier to add.
        """
        self._attributes[PROV_TYPE].add(type_identifier)

    def get_attribute(self, attr_name):
        """
        Returns the values recorded for the attribute of the given name.

        :param attr_name: Name of the attribute; resolved through the
            bundle's ``valid_qualified_name`` first.
        :return: Set of values (empty if the attribute has none).
        """
        attr_name = self._bundle.valid_qualified_name(attr_name)
        return self._attributes[attr_name]

    @property
    def identifier(self):
        """Record's identifier."""
        return self._identifier

    @property
    def attributes(self):
        """
        All record attributes.

        :return: List of tuples (name, value), one per value.
        """
        return [
            (attr_name, value)
            for attr_name, values in self._attributes.items()
            for value in values
        ]

    @property
    def args(self):
        """
        All values of the record's formal attributes.

        :return: Tuple with one entry per formal attribute (None where the
            attribute has no value).
        """
        return tuple(
            first(self._attributes[attr_name])
            for attr_name in self.FORMAL_ATTRIBUTES
        )

    @property
    def formal_attributes(self):
        """
        All names and values of the record's formal attributes.

        :return: Tuple of tuples (name, value)
        """
        return tuple(
            (attr_name, first(self._attributes[attr_name]))
            for attr_name in self.FORMAL_ATTRIBUTES
        )

    @property
    def extra_attributes(self):
        """
        All names and values of the record's attributes that are not formal
        attributes.

        :return: List of tuples (name, value)
        """
        return [
            (attr_name, attr_value) for attr_name, attr_value in self.attributes
            if attr_name not in self.FORMAL_ATTRIBUTES
        ]

    @property
    def bundle(self):
        """
        Bundle of the record.

        :return: :py:class:`ProvBundle`
        """
        return self._bundle

    @property
    def label(self):
        """Identifying label of the record.

        One of the ``prov:label`` values when any is set, otherwise the
        record's identifier.
        """
        return first(self._attributes[PROV_LABEL]) \
            if self._attributes[PROV_LABEL] else self._identifier

    @property
    def value(self):
        """Set of ``prov:value`` values of the record."""
        return self._attributes[PROV_VALUE]

    # Handling attributes
    def _auto_literal_conversion(self, literal):
        """Normalise the datatype of an attribute value.

        :param literal: Value to normalise.
        :return: The converted value, or ``literal`` itself when no
            conversion applies.
        """
        if isinstance(literal, ProvRecord):
            # Use the QName of the record as the literal
            literal = literal.identifier

        if isinstance(literal, str):
            return six.text_type(literal)
        elif isinstance(literal, QualifiedName):
            return self._bundle.valid_qualified_name(literal)
        elif isinstance(literal, Literal) and literal.has_no_langtag():
            if literal.datatype:
                # try convert generic Literal object to Python standard type
                # this is to match JSON decoding's literal conversion
                value = parse_xsd_types(literal.value, literal.datatype)
            else:
                # A literal with no datatype nor langtag defined
                # try auto-converting the value
                value = self._auto_literal_conversion(literal.value)
            if value is not None:
                return value

        # No conversion possible, return the original value
        return literal

    def add_attributes(self, attributes):
        """
        Add attributes to the record.

        :param attributes: Dictionary of attributes, with keys being qualified
            identifiers. Alternatively an iterable of tuples (key, value) with the
            keys satisfying the same condition.
        :raises ProvExceptionInvalidQualifiedName: If an attribute name
            cannot be resolved to a qualified name.
        :raises ProvException: If a value is invalid for its attribute, or
            a second, different value is given for a single-valued attribute.
        """
        if not attributes:
            return

        if isinstance(attributes, dict):
            # Converting the dictionary into a list of tuples
            # (i.e. attribute-value pairs)
            attributes = attributes.items()

        # The pairs are walked twice (collection check, then insertion);
        # materialise them so a one-shot iterator is not exhausted by the
        # first pass and silently ignored by the second.
        attributes = list(attributes)

        # Check if one of the attributes specifies that the current type
        # is a collection. In that case multiple attributes of the same
        # type are allowed.
        is_collection = PROV_ATTR_COLLECTION in [
            pair[0] for pair in attributes
        ]

        for attr_name, original_value in attributes:
            if original_value is None:
                continue

            # make sure the attribute name is valid
            attr = self._bundle.valid_qualified_name(attr_name)
            if attr is None:
                raise ProvExceptionInvalidQualifiedName(attr_name)

            if attr in PROV_ATTRIBUTE_QNAMES:
                # Expecting a qualified name
                qname = original_value.identifier \
                    if isinstance(original_value, ProvRecord) \
                    else original_value
                value = self._bundle.valid_qualified_name(qname)
            elif attr in PROV_ATTRIBUTE_LITERALS:
                value = original_value \
                    if isinstance(original_value, datetime.datetime) \
                    else parse_xsd_datetime(original_value)
            else:
                value = self._auto_literal_conversion(original_value)

            if value is None:
                raise ProvException(
                    'Invalid value for attribute %s: %s' %
                    (attr, original_value)
                )

            if not is_collection and attr in PROV_ATTRIBUTES and \
                    self._attributes[attr]:
                existing_value = first(self._attributes[attr])
                is_not_same_value = True
                try:
                    is_not_same_value = value != existing_value
                except TypeError:
                    # Cannot compare them
                    pass  # consider them different values

                if is_not_same_value:
                    raise ProvException(
                        'Cannot have more than one value for attribute %s'
                        % attr
                    )
                else:
                    # Same value, ignore it
                    continue

            self._attributes[attr].add(value)

    def __eq__(self, other):
        # A record never equals something that is not a record; without
        # this guard the attribute accesses below raise AttributeError.
        if not isinstance(other, ProvRecord):
            return False
        if self.get_type() != other.get_type():
            return False
        if self._identifier and not (self._identifier == other._identifier):
            return False

        return set(self.attributes) == set(other.attributes)

    def __ne__(self, other):
        # Python 2 does not derive != from ==; keep the two consistent.
        return not (self == other)

    def __str__(self):
        return self.get_provn()

    def get_provn(self):
        """
        Returns the PROV-N representation of the record.

        :return: String
        """
        items = []

        # Generating identifier
        relation_id = ''  # default blank
        if self._identifier:
            identifier = six.text_type(self._identifier)  # TODO: QName export
            if self.is_element():
                items.append(identifier)
            else:
                # this is a relation
                # relations use ; to separate identifiers
                relation_id = identifier + '; '

        # Writing out the formal attributes
        for attr in self.FORMAL_ATTRIBUTES:
            if attr in self._attributes and self._attributes[attr]:
                # Formal attributes always have single values
                value = first(self._attributes[attr])
                # TODO: QName export
                items.append(
                    value.isoformat() if isinstance(value, datetime.datetime)
                    else six.text_type(value)
                )
            else:
                items.append('-')

        # Writing out the remaining attributes
        extra = []
        for attr in self._attributes:
            if attr not in self.FORMAL_ATTRIBUTES:
                for value in self._attributes[attr]:
                    try:
                        # try if there is a prov-n representation defined
                        provn_representation = value.provn_representation()
                    except AttributeError:
                        provn_representation = encoding_provn_value(value)
                    # TODO: QName export
                    extra.append(
                        '%s=%s' % (six.text_type(attr), provn_representation)
                    )

        if extra:
            items.append('[%s]' % ', '.join(extra))
        prov_n = '%s(%s%s)' % (
            PROV_N_MAP[self.get_type()], relation_id, ', '.join(items)
        )
        return prov_n

    def is_element(self):
        """
        True, if the record is an element, False otherwise.

        :return: bool
        """
        return False

    def is_relation(self):
        """
        True, if the record is a relation, False otherwise.

        :return: bool
        """
        return False
543
544
545 # Abstract classes for elements and relations
class ProvElement(ProvRecord):
    """Provenance Element (a node in the provenance graph)."""

    def __init__(self, bundle, identifier, attributes=None):
        """
        Constructor.

        :param bundle: Bundle for the element.
        :param identifier: Identifier of the element; must not be None.
        :param attributes: Attributes to associate with the element
            (default: None).
        :raises ProvElementIdentifierRequired: If ``identifier`` is None.
        """
        # Every kind of PROV element needs an identifier
        if identifier is None:
            raise ProvElementIdentifierRequired()

        super(ProvElement, self).__init__(bundle, identifier, attributes)

    def is_element(self):
        """
        Always True: this record is an element.

        :return: bool
        """
        return True

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<%s: %s>' % (class_name, self._identifier)
566
567
class ProvRelation(ProvRecord):
    """Provenance Relationship (an edge between nodes)."""

    def is_relation(self):
        """
        Always True: this record is a relation.

        :return: bool
        """
        return True

    def __repr__(self):
        # The identifier is optional for relations
        if self._identifier:
            identifier = ' %s' % self._identifier
        else:
            identifier = ''
        # Values of the first two formal attributes (the related elements)
        (_, element_1), (_, element_2) = self.formal_attributes[:2]
        class_name = self.__class__.__name__
        return '<%s:%s (%s, %s)>' % (
            class_name, identifier, element_1, element_2
        )
587
588
589 # Component 1: Entities and Activities
class ProvEntity(ProvElement):
    """Provenance Entity element"""

    _prov_type = PROV_ENTITY

    # Convenience assertions: each one asks the owning bundle to create a
    # relation with this entity as first (formal) argument, then returns
    # the entity itself so calls can be chained.
    def wasGeneratedBy(self, activity, time=None, attributes=None):
        """
        Asserts that this entity was generated by an activity.

        :param activity: Activity or string identifier of the activity
            involved in the generation.
        :param time: Optional time for the generation (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can be
            parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This entity.
        """
        bundle = self._bundle
        bundle.generation(self, activity, time, other_attributes=attributes)
        return self

    def wasInvalidatedBy(self, activity, time=None, attributes=None):
        """
        Asserts that this entity was invalidated by an activity.

        :param activity: Activity or string identifier of the activity
            involved in the invalidation.
        :param time: Optional time for the invalidation (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can be
            parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This entity.
        """
        bundle = self._bundle
        bundle.invalidation(self, activity, time, other_attributes=attributes)
        return self

    def wasDerivedFrom(self, usedEntity, activity=None, generation=None,
                       usage=None, attributes=None):
        """
        Asserts that this entity was derived from a used entity.

        :param usedEntity: Entity or a string identifier for the used entity.
        :param activity: Activity or string identifier of the activity
            involved in the derivation (default: None).
        :param generation: Optional generation to state a qualified
            derivation (default: None).
        :param usage: Optional usage to state a qualified derivation
            (default: None).
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This entity.
        """
        bundle = self._bundle
        bundle.derivation(
            self, usedEntity, activity, generation, usage,
            other_attributes=attributes
        )
        return self

    def wasAttributedTo(self, agent, attributes=None):
        """
        Asserts that this entity is attributed to an agent.

        :param agent: Agent or string identifier of the agent involved in the
            attribution.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This entity.
        """
        bundle = self._bundle
        bundle.attribution(self, agent, other_attributes=attributes)
        return self

    def alternateOf(self, alternate2):
        """
        Asserts that this entity is an alternate of another entity.

        :param alternate2: Entity or a string identifier for the second entity.
        :return: This entity.
        """
        bundle = self._bundle
        bundle.alternate(self, alternate2)
        return self

    def specializationOf(self, generalEntity):
        """
        Asserts that this entity is a specialisation of a general entity.

        :param generalEntity: Entity or a string identifier for the general entity.
        :return: This entity.
        """
        bundle = self._bundle
        bundle.specialization(self, generalEntity)
        return self

    def hadMember(self, entity):
        """
        Asserts that an entity is a member of this entity (a collection).

        :param entity: Entity to be added to the collection.
        :return: This entity.
        """
        bundle = self._bundle
        bundle.membership(self, entity)
        return self
690
691
class ProvActivity(ProvElement):
    """Provenance Activity element."""

    FORMAL_ATTRIBUTES = (PROV_ATTR_STARTTIME, PROV_ATTR_ENDTIME)

    _prov_type = PROV_ACTIVITY

    # Convenient methods
    def set_time(self, startTime=None, endTime=None):
        """
        Sets the time this activity took place.

        Any previously stored start/end time is replaced. String values are
        parsed so that the stored value is a datetime, as get_startTime()
        and get_endTime() document; parsing errors from dateutil propagate.

        :param startTime: Start time for the activity.
            Either a :py:class:`datetime.datetime` object or a string that can be
            parsed by :py:func:`dateutil.parser`.
        :param endTime: End time for the activity.
            Either a :py:class:`datetime.datetime` object or a string that can be
            parsed by :py:func:`dateutil.parser`.
        """
        if startTime is not None:
            self._attributes[PROV_ATTR_STARTTIME] = {
                _ensure_datetime(startTime)
            }
        if endTime is not None:
            self._attributes[PROV_ATTR_ENDTIME] = {
                _ensure_datetime(endTime)
            }

    def get_startTime(self):
        """
        Returns the time the activity started.

        :return: :py:class:`datetime.datetime`, or None if no start time is set.
        """
        values = self._attributes[PROV_ATTR_STARTTIME]
        return first(values) if values else None

    def get_endTime(self):
        """
        Returns the time the activity ended.

        :return: :py:class:`datetime.datetime`, or None if no end time is set.
        """
        values = self._attributes[PROV_ATTR_ENDTIME]
        return first(values) if values else None

    # Convenient assertions that take the current ProvActivity as the first
    # (formal) argument; each returns the activity itself for chaining
    def used(self, entity, time=None, attributes=None):
        """
        Creates a new usage record for this activity.

        :param entity: Entity or string identifier of the entity involved in
            the usage relationship.
        :param time: Optional time for the usage (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can be
            parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This activity.
        """
        self._bundle.usage(self, entity, time, other_attributes=attributes)
        return self

    def wasInformedBy(self, informant, attributes=None):
        """
        Creates a new communication record for this activity.

        :param informant: The informing activity (relationship source).
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This activity.
        """
        self._bundle.communication(
            self, informant, other_attributes=attributes
        )
        return self

    def wasStartedBy(self, trigger, starter=None, time=None, attributes=None):
        """
        Creates a new start record for this activity. The activity did not exist
        before the start by the trigger.

        :param trigger: Entity triggering the start of this activity.
        :param starter: Optionally extra activity to state a qualified start
            through which the trigger entity for the start is generated
            (default: None).
        :param time: Optional time for the start (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can be
            parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This activity.
        """
        self._bundle.start(
            self, trigger, starter, time, other_attributes=attributes
        )
        return self

    def wasEndedBy(self, trigger, ender=None, time=None, attributes=None):
        """
        Creates a new end record for this activity.

        :param trigger: Entity triggering the end of this activity.
        :param ender: Optionally extra activity to state a qualified end through
            which the trigger entity for the end is generated (default: None).
        :param time: Optional time for the end (default: None).
            Either a :py:class:`datetime.datetime` object or a string that can be
            parsed by :py:func:`dateutil.parser`.
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This activity.
        """
        self._bundle.end(
            self, trigger, ender, time, other_attributes=attributes
        )
        return self

    def wasAssociatedWith(self, agent, plan=None, attributes=None):
        """
        Creates a new association record for this activity.

        :param agent: Agent or string identifier of the agent involved in the
            association.
        :param plan: Optionally extra entity to state qualified association through
            an internal plan (default: None).
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This activity.
        """
        self._bundle.association(
            self, agent, plan, other_attributes=attributes
        )
        return self
817
818
class ProvGeneration(ProvRelation):
    """Provenance Generation relationship."""

    _prov_type = PROV_GENERATION

    # Formal arguments in order: entity, activity, time
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ENTITY,
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_TIME,
    )
825
826
class ProvUsage(ProvRelation):
    """Provenance Usage relationship."""

    _prov_type = PROV_USAGE

    # Formal arguments in order: activity, entity, time
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_ENTITY,
        PROV_ATTR_TIME,
    )
833
834
class ProvCommunication(ProvRelation):
    """Provenance Communication relationship."""

    _prov_type = PROV_COMMUNICATION

    # Formal arguments in order: informed, informant
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_INFORMED,
        PROV_ATTR_INFORMANT,
    )
841
842
class ProvStart(ProvRelation):
    """Provenance Start relationship."""

    _prov_type = PROV_START

    # Formal arguments in order: activity, trigger, starter, time
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_TRIGGER,
        PROV_ATTR_STARTER,
        PROV_ATTR_TIME,
    )
850
851
class ProvEnd(ProvRelation):
    """Provenance End relationship."""

    _prov_type = PROV_END

    # Formal arguments in order: activity, trigger, ender, time
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_TRIGGER,
        PROV_ATTR_ENDER,
        PROV_ATTR_TIME,
    )
859
860
class ProvInvalidation(ProvRelation):
    """Provenance Invalidation relationship."""

    _prov_type = PROV_INVALIDATION

    # Formal arguments in order: entity, activity, time
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ENTITY,
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_TIME,
    )
867
868
869 # Component 2: Derivations
class ProvDerivation(ProvRelation):
    """Provenance Derivation relationship."""

    _prov_type = PROV_DERIVATION

    # Formal arguments in order: generated entity, used entity, activity,
    # generation, usage
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_GENERATED_ENTITY,
        PROV_ATTR_USED_ENTITY,
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_GENERATION,
        PROV_ATTR_USAGE,
    )
878
879
880 # Component 3: Agents, Responsibility, and Influence
class ProvAgent(ProvElement):
    """Provenance Agent element."""

    _prov_type = PROV_AGENT

    # Convenience assertion with this agent as the first (formal) argument
    def actedOnBehalfOf(self, responsible, activity=None, attributes=None):
        """
        Asserts a delegation with this agent as its first argument.

        The record is created through the owning bundle's ``delegation``.

        :param responsible: Agent the responsibility is delegated to.
        :param activity: Optionally extra activity to state qualified delegation
            internally (default: None).
        :param attributes: Optional other attributes as a dictionary or list
            of tuples to be added to the record (default: None).
        :return: This agent.
        """
        bundle = self._bundle
        bundle.delegation(
            self, responsible, activity, other_attributes=attributes
        )
        return self
902
903
class ProvAttribution(ProvRelation):
    """Provenance Attribution relationship."""

    _prov_type = PROV_ATTRIBUTION

    # Formal arguments in order: entity, agent
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ENTITY,
        PROV_ATTR_AGENT,
    )
910
911
class ProvAssociation(ProvRelation):
    """Provenance Association relationship."""

    _prov_type = PROV_ASSOCIATION

    # Formal arguments in order: activity, agent, plan
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ACTIVITY,
        PROV_ATTR_AGENT,
        PROV_ATTR_PLAN,
    )
918
919
class ProvDelegation(ProvRelation):
    """Provenance Delegation relationship."""

    _prov_type = PROV_DELEGATION

    # Formal arguments in order: delegate, responsible, activity
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_DELEGATE,
        PROV_ATTR_RESPONSIBLE,
        PROV_ATTR_ACTIVITY,
    )
927
928
class ProvInfluence(ProvRelation):
    """Provenance Influence relationship."""

    _prov_type = PROV_INFLUENCE

    # Formal arguments in order: influencee, influencer
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_INFLUENCEE,
        PROV_ATTR_INFLUENCER,
    )
935
936
937 # Component 5: Alternate Entities
class ProvSpecialization(ProvRelation):
    """Provenance Specialization relationship."""

    _prov_type = PROV_SPECIALIZATION

    # Formal arguments in order: specific entity, general entity
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_SPECIFIC_ENTITY,
        PROV_ATTR_GENERAL_ENTITY,
    )
944
945
class ProvAlternate(ProvRelation):
    """Provenance Alternate relationship."""

    _prov_type = PROV_ALTERNATE

    # Formal arguments in order: first alternate, second alternate
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_ALTERNATE1,
        PROV_ATTR_ALTERNATE2,
    )
952
953
class ProvMention(ProvSpecialization):
    """Provenance Mention relationship (specific Specialization)."""

    _prov_type = PROV_MENTION

    # The two formal arguments of a specialization, followed by the bundle
    FORMAL_ATTRIBUTES = ProvSpecialization.FORMAL_ATTRIBUTES + (
        PROV_ATTR_BUNDLE,
    )
961
962
963 # Component 6: Collections
class ProvMembership(ProvRelation):
    """Provenance Membership relationship."""

    _prov_type = PROV_MEMBERSHIP

    # Formal arguments in order: collection, entity
    FORMAL_ATTRIBUTES = (
        PROV_ATTR_COLLECTION,
        PROV_ATTR_ENTITY,
    )
970
971
972 # Class mappings from PROV record type
# Lookup from PROV record type to the class implementing it, built from
# the type each record class declares for itself
PROV_REC_CLS = {
    record_class._prov_type: record_class
    for record_class in (
        ProvEntity,
        ProvActivity,
        ProvGeneration,
        ProvUsage,
        ProvCommunication,
        ProvStart,
        ProvEnd,
        ProvInvalidation,
        ProvDerivation,
        ProvAgent,
        ProvAttribution,
        ProvAssociation,
        ProvDelegation,
        ProvInfluence,
        ProvSpecialization,
        ProvAlternate,
        ProvMention,
        ProvMembership,
    )
}


# Namespaces every NamespaceManager starts out with, keyed by prefix
DEFAULT_NAMESPACES = {
    'prov': PROV,
    'xsd': XSD,
    'xsi': XSI,
}
996
997
998 # Bundle
999 class NamespaceManager(dict):
1000 """Manages namespaces for PROV documents and bundles."""
1001
1002 parent = None
1003 """Parent :py:class:`NamespaceManager` this manager one is a child of."""
1004
def __init__(self, namespaces=None, default=None, parent=None):
    """
    Constructor.

    The manager starts out holding the entries of ``DEFAULT_NAMESPACES``
    and then registers the given namespaces.

    :param namespaces: Optional namespaces to add to the manager
        (default: None).
    :param default: Optional default namespace to use (default: None).
    :param parent: Optional parent :py:class:`NamespaceManager` to make this
        namespace manager a child of (default: None).
    """
    dict.__init__(self)
    self._default_namespaces = DEFAULT_NAMESPACES
    self.update(self._default_namespaces)
    self._namespaces = {}

    if default is None:
        self._default = None
    else:
        self.set_default_namespace(default)
    self.parent = parent
    # TODO check if default is in the default namespaces
    self._anon_id_count = 0
    # Bookkeeping maintained by add_namespace()
    self._uri_map = {}
    self._rename_map = {}
    self._prefix_renamed_map = {}
    self.add_namespaces(namespaces)
1031
def get_namespace(self, uri):
    """
    Returns the namespace held by this manager that has the given URI.

    :param uri: Namespace URI.
    :return: :py:class:`~prov.identifier.Namespace`, or None if no held
        namespace has that URI.
    """
    for namespace in self.values():
        # Use the public accessor, as add_namespace() does, rather than
        # reaching into the namespace's private attribute.
        if uri == namespace.uri:
            return namespace
    return None
1043
def get_registered_namespaces(self):
    """
    Returns the namespaces recorded in the manager's registry.

    :return: Iterable of :py:class:`~prov.identifier.Namespace`.
    """
    registry = self._namespaces
    return registry.values()
1051
def set_default_namespace(self, uri):
    """
    Makes a namespace with an empty prefix and the given URI the default.

    The namespace is also stored in the manager under the empty prefix.

    :param uri: Namespace URI.
    """
    default_namespace = Namespace('', uri)
    self._default = default_namespace
    self[''] = default_namespace
1060
def get_default_namespace(self):
    """
    Returns the default namespace, or None if none has been set.

    :return: :py:class:`~prov.identifier.Namespace`
    """
    default_namespace = self._default
    return default_namespace
1068
def add_namespace(self, namespace):
    """
    Adds a namespace (if not available, yet).

    :param namespace: :py:class:`~prov.identifier.Namespace` to add.
    :return: The namespace now standing for ``namespace`` in this manager:
        the argument itself, an already registered namespace with the same
        URI, or a renamed copy when its prefix was already taken.
    """
    # Held as-is already: nothing to do
    if namespace in self.values():
        return namespace
    # Seen before and mapped onto another namespace
    if namespace in self._rename_map:
        return self._rename_map[namespace]

    ns_uri = namespace.uri
    ns_prefix = namespace.prefix

    # A namespace with this URI was registered earlier: reuse that one
    if ns_uri in self._uri_map:
        registered = self._uri_map[ns_uri]
        self._rename_map[namespace] = registered
        self._prefix_renamed_map[ns_prefix] = registered
        return registered

    if ns_prefix in self:
        # Conflicting prefix: register under an unused one instead
        fresh_prefix = self._get_unused_prefix(ns_prefix)
        replacement = Namespace(fresh_prefix, namespace.uri)
        self._rename_map[namespace] = replacement
        # TODO: What if the prefix is already in the map and point to a
        # different Namespace? Raise an exception?
        self._prefix_renamed_map[ns_prefix] = replacement
        ns_prefix, namespace = fresh_prefix, replacement

    # Only now add the namespace to the registry
    self._namespaces[ns_prefix] = namespace
    self[ns_prefix] = namespace
    self._uri_map[ns_uri] = namespace
    return namespace
1110
def add_namespaces(self, namespaces):
    """
    Registers several namespaces with this manager.

    :param namespaces: The namespace(s) to add; an empty or None value adds
        nothing.
    :type namespaces: List of :py:class:`~prov.identifier.Namespace` or
        dict of {prefix: uri}.
    :returns: None
    """
    if isinstance(namespaces, dict):
        # A {prefix: uri} mapping is turned into Namespace objects first
        namespaces = [
            Namespace(ns_prefix, ns_uri)
            for ns_prefix, ns_uri in namespaces.items()
        ]
    for each_ns in namespaces or ():
        self.add_namespace(each_ns)
1129
def valid_qualified_name(self, qname):
    """
    Resolves an identifier to a valid qualified name.

    :param qname: Qualified name as :py:class:`~prov.identifier.QualifiedName`,
        an :py:class:`~prov.identifier.Identifier` or a string.
    :return: :py:class:`~prov.identifier.QualifiedName` or None in case of
        failure (empty input, unsupported type, blank node ID, or nothing
        matched here or in the parent manager).
    """
    if not qname:
        return None

    if isinstance(qname, QualifiedName):
        # Register the namespace if it has not been registered before
        namespace = qname.namespace
        prefix = namespace.prefix
        local_part = qname.localpart
        if not prefix:
            # the namespace is a default namespace
            if self._default == namespace:
                # the same default namespace is defined
                return self._default[local_part]
            if self._default is None:
                # no default namespace is defined, reuse the one given
                self._default = namespace
                return qname  # no change, return the original
            # different default namespace,
            # use the 'dn' prefix for the new namespace
            dn_namespace = self.add_namespace(
                Namespace('dn', namespace.uri)
            )
            return dn_namespace[local_part]
        if prefix in self and self[prefix] == namespace:
            # No need to add the namespace
            existing_ns = self[prefix]
            if existing_ns is namespace:
                return qname
            # reuse the existing namespace
            return existing_ns[local_part]
        # Do not reuse the namespace object; mint the same qualified name
        # from the registered copy
        ns = self.add_namespace(deepcopy(namespace))
        return ns[local_part]

    # Trying to guess from here
    if not isinstance(qname, (six.string_types, Identifier)):
        # Only proceed for string or URI values
        return None
    # Try to generate a Qualified Name
    str_value = \
        qname.uri if isinstance(qname, Identifier) else six.text_type(qname)
    if str_value.startswith('_:'):
        # this is a blank node ID
        return None
    elif ':' in str_value:
        # check if the identifier contains a registered prefix
        prefix, local_part = str_value.split(':', 1)
        if prefix in self:
            # return a new QualifiedName
            return self[prefix][local_part]
        if prefix in self._prefix_renamed_map:
            # return a new QualifiedName
            return self._prefix_renamed_map[prefix][local_part]
        else:
            # treat as a URI (with the first part as its scheme)
            # check if the URI can be compacted
            for namespace in self.values():
                ns_uri = namespace.uri
                if str_value.startswith(ns_uri):
                    # Strip only the leading namespace URI; str.replace()
                    # would also remove later occurrences of the same text
                    # inside the local part.
                    return namespace[str_value[len(ns_uri):]]
    elif self._default:
        # create and return an identifier in the default namespace, using
        # the text form so an Identifier input is treated like a string
        return self._default[str_value]

    if self.parent:
        # all attempts have failed so far
        # now delegate this to the parent NamespaceManager
        return self.parent.valid_qualified_name(qname)

    # Default to FAIL
    return None
1215
def get_anonymous_identifier(self, local_prefix='id'):
    """
    Mints a new anonymous identifier of the form ``_:<local_prefix><n>``.

    Each call increments this manager's counter, which supplies ``n``.

    :param local_prefix: Optional local namespace prefix as a string
        (default: 'id').
    :return: :py:class:`~prov.identifier.Identifier`
    """
    self._anon_id_count += 1
    blank_node_id = '_:%s%d' % (local_prefix, self._anon_id_count)
    return Identifier(blank_node_id)
1226
def _get_unused_prefix(self, original_prefix):
    """
    Finds a prefix that is not yet a key of this manager.

    :param original_prefix: The preferred prefix.
    :return: ``original_prefix`` itself when it is free, otherwise the
        first free ``<original_prefix>_<n>`` with n counting up from 1.
    """
    candidate = original_prefix
    suffix = 0
    while candidate in self:
        suffix += 1
        candidate = '_'.join((original_prefix, six.text_type(suffix)))
    return candidate
1237
1238
1239 class ProvBundle(object):
1240 """PROV Bundle"""
1241
def __init__(self, records=None, identifier=None, namespaces=None,
             document=None):
    """
    Constructor.

    :param records: Optional iterable of records to add to the bundle
        (default: None).
    :param identifier: Optional identifier of the bundle (default: None).
    :param namespaces: Optional iterable of :py:class:`~prov.identifier.Namespace`s
        to set the bundle up with (default: None).
    :param document: Optional document this bundle belongs to; its
        namespace manager becomes the parent of this bundle's manager
        (default: None).
    """
    # Bundle-specific state
    self._identifier = identifier
    self._records = list()
    self._id_map = defaultdict(list)
    self._document = document

    parent_namespaces = None if document is None else document._namespaces
    self._namespaces = NamespaceManager(namespaces, parent=parent_namespaces)

    for record in records or ():
        self.add_record(record)
1266
def __repr__(self):
    """Returns ``<ClassName: identifier>`` for debugging output."""
    class_name = self.__class__.__name__
    return '<%s: %s>' % (class_name, self._identifier)
1269
@property
def namespaces(self):
    """
    Returns the registered namespaces of this bundle as a new set.

    :return: Set of :py:class:`~prov.identifier.Namespace`.
    """
    registered = self._namespaces.get_registered_namespaces()
    return set(registered)
1278
@property
def default_ns_uri(self):
    """
    Returns the URI of the default namespace.

    :return: URI as string, or None when there is no default namespace.
    """
    default_ns = self._namespaces.get_default_namespace()
    if default_ns:
        return default_ns.uri
    return None
1288
@property
def document(self):
    """
    Returns the document given at construction time.

    :return: :py:class:`ProvDocument`, or None if there is none.
    """
    owner = self._document
    return owner
1297
@property
def identifier(self):
    """
    Returns the identifier this bundle was created with (may be None).
    """
    bundle_id = self._identifier
    return bundle_id
1304
@property
def records(self):
    """
    Returns a new list holding all records of the current bundle.

    The list is a copy; changing it does not change the bundle.
    """
    snapshot = list(self._records)
    return snapshot
1311
1312 # Bundle configurations
def set_default_namespace(self, uri):
    """
    Sets the bundle's default namespace from a URI.

    The call is forwarded to the bundle's namespace manager.

    :param uri: Namespace URI.
    """
    manager = self._namespaces
    manager.set_default_namespace(uri)
1320
def get_default_namespace(self):
    """
    Returns the default namespace held by the bundle's namespace manager.

    :return: :py:class:`~prov.identifier.Namespace`
    """
    manager = self._namespaces
    return manager.get_default_namespace()
1328
def add_namespace(self, namespace_or_prefix, uri=None):
    """
    Adds a namespace (if not available, yet).

    :param namespace_or_prefix: :py:class:`~prov.identifier.Namespace` or its
        prefix as a string to add.
    :param uri: Namespace URI (default: None). Must be present if only a
        prefix is given in the previous parameter.
    :return: Whatever the namespace manager's ``add_namespace`` returns.
    """
    if uri is not None:
        # A prefix/URI pair was given: build the namespace from it
        namespace = Namespace(namespace_or_prefix, uri)
    else:
        namespace = namespace_or_prefix
    return self._namespaces.add_namespace(namespace)
1344
def get_registered_namespaces(self):
    """
    Returns the namespaces registered with the bundle's namespace manager.

    :return: Iterable of :py:class:`~prov.identifier.Namespace`.
    """
    manager = self._namespaces
    return manager.get_registered_namespaces()
1352
def valid_qualified_name(self, identifier):
    """
    Resolves an identifier through the bundle's namespace manager.

    :param identifier: The identifier to resolve.
    :return: The result of the manager's ``valid_qualified_name``.
    """
    manager = self._namespaces
    return manager.valid_qualified_name(identifier)
1355
def get_records(self, class_or_type_or_tuple=None):
    """
    Returns all records. Returned records may be filtered by the optional
    argument.

    :param class_or_type_or_tuple: A filter on the type for which records are
        to be returned (default: None). The filter checks by the type of the
        record using the `isinstance` check on the record.
    :return: A new list of :py:class:`ProvRecord` objects, in insertion
        order. A list is returned with or without a filter, so the result
        can be indexed, measured and iterated more than once.
    """
    if not class_or_type_or_tuple:
        return list(self._records)
    # A list comprehension rather than filter(): on Python 3 filter()
    # yields a one-shot iterator, not the documented list.
    return [
        rec for rec in self._records
        if isinstance(rec, class_or_type_or_tuple)
    ]
1373
def get_record(self, identifier):
    """
    Returns the records matching a given identifier.

    The identifier is resolved with :py:meth:`valid_qualified_name` and
    looked up in this bundle first. If nothing is found and this is a
    bundle attached to a document, the lookup is delegated to that
    document.

    :param identifier: Record identifier.
    :return: List of :py:class:`ProvRecord` sharing the identifier (empty
        if none was found), or None if ``identifier`` is None.
    """
    if identifier is None:
        return None
    valid_id = self.valid_qualified_name(identifier)
    # _id_map is a defaultdict(list): indexing it never raises KeyError and
    # would insert an empty list for every unknown identifier. get() looks
    # the key up without that side effect.
    found = self._id_map.get(valid_id)
    if found:
        return found
    # looking up the parent document, if there is one
    parent = self.document
    if self.is_bundle() and parent is not None:
        return parent.get_record(valid_id)
    return []
1394
1395 # Miscellaneous functions
def is_document(self):
    """
    Tells whether this object is a document; this implementation always
    answers `False`.

    :return: bool
    """
    return False
1403
def is_bundle(self):
    """
    Tells whether this object is a bundle; this implementation always
    answers `True`.

    :return: bool
    """
    return True
1411
def has_bundles(self):
    """
    Tells whether this object contains at least one bundle; this
    implementation always answers `False`.

    :return: bool
    """
    return False
1419
@property
def bundles(self):
    """
    Returns the contained bundles; this implementation always returns an
    empty frozenset.

    :return: Iterable of :py:class:`ProvBundle`.
    """
    no_bundles = frozenset()
    return no_bundles
1428
def get_provn(self, _indent_level=0):
    """
    Returns the PROV-N representation of the bundle.

    The output starts with ``document`` or ``bundle <identifier>``, lists
    the default namespace and the registered prefixes, then every record
    (and, for a document, every contained bundle one indent level deeper),
    and ends with ``endDocument`` or ``endBundle``.

    :param _indent_level: Nesting depth used to indent the output
        (default: 0).
    :return: String
    """
    is_doc = self.is_document()
    indentation = '' + (' ' * _indent_level)
    newline = '\n' + (' ' * (_indent_level + 1))

    # opening line: document or bundle
    lines = ['document' if is_doc else 'bundle %s' % self._identifier]

    default_namespace = self._namespaces.get_default_namespace()
    if default_namespace:
        lines.append('default <%s>' % default_namespace.uri)

    registered_namespaces = self._namespaces.get_registered_namespaces()
    if registered_namespaces:
        for namespace in registered_namespaces:
            lines.append(
                'prefix %s <%s>' % (namespace.prefix, namespace.uri)
            )

    if default_namespace or registered_namespaces:
        # a blank line between the prefixes and the assertions
        lines.append('')

    # all the records, then (documents only) the nested bundles
    for record in self._records:
        lines.append(record.get_provn())
    if is_doc:
        for bundle in self.bundles:
            lines.append(bundle.get_provn(_indent_level + 1))

    # closing the structure
    closing = 'endDocument' if is_doc else 'endBundle'
    return newline.join(lines) + '\n' + indentation + closing
1472
1473 def __eq__(self, other):
1474 if not isinstance(other, ProvBundle):
1475 return False
1476 other_records = set(other.get_records())
1477 this_records = set(self.get_records())
1478 if len(this_records) != len(other_records):
1479 return False
1480 # check if all records for equality
1481 for record_a in this_records:
1482 # Manually look for the record
1483 found = False
1484 for record_b in other_records:
1485 if record_a == record_b:
1486 other_records.remove(record_b)
1487 found = True
1488 break
1489 if not found:
1490 logger.debug(
1491 'Equality (ProvBundle): Could not find this record: %s',
1492 six.text_type(record_a)
1493 )
1494 return False
1495 return True
1496
def __ne__(self, other):
    """Returns the negation of ``self == other``."""
    is_equal = (self == other)
    return not is_equal
1499
1500 __hash__ = None
1501
1502 # Transformations
def _unified_records(self):
    """
    Returns the bundle's records with same-identifier records merged.

    Records sharing an identifier are replaced by one merged record: a
    copy of the first plus the attributes of the others. The merged record
    takes the position of the first such record; all other records keep
    their order. If nothing needs merging, a copy of the record list is
    returned.
    """
    # TODO: Check unification rules in the PROV-CONSTRAINTS document
    # This method simply merges the records having the same name
    merged_for = dict()
    for same_id_records in self._id_map.values():
        if len(same_id_records) < 2:
            continue
        # several records carry this identifier: merge them into one
        merged = same_id_records[0].copy()
        for extra in same_id_records[1:]:
            merged.add_attributes(extra.attributes)
        # every original points at the merged record
        for original in same_id_records:
            merged_for[original] = merged

    if not merged_for:
        # No merging done, just return the list of original records
        return list(self._records)

    emitted = set()
    unified = list()
    for record in self._records:
        if record not in merged_for:
            # untouched record, keep as is
            unified.append(record)
            continue
        merged = merged_for[record]
        if merged not in emitted:
            # emit each merged record only once
            unified.append(merged)
            emitted.add(merged)
    return unified
1534
def unified(self):
    """
    Builds a new bundle in which records having the same identifier are
    merged into one.

    The new bundle is created from the unified records and this bundle's
    identifier only.

    :returns: :py:class:`ProvBundle` -- the new unified bundle.
    """
    return ProvBundle(
        records=self._unified_records(), identifier=self.identifier
    )
1546
def update(self, other):
    """
    Append all the records of the *other* ProvBundle into this bundle.

    :param other: the other bundle whose records to be appended.
    :type other: :py:class:`ProvBundle`
    :returns: None.
    :raises ProvException: if *other* is not a :py:class:`ProvBundle`, or
        if it is a document that has sub-bundles.
    """
    if not isinstance(other, ProvBundle):
        raise ProvException(
            'ProvBundle.update(): The other bundle is not a ProvBundle '
            'instance (%s)' % type(other)
        )
    if other.is_document() and other.has_bundles():
        # Cannot add bundles to a bundle
        raise ProvException(
            'ProvBundle.update(): The other bundle is a document with '
            'sub-bundle(s).'
        )
    for record in other.get_records():
        self.add_record(record)
1569
1570 # Provenance statements
def _add_record(self, record):
    """
    Stores a record in the bundle and indexes it by its identifier.

    IMPORTANT: All records need to be added to a bundle/document via this
    method. Otherwise, the _id_map dict will not be correctly updated.

    :param record: The record to store; it is only indexed when its
        identifier is not None.
    """
    self._records.append(record)
    record_id = record.identifier
    if record_id is not None:
        self._id_map[record_id].append(record)
1578
def new_record(self, record_type, identifier, attributes=None,
               other_attributes=None):
    """
    Creates a new record and adds it to this bundle.

    :param record_type: Type of record (one of :py:const:`PROV_REC_CLS`).
    :param identifier: Identifier for new record; it is resolved with
        :py:meth:`valid_qualified_name` first.
    :param attributes: Attributes as a dictionary or list of tuples to be added
        to the record optionally (default: None).
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The newly created record.
    """
    attr_list = []
    # formal attributes first, then the additional ones; each may be a
    # dict or an iterable of (attribute, value) pairs
    for given in (attributes, other_attributes):
        if given:
            attr_list.extend(
                given.items() if isinstance(given, dict) else given
            )
    record_class = PROV_REC_CLS[record_type]
    created = record_class(
        self, self.valid_qualified_name(identifier), attr_list
    )
    self._add_record(created)
    return created
1610
def add_record(self, record):
    """
    Adds a record to the bundle.

    The given record is not stored itself: a new record is created in this
    bundle from its type, identifier, formal attributes and extra
    attributes.

    :param record: :py:class:`ProvRecord` to be added.
    :return: The record created by :py:meth:`new_record`.
    """
    record_type = record.get_type()
    return self.new_record(
        record_type,
        record.identifier,
        record.formal_attributes,
        record.extra_attributes,
    )
1621
def entity(self, identifier, other_attributes=None):
    """
    Creates a new entity record in this bundle.

    :param identifier: Identifier for new entity.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record, as returned by :py:meth:`new_record`.
    """
    formal_attributes = None  # an entity has no formal attributes here
    return self.new_record(
        PROV_ENTITY, identifier, formal_attributes, other_attributes
    )
1631
def activity(self, identifier, startTime=None, endTime=None,
             other_attributes=None):
    """
    Creates a new activity record in this bundle.

    :param identifier: Identifier for new activity.
    :param startTime: Optional start time for the activity (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param endTime: Optional end time for the activity (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record, as returned by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_STARTTIME: _ensure_datetime(startTime),
        PROV_ATTR_ENDTIME: _ensure_datetime(endTime),
    }
    return self.new_record(
        PROV_ACTIVITY, identifier, formal_attributes, other_attributes
    )
1654
def generation(self, entity, activity=None, time=None, identifier=None,
               other_attributes=None):
    """
    Creates a new generation record for an entity.

    :param entity: Entity or a string identifier for the entity.
    :param activity: Activity or string identifier of the activity involved in
        the generation (default: None).
    :param time: Optional time for the generation (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new generation record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record, as returned by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_ENTITY: entity,
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_GENERATION, identifier, formal_attributes, other_attributes
    )
1678
def usage(self, activity, entity=None, time=None, identifier=None,
          other_attributes=None):
    """
    Creates a new usage record for an activity.

    :param activity: Activity or a string identifier for the activity.
    :param entity: Entity or string identifier of the entity involved in
        the usage relationship (default: None).
    :param time: Optional time for the usage (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new usage record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record, as returned by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_ENTITY: entity,
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_USAGE, identifier, formal_attributes, other_attributes
    )
1701
def start(self, activity, trigger=None, starter=None, time=None,
          identifier=None, other_attributes=None):
    """
    Creates a new start record for an activity.

    :param activity: Activity or a string identifier for the activity.
    :param trigger: Entity triggering the start of this activity.
    :param starter: Optionally extra activity to state a qualified start
        through which the trigger entity for the start is generated
        (default: None).
    :param time: Optional time for the start (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new start record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record, as returned by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_TRIGGER: trigger,
        PROV_ATTR_STARTER: starter,
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_START, identifier, formal_attributes, other_attributes
    )
1728
def end(self, activity, trigger=None, ender=None, time=None,
        identifier=None, other_attributes=None):
    """
    Creates a new end record for an activity.

    :param activity: Activity or a string identifier for the activity.
    :param trigger: Entity triggering the end of this activity.
    :param ender: Optionally extra activity to state a qualified end
        through which the trigger entity for the end is generated
        (default: None).
    :param time: Optional time for the end (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new end record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record, as returned by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_TRIGGER: trigger,
        PROV_ATTR_ENDER: ender,
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_END, identifier, formal_attributes, other_attributes
    )
1755
def invalidation(self, entity, activity=None, time=None, identifier=None,
                 other_attributes=None):
    """
    Creates a new invalidation record for an entity.

    :param entity: Entity or a string identifier for the entity.
    :param activity: Activity or string identifier of the activity involved in
        the invalidation (default: None).
    :param time: Optional time for the invalidation (default: None).
        Either a :py:class:`datetime.datetime` object or a string that can be
        parsed by :py:func:`dateutil.parser`.
    :param identifier: Identifier for new invalidation record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record, as returned by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_ENTITY: entity,
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_TIME: _ensure_datetime(time),
    }
    return self.new_record(
        PROV_INVALIDATION, identifier, formal_attributes, other_attributes
    )
1779
def communication(self, informed, informant, identifier=None,
                  other_attributes=None):
    """
    Creates a new communication record between two activities.

    :param informed: The informed activity (relationship destination).
    :param informant: The informing activity (relationship source).
    :param identifier: Identifier for new communication record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record, as returned by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_INFORMED: informed,
        PROV_ATTR_INFORMANT: informant,
    }
    return self.new_record(
        PROV_COMMUNICATION, identifier, formal_attributes, other_attributes
    )
1798
def agent(self, identifier, other_attributes=None):
    """
    Creates a new agent record in this bundle.

    :param identifier: Identifier for new agent.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record, as returned by :py:meth:`new_record`.
    """
    formal_attributes = None  # an agent has no formal attributes here
    return self.new_record(
        PROV_AGENT, identifier, formal_attributes, other_attributes
    )
1808
def attribution(self, entity, agent, identifier=None,
                other_attributes=None):
    """
    Creates a new attribution record between an entity and an agent.

    :param entity: Entity or a string identifier for the entity (relationship
        source).
    :param agent: Agent or string identifier of the agent involved in the
        attribution (relationship destination).
    :param identifier: Identifier for new attribution record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record, as returned by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_ENTITY: entity,
        PROV_ATTR_AGENT: agent,
    }
    return self.new_record(
        PROV_ATTRIBUTION, identifier, formal_attributes, other_attributes
    )
1829
def association(self, activity, agent=None, plan=None, identifier=None,
                other_attributes=None):
    """
    Creates a new association record for an activity.

    :param activity: Activity or a string identifier for the activity.
    :param agent: Agent or string identifier of the agent involved in the
        association (default: None).
    :param plan: Optionally extra entity to state qualified association through
        an internal plan (default: None).
    :param identifier: Identifier for new association record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record, as returned by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_ACTIVITY: activity,
        PROV_ATTR_AGENT: agent,
        PROV_ATTR_PLAN: plan,
    }
    return self.new_record(
        PROV_ASSOCIATION, identifier, formal_attributes, other_attributes
    )
1852
def delegation(self, delegate, responsible, activity=None, identifier=None,
               other_attributes=None):
    """
    Creates a new delegation record between two agents.

    :param delegate: Agent or identifier of the delegate agent (relationship
        source).
    :param responsible: Agent or identifier of the responsible agent
        (relationship destination).
    :param activity: Optionally extra activity to state qualified delegation
        internally (default: None).
    :param identifier: Identifier for new delegation record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record, as returned by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_DELEGATE: delegate,
        PROV_ATTR_RESPONSIBLE: responsible,
        PROV_ATTR_ACTIVITY: activity,
    }
    return self.new_record(
        PROV_DELEGATION, identifier, formal_attributes, other_attributes
    )
1875
def influence(self, influencee, influencer, identifier=None,
              other_attributes=None):
    """
    Creates a new influence record between two entities, activities or agents.

    :param influencee: Influenced entity, activity or agent (relationship
        source).
    :param influencer: Influencing entity, activity or agent (relationship
        destination).
    :param identifier: Identifier for new influence record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record, as returned by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_INFLUENCEE: influencee,
        PROV_ATTR_INFLUENCER: influencer,
    }
    return self.new_record(
        PROV_INFLUENCE, identifier, formal_attributes, other_attributes
    )
1896
def derivation(self, generatedEntity, usedEntity, activity=None,
               generation=None, usage=None,
               identifier=None, other_attributes=None):
    """
    Creates a new derivation record for a generated entity from a used entity.

    :param generatedEntity: Entity or a string identifier for the generated
        entity (relationship source).
    :param usedEntity: Entity or a string identifier for the used entity
        (relationship destination).
    :param activity: Activity or string identifier of the activity involved in
        the derivation (default: None).
    :param generation: Optional value stored as the record's generation
        attribute (default: None).
    :param usage: Optional value stored as the record's usage attribute
        (default: None).
    :param identifier: Identifier for new derivation record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new record, as returned by :py:meth:`new_record`.
    """
    return self.new_record(
        PROV_DERIVATION,
        identifier,
        {
            PROV_ATTR_GENERATED_ENTITY: generatedEntity,
            PROV_ATTR_USED_ENTITY: usedEntity,
            PROV_ATTR_ACTIVITY: activity,
            PROV_ATTR_GENERATION: generation,
            PROV_ATTR_USAGE: usage,
        },
        other_attributes,
    )
1924
def revision(self, generatedEntity, usedEntity, activity=None,
             generation=None, usage=None,
             identifier=None, other_attributes=None):
    """
    Creates a new revision record for a generated entity from a used entity.

    The record is a derivation record with the asserted type
    ``PROV['Revision']`` added.

    :param generatedEntity: Entity or a string identifier for the generated
        entity (relationship source).
    :param usedEntity: Entity or a string identifier for the used entity
        (relationship destination).
    :param activity: Activity or string identifier of the activity involved in
        the revision (default: None).
    :param generation: Passed on to :py:meth:`derivation` (default: None).
    :param usage: Passed on to :py:meth:`derivation` (default: None).
    :param identifier: Identifier for new revision record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new derivation record.
    """
    revision_record = self.derivation(
        generatedEntity, usedEntity, activity, generation, usage,
        identifier, other_attributes
    )
    revision_record.add_asserted_type(PROV['Revision'])
    return revision_record
1950
def quotation(self, generatedEntity, usedEntity, activity=None,
              generation=None, usage=None,
              identifier=None, other_attributes=None):
    """
    Creates a new quotation record for a generated entity from a used entity.

    The record is a derivation record with the asserted type
    ``PROV['Quotation']`` added.

    :param generatedEntity: Entity or a string identifier for the generated
        entity (relationship source).
    :param usedEntity: Entity or a string identifier for the used entity
        (relationship destination).
    :param activity: Activity or string identifier of the activity involved in
        the quotation (default: None).
    :param generation: Passed on to :py:meth:`derivation` (default: None).
    :param usage: Passed on to :py:meth:`derivation` (default: None).
    :param identifier: Identifier for new quotation record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new derivation record.
    """
    quotation_record = self.derivation(
        generatedEntity, usedEntity, activity, generation, usage,
        identifier, other_attributes
    )
    quotation_record.add_asserted_type(PROV['Quotation'])
    return quotation_record
1976
def primary_source(self, generatedEntity, usedEntity, activity=None,
                   generation=None, usage=None,
                   identifier=None, other_attributes=None):
    """
    Creates a new primary source record for a generated entity from a used
    entity.

    The record is a derivation record with the asserted type
    ``PROV['PrimarySource']`` added.

    :param generatedEntity: Entity or a string identifier for the generated
        entity (relationship source).
    :param usedEntity: Entity or a string identifier for the used entity
        (relationship destination).
    :param activity: Activity or string identifier of the activity involved in
        the primary source (default: None).
    :param generation: Passed on to :py:meth:`derivation` (default: None).
    :param usage: Passed on to :py:meth:`derivation` (default: None).
    :param identifier: Identifier for new primary source record.
    :param other_attributes: Optional other attributes as a dictionary or list
        of tuples to be added to the record optionally (default: None).
    :return: The new derivation record.
    """
    source_record = self.derivation(
        generatedEntity, usedEntity, activity, generation, usage,
        identifier, other_attributes
    )
    source_record.add_asserted_type(PROV['PrimarySource'])
    return source_record
2003
def specialization(self, specificEntity, generalEntity):
    """
    Records that a specific entity is a specialisation of a general entity.

    The record is created without an identifier.

    :param specificEntity: Entity or a string identifier for the specific
        entity (relationship source).
    :param generalEntity: Entity or a string identifier for the general
        entity (relationship destination).
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_SPECIFIC_ENTITY: specificEntity,
        PROV_ATTR_GENERAL_ENTITY: generalEntity,
    }
    return self.new_record(PROV_SPECIALIZATION, None, formal_attributes)
2019
def alternate(self, alternate1, alternate2):
    """
    Records that two entities are alternates of each other.

    The record is created without an identifier.

    :param alternate1: Entity or a string identifier for the first entity
        (relationship source).
    :param alternate2: Entity or a string identifier for the second entity
        (relationship destination).
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_ALTERNATE1: alternate1,
        PROV_ATTR_ALTERNATE2: alternate2,
    }
    return self.new_record(PROV_ALTERNATE, None, formal_attributes)
2035
def mention(self, specificEntity, generalEntity, bundle):
    """
    Records that a specific entity is a mention of a general entity.

    The record is created without an identifier.

    :param specificEntity: Entity or a string identifier for the specific
        entity (relationship source).
    :param generalEntity: Entity or a string identifier for the general
        entity (relationship destination).
    :param bundle: Value stored under ``PROV_ATTR_BUNDLE``; presumably the
        bundle (or its identifier) in which the general entity is
        described -- TODO confirm.
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_SPECIFIC_ENTITY: specificEntity,
        PROV_ATTR_GENERAL_ENTITY: generalEntity,
        PROV_ATTR_BUNDLE: bundle,
    }
    return self.new_record(PROV_MENTION, None, formal_attributes)
2053
def collection(self, identifier, other_attributes=None):
    """
    Creates a new entity record that is asserted to be a collection.

    The record is a ``PROV_ENTITY`` record with the additional asserted
    type ``PROV['Collection']``.

    :param identifier: Identifier for new collection record.
    :param other_attributes: Optional other attributes as a dictionary or
        list of tuples to be added to the record (default: None).
    :return: The entity record, after the collection type was asserted.
    """
    # No formal attributes are supplied (third argument is None).
    collection_record = self.new_record(
        PROV_ENTITY, identifier, None, other_attributes
    )
    collection_record.add_asserted_type(PROV['Collection'])
    return collection_record
2067
def membership(self, collection, entity):
    """
    Records that an entity is a member of a collection.

    The record is created without an identifier.

    :param collection: Collection the entity is to be added to.
    :param entity: Entity to be added to the collection.
    :return: The record created by :py:meth:`new_record`.
    """
    formal_attributes = {
        PROV_ATTR_COLLECTION: collection,
        PROV_ATTR_ENTITY: entity,
    }
    return self.new_record(PROV_MEMBERSHIP, None, formal_attributes)
2081
def plot(self, filename=None, show_nary=True, use_labels=False,
         show_element_attributes=True, show_relation_attributes=True):
    """
    Convenience function to render this PROV document as an image.

    The document is converted with ``prov.dot.prov_to_dot`` and rendered by
    the ``create_<format>`` method of the resulting object.

    :param filename: The filename to save to. If not given, the image is
        rendered as PNG and shown in an interactive matplotlib plot. The
        filetype is determined from the filename ending.
    :type filename: String
    :param show_nary: Shows all elements in n-ary relations.
    :type show_nary: bool
    :param use_labels: Uses the `prov:label` property of an element as its
        name (instead of its identifier).
    :type use_labels: bool
    :param show_element_attributes: Shows attributes of elements.
    :type show_element_attributes: bool
    :param show_relation_attributes: Shows attributes of relations.
    :type show_relation_attributes: bool
    :raises ValueError: If no ``create_<format>`` method exists for the
        format derived from the filename.
    """
    # Lazy imports to have soft dependencies on pydot and matplotlib
    # (imported even later).
    from prov import dot

    if filename:
        extension = os.path.splitext(filename)[-1]
        # Lower-case the extension and drop the leading separator.
        image_format = extension.lower().strip(os.path.extsep)
    else:
        image_format = "png"

    graph = dot.prov_to_dot(
        self,
        show_nary=show_nary,
        use_labels=use_labels,
        show_element_attributes=show_element_attributes,
        show_relation_attributes=show_relation_attributes
    )
    renderer_name = "create_%s" % image_format
    if not hasattr(graph, renderer_name):
        raise ValueError("Format '%s' cannot be saved." % image_format)

    with io.BytesIO() as buf:
        buf.write(getattr(graph, renderer_name)())
        # Rewind so the rendered bytes can be read back from the start.
        buf.seek(0, 0)

        if filename:
            with open(filename, "wb") as fh:
                fh.write(buf.read())
            return

        # Use matplotlib to show the image as it likely is more
        # widespread than PIL and works nicely in the ipython notebook.
        import matplotlib.pylab as plt
        import matplotlib.image as mpimg

        max_size = 30

        img = mpimg.imread(buf)
        # pydot makes a border around the image. remove it.
        img = img[1:-1, 1:-1]

        # Figure size is the image size divided by 100, capped so that the
        # longer side never exceeds max_size.
        width = img.shape[1] / 100.0
        height = img.shape[0] / 100.0
        longest_side = max(width, height)
        scale = max_size / longest_side if longest_side > max_size else 1.0

        plt.figure(figsize=(scale * width, scale * height))
        plt.subplots_adjust(bottom=0, top=1, left=0, right=1)
        plt.xticks([])
        plt.yticks([])
        plt.imshow(img)
        plt.axis("off")
        plt.show()
2149
# Aliases: each name below is bound to an existing record-creation method
# (e.g. ``wasQuotedFrom`` is ``quotation``, ``hadMember`` is
# ``membership``), so either spelling can be used by callers.
# NOTE(review): the alias names look like the PROV-DM/PROV-N relation
# names -- confirm against the specification.
wasGeneratedBy = generation
used = usage
wasStartedBy = start
wasEndedBy = end
wasInvalidatedBy = invalidation
wasInformedBy = communication
wasAttributedTo = attribution
wasAssociatedWith = association
actedOnBehalfOf = delegation
wasInfluencedBy = influence
wasDerivedFrom = derivation
wasRevisionOf = revision
wasQuotedFrom = quotation
hadPrimarySource = primary_source
alternateOf = alternate
specializationOf = specialization
mentionOf = mention
hadMember = membership
2169
2170
class ProvDocument(ProvBundle):
    """Provenance Document.

    A :py:class:`ProvBundle` without an identifier that can additionally hold
    named bundles, stored in ``_bundles`` keyed by their validated
    identifiers.
    """

    def __init__(self, records=None, namespaces=None):
        """
        Constructor.

        :param records: Optional records to add to the document (default: None).
        :param namespaces: Optional iterable of :py:class:`~prov.identifier.Namespace`s
            to set the document up with (default: None).
        """
        ProvBundle.__init__(
            self, records=records, identifier=None, namespaces=namespaces
        )
        self._bundles = dict()

    def __repr__(self):
        return '<ProvDocument>'

    def __eq__(self, other):
        """
        Compares two documents by their own content and by their bundles.

        :param other: The object to compare with.
        :return: bool -- `True` only if *other* is a
            :py:class:`ProvDocument`, the document-level comparison of the
            parent class succeeds, and both documents hold exactly the same
            bundle identifiers with equal bundles.
        """
        if not isinstance(other, ProvDocument):
            return False
        # Comparing the documents' content
        if not super(ProvDocument, self).__eq__(other):
            return False

        # Comparing the documents' bundles. The size check makes the
        # comparison symmetric: without it, a document whose bundles are a
        # strict subset of the other's would compare equal in one direction
        # only.
        if len(self._bundles) != len(other._bundles):
            return False
        for b_id, bundle in self._bundles.items():
            if b_id not in other._bundles:
                return False
            if bundle != other._bundles[b_id]:
                return False

        # Everything is the same
        return True

    def is_document(self):
        """
        `True` if the object is a document, `False` otherwise.

        :return: bool
        """
        return True

    def is_bundle(self):
        """
        `True` if the object is a bundle, `False` otherwise.

        :return: bool
        """
        return False

    def has_bundles(self):
        """
        `True` if the object has at least one bundle, `False` otherwise.

        :return: bool
        """
        return len(self._bundles) > 0

    @property
    def bundles(self):
        """
        Returns bundles contained in the document

        :return: Iterable of :py:class:`ProvBundle`.
        """
        return self._bundles.values()

    # Transformations
    def flattened(self):
        """
        Flattens the document by moving all the records in its bundles up
        to the document level.

        :returns: :py:class:`ProvDocument` -- a new flattened document, or
            this very document if it contains no bundles.
        """
        if self._bundles:
            # Creating a new document for all the records
            new_doc = ProvDocument()
            bundled_records = itertools.chain.from_iterable(
                b.get_records() for b in self._bundles.values()
            )
            for record in itertools.chain(self._records, bundled_records):
                new_doc.add_record(record)
            return new_doc
        else:
            # returning the same document
            return self

    def unified(self):
        """
        Returns a new document containing all records having same identifiers
        unified (including those inside bundles).

        The new document reuses this document's namespace manager object.

        :return: :py:class:`ProvDocument`
        """
        document = ProvDocument(self._unified_records())
        document._namespaces = self._namespaces
        for bundle in self.bundles:
            unified_bundle = bundle.unified()
            document.add_bundle(unified_bundle)
        return document

    def update(self, other):
        """
        Append all the records of the *other* document/bundle into this document.
        Bundles having same identifiers will be merged.

        :param other: The other document/bundle whose records to be appended.
        :type other: :py:class:`ProvDocument` or :py:class:`ProvBundle`
        :raises ProvException: If *other* is not a :py:class:`ProvBundle`
            instance.
        :returns: None.
        """
        if isinstance(other, ProvBundle):
            for record in other.get_records():
                self.add_record(record)
            if other.has_bundles():
                for bundle in other.bundles:
                    if bundle.identifier in self._bundles:
                        self._bundles[bundle.identifier].update(bundle)
                    else:
                        new_bundle = self.bundle(bundle.identifier)
                        new_bundle.update(bundle)
        else:
            raise ProvException(
                'ProvDocument.update(): The other is not a ProvDocument or '
                'ProvBundle instance (%s)' % type(other)
            )

    # Bundle operations
    def add_bundle(self, bundle, identifier=None):
        """
        Add a bundle to the current document.

        :param bundle: The bundle to add to the document.
        :type bundle: :py:class:`ProvBundle`
        :param identifier: The (optional) identifier to use for the bundle
            (default: None). If none given, use the identifier from the bundle
            itself.
        :raises ProvException: If *bundle* is not a :py:class:`ProvBundle`,
            is a document containing bundles, has no usable identifier, or
            its identifier is already used by another bundle of this
            document.
        """
        if not isinstance(bundle, ProvBundle):
            raise ProvException(
                'Only a ProvBundle instance can be added as a bundle in a '
                'ProvDocument.'
            )

        if bundle.is_document():
            if bundle.has_bundles():
                raise ProvException(
                    'Cannot add a document with nested bundles as a bundle.'
                )
            # Make it a new ProvBundle
            new_bundle = ProvBundle(namespaces=bundle.namespaces)
            new_bundle.update(bundle)
            bundle = new_bundle

        if identifier is None:
            identifier = bundle.identifier

        if not identifier:
            raise ProvException('The provided bundle has no identifier')

        # Link the bundle namespace manager to the document's. This is kept
        # ahead of the identifier validation, as before; presumably it lets
        # prefixes declared on the document resolve -- TODO confirm.
        bundle._namespaces.parent = self._namespaces

        valid_id = bundle.valid_qualified_name(identifier)
        # valid_qualified_name() may yield None (see bundle()); reject it
        # rather than registering the bundle under a None key.
        if valid_id is None:
            raise ProvException(
                'The provided identifier "%s" is not valid' % identifier
            )

        # Check for a clash before touching the bundle's identifier, so a
        # rejected bundle keeps the identifier it came with.
        if valid_id in self._bundles:
            raise ProvException('A bundle with that identifier already exists')

        # IMPORTANT: Rewriting the bundle identifier for consistency
        bundle._identifier = valid_id

        self._bundles[valid_id] = bundle
        bundle._document = self

    def bundle(self, identifier):
        """
        Creates a new, empty bundle in the current document and returns it.

        :param identifier: The identifier to use for the bundle.
        :raises ProvException: If *identifier* is None, is not valid, or is
            already used by another bundle of this document.
        :return: :py:class:`ProvBundle`
        """
        if identifier is None:
            raise ProvException(
                'An identifier is required. Cannot create an unnamed bundle.'
            )
        valid_id = self.valid_qualified_name(identifier)
        if valid_id is None:
            raise ProvException(
                'The provided identifier "%s" is not valid' % identifier
            )
        if valid_id in self._bundles:
            raise ProvException('A bundle with that identifier already exists')
        b = ProvBundle(identifier=valid_id, document=self)
        self._bundles[valid_id] = b
        return b

    # Serializing and deserializing
    def serialize(self, destination=None, format='json', **args):
        """
        Serialize the :py:class:`ProvDocument` to the destination.

        Available serializers can be queried by the value of
        `:py:attr:~prov.serializers.Registry.serializers` after loading them via
        `:py:func:~prov.serializers.Registry.load_serializers()`.

        :param destination: Stream object (anything with a ``write``
            attribute) or a local file location to serialize the output to.
            Default is `None`, which serializes as a string.
        :param format: Serialization format (default: 'json'), defaulting to
            PROV-JSON.
        :param args: Extra keyword arguments passed on to the serializer.
        :return: Serialization in a string if no destination was given,
            None otherwise.
        """
        serializer = serializers.get(format)(self)
        if destination is None:
            stream = io.StringIO()
            serializer.serialize(stream, **args)
            return stream.getvalue()
        if hasattr(destination, "write"):
            stream = destination
            serializer.serialize(stream, **args)
        else:
            location = urlparse(destination)
            if location.netloc != "":
                print("WARNING: not saving as location " +
                      "is not a local file reference")
                return
            # Serialize into a temporary file first and move it into place
            # afterwards, so the destination is only touched once the
            # serializer has finished.
            fd, name = tempfile.mkstemp()
            try:
                with os.fdopen(fd, "wb") as stream:
                    serializer.serialize(stream, **args)
                shutil.move(name, location.path)
            finally:
                # After a successful move the temporary file is gone; this
                # only cleans up what a failed serialization or move left
                # behind.
                if os.path.exists(name):
                    os.remove(name)

    @staticmethod
    def deserialize(source=None, content=None, format='json', **args):
        """
        Deserialize the :py:class:`ProvDocument` from source (a stream or a
        file path) or directly from a string content.

        Available serializers can be queried by the value of
        `:py:attr:~prov.serializers.Registry.serializers` after loading them via
        `:py:func:~prov.serializers.Registry.load_serializers()`.

        Note: Not all serializers support deserialization.

        :param source: Stream object (anything with a ``read`` attribute) or
            a file path to deserialize the PROV document from
            (default: None).
        :param content: String to deserialize the PROV document from
            (default: None). Takes precedence over *source* when both are
            given.
        :param format: Serialization format (default: 'json'), defaulting to
            PROV-JSON.
        :param args: Extra keyword arguments passed on to the serializer.
        :return: :py:class:`ProvDocument`, or None if neither *source* nor
            *content* was given.
        """
        serializer = serializers.get(format)()

        if content is not None:
            # io.StringIO only accepts unicode strings
            stream = io.StringIO(
                content if not isinstance(content, six.binary_type)
                else content.decode()
            )
            return serializer.deserialize(stream, **args)

        if source is not None:
            if hasattr(source, "read"):
                return serializer.deserialize(source, **args)
            else:
                with open(source) as f:
                    return serializer.deserialize(f, **args)
2446
2447
def sorted_attributes(element, attributes):
    """
    Helper function sorting attributes into the order required by PROV-XML.

    Attributes whose name appears in the ordering (the formal attributes of
    the element's record class, followed by label, location, role, type and
    value) come first, grouped in that order; all other attributes follow.
    Within every group the pairs are sorted by the text of the name and then
    the text of the value.

    :param element: The prov element used to derive the type and the
        attribute order for the type.
    :param attributes: The attributes to sort, as an iterable of
        (name, value) pairs.
    :return: A new list holding the sorted attribute pairs.
    """
    remaining = list(attributes)

    # Formal attributes of the record class come first, then the attributes
    # that are universal amongst all elements.
    name_order = list(PROV_REC_CLS[element].FORMAL_ATTRIBUTES)
    name_order += [PROV_LABEL, PROV_LOCATION, PROV_ROLE, PROV_TYPE,
                   PROV_VALUE]

    # The PROV XML specification talks about alphabetical sorting. It is
    # interpreted here as sorting by tag including the prefix first and then
    # by the text, also including the namespace prefix if given.
    def text_key(pair):
        raw_value = pair[1]
        if hasattr(raw_value, "value"):
            raw_value = raw_value.value
        return six.text_type(pair[0]), six.text_type(raw_value)

    ordered = []
    for name in name_order:
        # Split the pending pairs into those carrying this name and the rest.
        matching = []
        others = []
        for pair in remaining:
            if pair[0] != name:
                others.append(pair)
            else:
                matching.append(pair)
        matching.sort(key=text_key)
        ordered += matching
        remaining = others

    # Add remaining attributes. According to the spec, the other attributes
    # have a fixed alphabetical order.
    remaining.sort(key=text_key)
    ordered += remaining

    return ordered