Mercurial > repos > guerler > springsuite
annotate planemo/lib/python3.7/site-packages/prov/graph.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler | 
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 
1
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
1 from __future__ import (absolute_import, division, print_function, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
2 unicode_literals) | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
3 | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
4 import networkx as nx | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
5 from prov.model import ( | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
6 ProvDocument, ProvRecord, ProvElement, ProvEntity, ProvActivity, ProvAgent, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
7 ProvRelation, PROV_ATTR_ENTITY, PROV_ATTR_ACTIVITY, PROV_ATTR_AGENT, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
8 PROV_ATTR_TRIGGER, PROV_ATTR_GENERATED_ENTITY, PROV_ATTR_USED_ENTITY, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
9 PROV_ATTR_DELEGATE, PROV_ATTR_RESPONSIBLE, PROV_ATTR_SPECIFIC_ENTITY, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
10 PROV_ATTR_GENERAL_ENTITY, PROV_ATTR_ALTERNATE1, PROV_ATTR_ALTERNATE2, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
11 PROV_ATTR_COLLECTION, PROV_ATTR_INFORMED, PROV_ATTR_INFORMANT | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
12 ) | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
13 | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
14 __author__ = 'Trung Dong Huynh' | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
15 __email__ = 'trungdong@donggiang.com' | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
16 | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
17 | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
18 INFERRED_ELEMENT_CLASS = { | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
19 PROV_ATTR_ENTITY: ProvEntity, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
20 PROV_ATTR_ACTIVITY: ProvActivity, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
21 PROV_ATTR_AGENT: ProvAgent, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
22 PROV_ATTR_TRIGGER: ProvEntity, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
23 PROV_ATTR_GENERATED_ENTITY: ProvEntity, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
24 PROV_ATTR_USED_ENTITY: ProvEntity, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
25 PROV_ATTR_DELEGATE: ProvAgent, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
26 PROV_ATTR_RESPONSIBLE: ProvAgent, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
27 PROV_ATTR_SPECIFIC_ENTITY: ProvEntity, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
28 PROV_ATTR_GENERAL_ENTITY: ProvEntity, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
29 PROV_ATTR_ALTERNATE1: ProvEntity, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
30 PROV_ATTR_ALTERNATE2: ProvEntity, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
31 PROV_ATTR_COLLECTION: ProvEntity, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
32 PROV_ATTR_INFORMED: ProvActivity, | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
33 PROV_ATTR_INFORMANT: ProvActivity | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
34 } | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
35 | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
36 | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
37 def prov_to_graph(prov_document): | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
38 """ | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
39 Convert a :class:`~prov.model.ProvDocument` to a `MultiDiGraph | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
40 <https://networkx.readthedocs.io/en/stable/reference/classes.multigraph.html>`_ | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
41 instance of the `NetworkX <https://networkx.github.io/>`_ library. | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
42 | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
43 :param prov_document: The :class:`~prov.model.ProvDocument` instance to convert. | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
44 """ | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
45 g = nx.MultiDiGraph() | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
46 unified = prov_document.unified() | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
47 node_map = dict() | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
48 for element in unified.get_records(ProvElement): | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
49 g.add_node(element) | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
50 node_map[element.identifier] = element | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
51 | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
52 for relation in unified.get_records(ProvRelation): | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
53 # taking the first two elements of a relation | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
54 attr_pair_1, attr_pair_2 = relation.formal_attributes[:2] | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
55 # only need the QualifiedName (i.e. the value of the attribute) | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
56 qn1, qn2 = attr_pair_1[1], attr_pair_2[1] | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
57 if qn1 and qn2: # only proceed if both ends of the relation exist | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
58 try: | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
59 if qn1 not in node_map: | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
60 node_map[qn1] = \ | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
61 INFERRED_ELEMENT_CLASS[attr_pair_1[0]](None, qn1) | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
62 if qn2 not in node_map: | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
63 node_map[qn2] = \ | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
64 INFERRED_ELEMENT_CLASS[attr_pair_2[0]](None, qn2) | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
65 except KeyError: | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
66 # Unsupported attribute; cannot infer the type of the element | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
67 continue # skipping this relation | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
68 g.add_edge(node_map[qn1], node_map[qn2], relation=relation) | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
69 return g | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
70 | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
71 | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
72 def graph_to_prov(g): | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
73 """ | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
74 Convert a `MultiDiGraph | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
75 <https://networkx.readthedocs.io/en/stable/reference/classes.multigraph.html>`_ | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
76 that was previously produced by :func:`prov_to_graph` back to a | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
77 :class:`~prov.model.ProvDocument`. | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
78 | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
79 :param g: The graph instance to convert. | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
80 """ | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
81 prov_doc = ProvDocument() | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
82 for n in g.nodes_iter(): | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
83 if isinstance(n, ProvRecord) and n.bundle is not None: | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
84 prov_doc.add_record(n) | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
85 for _, _, edge_data in g.edges_iter(data=True): | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
86 try: | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
87 relation = edge_data['relation'] | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
88 if isinstance(relation, ProvRecord): | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
89 prov_doc.add_record(relation) | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
90 except KeyError: | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
91 pass | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
92 | 
| 
 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 
guerler 
parents:  
diff
changeset
 | 
93 return prov_doc | 
