comparison env/lib/python3.9/site-packages/prov/serializers/provrdf.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """PROV-RDF serializers for ProvDocument
2 """
3 from __future__ import (absolute_import, division, print_function,
4 unicode_literals)
5
6 import base64
7 from collections import OrderedDict
8 import datetime
9 import io
10
11 import dateutil.parser
12 import six
13
14 from rdflib.term import URIRef, BNode
15 from rdflib.term import Literal as RDFLiteral
16 from rdflib.graph import ConjunctiveGraph
17 from rdflib.namespace import RDF, RDFS, XSD
18
19 import prov.model as pm
20 from prov.constants import (
21 PROV, PROV_ID_ATTRIBUTES_MAP, PROV_N_MAP, PROV_BASE_CLS, XSD_QNAME,
22 PROV_END, PROV_START, PROV_USAGE, PROV_GENERATION, PROV_DERIVATION, PROV_INVALIDATION,
23 PROV_ALTERNATE, PROV_MENTION, PROV_DELEGATION, PROV_ACTIVITY, PROV_ATTR_STARTTIME,
24 PROV_ATTR_ENDTIME, PROV_LOCATION, PROV_ATTR_TIME, PROV_ROLE, PROV_COMMUNICATION,
25 PROV_ATTR_INFORMANT, PROV_ATTR_RESPONSIBLE, PROV_ATTR_TRIGGER, PROV_ATTR_ENDER,
26 PROV_ATTR_STARTER, PROV_ATTR_USED_ENTITY)
27 from prov.serializers import Serializer, Error
28
29
# Module authorship metadata.
__author__ = 'Satrajit S. Ghosh'
__email__ = 'satra@mit.edu'
32
33
class ProvRDFException(Error):
    """Exception type declared by the PROV-RDF serializer module.

    It adds nothing to the serializers' base ``Error``; it only provides a
    distinct class name.
    """
36
37
class AnonymousIDGenerator:
    """Issue ``_:<prefix><n>`` identifiers, one per distinct object.

    The same object always receives the identifier that was generated the
    first time it was seen.
    """

    def __init__(self):
        # Maps an already-seen object to the URI issued for it.
        self._cache = {}
        # How many identifiers have been issued; used as numeric suffix.
        self._count = 0

    def get_anon_id(self, obj, local_prefix="id"):
        """Return the anonymous identifier URI associated with *obj*.

        :param obj: Hashable object to identify (it is used as a dict key).
        :param local_prefix: Text placed between ``_:`` and the counter when
            a new identifier has to be created.
        :return: The ``uri`` of the ``pm.Identifier`` built for *obj*.
        """
        try:
            return self._cache[obj]
        except KeyError:
            pass
        # First time this object is seen: mint the next identifier.
        self._count += 1
        fresh = pm.Identifier('_:%s%d' % (local_prefix, self._count))
        self._cache[obj] = fresh.uri
        return self._cache[obj]
50
51
# Python type -> XSD datatype; the reverse of
# prov.model.XSD_DATATYPE_PARSERS.
# Boolean and string values are supported natively by PROV-RDF, and
# datetime values are converted separately, so they need no entry here.
LITERAL_XSDTYPE_MAP = dict([
    (float, XSD['double']),
    (int, XSD['int']),
    (six.text_type, XSD['string']),
])

# Python 2 only: register ``long``. When the last entry of
# six.integer_types is already a key, the map is left untouched.
LITERAL_XSDTYPE_MAP.setdefault(six.integer_types[-1], XSD['long'])
64
65
def attr2rdf(attr):
    """Map a PROV attribute to the ``URIRef`` of the matching PROV term.

    The attribute is looked up in ``PROV_ID_ATTRIBUTES_MAP``; the text that
    follows ``prov:`` in the mapped name is resolved in the ``PROV``
    namespace.

    :param attr: Key of ``PROV_ID_ATTRIBUTES_MAP``.
    :return: ``URIRef`` built from the resolved term's ``uri``.
    """
    prefixed_name = PROV_ID_ATTRIBUTES_MAP[attr]
    local_name = prefixed_name.split('prov:')[1]
    term = PROV[local_name]
    return URIRef(term.uri)
68
69
def valid_qualified_name(bundle, value, xsd_qname=False):
    """Resolve *value* through ``bundle.valid_qualified_name``.

    :param bundle: Object exposing ``valid_qualified_name(value)``.
    :param value: The value to resolve; ``None`` is returned unchanged
        without consulting *bundle*.
    :param xsd_qname: When true, the resolved name is passed to
        ``XSD_QNAME`` and that result is returned instead.
    :return: ``None``, the resolved name, or ``XSD_QNAME(resolved name)``.
    """
    if value is None:
        return None
    resolved = bundle.valid_qualified_name(value)
    if xsd_qname:
        return XSD_QNAME(resolved)
    return resolved
75
76
77 class ProvRDFSerializer(Serializer):
78 """
79 PROV-O serializer for :class:`~prov.model.ProvDocument`
80 """
81
def serialize(self, stream=None, rdf_format='trig', **kwargs):
    """
    Serializes a :class:`~prov.model.ProvDocument` instance to
    `PROV-O <https://www.w3.org/TR/prov-o/>`_.

    :param stream: Where to save the output. Although the parameter
        defaults to ``None``, a writable object is required:
        ``stream.write`` is always called. Text streams
        (:class:`io.TextIOBase`) receive UTF-8 decoded text, any other
        stream receives the raw bytes.
    :param rdf_format: The RDF format of the output, default to TRiG.
    :param kwargs: Extra keyword arguments forwarded to the graph's
        ``serialize`` call; ``format`` is always set from *rdf_format*.
    """
    container = self.encode_document(self.document)
    newargs = dict(kwargs, format=rdf_format)

    # The graph is first rendered into an in-memory byte buffer. The same
    # code path is used on Python 2 and Python 3 (the former version
    # branched on six.PY2 but both branches were identical).
    with io.BytesIO() as buf:
        container.serialize(buf, **newargs)
        payload = buf.getvalue()

    # Right now this is a bytestream. If the object to stream to is a text
    # object it must be decoded. We assume utf-8 here which should be fine
    # for almost every case.
    if isinstance(stream, io.TextIOBase):
        payload = payload.decode('utf-8')
    stream.write(payload)
122
def deserialize(self, stream, rdf_format='trig', **kwargs):
    """
    Deserialize from the `PROV-O <https://www.w3.org/TR/prov-o/>`_
    representation to a :class:`~prov.model.ProvDocument` instance.

    The freshly created document is also stored on ``self.document``
    before decoding starts.

    :param stream: Input data.
    :param rdf_format: The RDF format of the input data, default: TRiG.
    :param kwargs: Extra keyword arguments forwarded to the graph's
        ``parse`` call; ``format`` is always set from *rdf_format*.
    :return: The populated document.
    """
    parse_options = dict(kwargs, format=rdf_format)
    graph = ConjunctiveGraph()
    graph.parse(stream, **parse_options)
    document = self.document = pm.ProvDocument()
    self.decode_document(graph, document)
    return document
139
def valid_identifier(self, value):
    """Resolve *value* with the current document's
    ``valid_qualified_name`` and return whatever it yields."""
    resolve = self.document.valid_qualified_name
    return resolve(value)
142
def encode_rdf_representation(self, value):
    """Convert a PROV/Python value into an rdflib term.

    The checks run in a fixed order; the first that matches wins:

    * ``URIRef`` -- returned unchanged;
    * ``pm.Literal`` -- delegated to ``literal_rdf_representation``;
    * ``datetime.datetime`` -- ISO-formatted ``xsd:dateTime`` literal;
    * ``pm.QualifiedName`` -- ``URIRef`` of its ``uri``;
    * ``pm.Identifier`` -- ``xsd:anyURI`` literal of its ``uri``;
    * a value whose exact type is a key of ``LITERAL_XSDTYPE_MAP`` --
      literal with the mapped datatype;
    * anything else -- plain literal without explicit datatype.
    """
    if isinstance(value, URIRef):
        return value
    if isinstance(value, pm.Literal):
        return literal_rdf_representation(value)
    if isinstance(value, datetime.datetime):
        return RDFLiteral(value.isoformat(), datatype=XSD['dateTime'])
    if isinstance(value, pm.QualifiedName):
        return URIRef(value.uri)
    if isinstance(value, pm.Identifier):
        return RDFLiteral(value.uri, datatype=XSD['anyURI'])
    # Exact type lookup (not isinstance): subclasses do not match.
    xsd_type = LITERAL_XSDTYPE_MAP.get(type(value))
    if xsd_type is not None:
        return RDFLiteral(value, datatype=xsd_type)
    return RDFLiteral(value)
158
def decode_rdf_representation(self, literal, graph):
    """Convert an rdflib term into its PROV-side representation.

    :param literal: The rdflib term (or any other object) to convert.
    :param graph: Graph whose namespace manager is used to derive a
        prefix for URIs the document cannot resolve yet.
    :return: For RDF literals a ``pm.Literal`` (or a parsed datetime for
        ``xsd:dateTime``); for ``URIRef`` a qualified name; any other
        object is returned unchanged.
    """
    if isinstance(literal, RDFLiteral):
        # Prefer the converted Python value, fall back to the term itself.
        value = literal if literal.value is None else literal.value
        datatype = getattr(literal, 'datatype', None)
        langtag = getattr(literal, 'language', None)
        if datatype:
            if 'XMLLiteral' in datatype:
                value = literal
            if 'base64Binary' in datatype:
                value = base64.standard_b64encode(value)
        if datatype == XSD['QName']:
            return pm.Literal(literal, datatype=XSD_QNAME)
        if datatype == XSD['dateTime']:
            return dateutil.parser.parse(literal)
        if datatype == XSD['gYear']:
            year = dateutil.parser.parse(literal).year
            return pm.Literal(year,
                              datatype=self.valid_identifier(datatype))
        if datatype == XSD['gYearMonth']:
            moment = dateutil.parser.parse(literal)
            year_month = '{0}-{1:02d}'.format(moment.year, moment.month)
            return pm.Literal(year_month,
                              datatype=self.valid_identifier(datatype))
        # The literal of standard Python types is not converted here
        # It will be automatically converted when added to a record by
        # _auto_literal_conversion()
        return pm.Literal(value, self.valid_identifier(datatype), langtag)

    if isinstance(literal, URIRef):
        qname = self.valid_identifier(literal)
        if qname is None:
            # Unknown namespace: register it on the document, then build
            # the qualified name from what remains after the namespace URI.
            prefix, iri, _ = graph.namespace_manager.compute_qname(literal)
            namespace = self.document.add_namespace(prefix, iri)
            qname = pm.QualifiedName(namespace,
                                     literal.replace(namespace.uri, ''))
        return qname

    # simple type, just return it
    return literal
194
def encode_document(self, document):
    """Encode *document* and all of its bundles into one graph.

    The document itself is encoded first; every bundle is then encoded
    into its own graph (identified by the bundle identifier's ``uri``)
    whose quads are added to the document graph.

    :return: The graph produced for the document.
    """
    graph = self.encode_container(document)
    for sub_bundle in document.bundles:
        # encoding the sub-bundle
        bundle_uri = sub_bundle.identifier.uri
        bundle_graph = self.encode_container(sub_bundle,
                                             identifier=bundle_uri)
        graph.addN(bundle_graph.quads())
    return graph
202
def encode_container(self, bundle, container=None, identifier=None):
    """Encode the records of *bundle* as triples in an RDF graph.

    :param bundle: Object providing ``namespaces`` and ``_records``.
    :param container: Graph to add to. When ``None`` a new
        ``ConjunctiveGraph`` is created and the ``prov`` prefix is bound.
    :param identifier: Identifier given to the newly created graph. Note
        that the name is rebound for every record inside the loop below.
    :return: The graph holding the encoded records.
    """
    if container is None:
        container = ConjunctiveGraph(identifier=identifier)
        nm = container.namespace_manager
        nm.bind('prov', PROV.uri)

    for namespace in bundle.namespaces:
        container.bind(namespace.prefix, namespace.uri)

    id_generator = AnonymousIDGenerator()
    # Use the record's own identifier when it has one, otherwise a
    # generated anonymous identifier.
    real_or_anon_id = lambda record: record._identifier.uri if \
        record._identifier else id_generator.get_anon_id(record)

    for record in bundle._records:
        rec_type = record.get_type()
        # Identified records get an rdf:type triple right away.
        if hasattr(record, 'identifier') and record.identifier:
            identifier = URIRef(six.text_type(real_or_anon_id(record)))
            container.add((identifier, RDF.type, URIRef(rec_type.uri)))
        else:
            identifier = None
        if record.attributes:
            bnode = None
            formal_objects = []
            used_objects = []
            all_attributes = list(record.formal_attributes) + list(record.attributes)
            # A record needs a qualified form when it has an identifier
            # and any formal value, or (without identifier) a formal value
            # beyond the first two positions, or any extra attribute.
            formal_qualifiers = False
            for attrid, (attr, value) in enumerate(list(record.formal_attributes)):
                if (identifier is not None and value is not None) or \
                        (identifier is None and value is not None and attrid > 1):
                    formal_qualifiers = True
            has_qualifiers = len(record.extra_attributes) > 0 or formal_qualifiers
            for idx, (attr, value) in enumerate(all_attributes):
                if record.is_relation():
                    pred = URIRef(PROV[PROV_N_MAP[rec_type]].uri)
                    # create bnode relation
                    if bnode is None:
                        valid_formal_indices = set()
                        # NOTE(review): this inner loop rebinds ``idx`` of
                        # the enclosing loop; the outer value is not read
                        # afterwards in the visible code.
                        for idx, (key, val) in enumerate(record.formal_attributes):
                            formal_objects.append(key)
                            if val:
                                valid_formal_indices.add(idx)
                        # The first formal attribute supplies the subject.
                        used_objects = [record.formal_attributes[0][0]]
                        subj = None
                        if record.formal_attributes[0][1]:
                            subj = URIRef(record.formal_attributes[0][1].uri)
                        if identifier is None and subj is not None:
                            try:
                                obj_val = record.formal_attributes[1][1]
                                obj_attr = URIRef(record.formal_attributes[1][0].uri)
                                # TODO: Why is obj_attr above not used anywhere?
                            except IndexError:
                                obj_val = None
                            # Emit the direct (unqualified) triple unless
                            # the record type is one of the listed ones and
                            # carries more than its first two formal values
                            # or any extra attribute.
                            if obj_val and (rec_type not in {PROV_END,
                                                             PROV_START,
                                                             PROV_USAGE,
                                                             PROV_GENERATION,
                                                             PROV_DERIVATION,
                                                             PROV_INVALIDATION} or
                                            (valid_formal_indices == {0, 1} and
                                             len(record.extra_attributes) == 0)):
                                used_objects.append(record.formal_attributes[1][0])
                                obj_val = self.encode_rdf_representation(obj_val)
                                # alternateOf is written with subject and
                                # object swapped.
                                if rec_type == PROV_ALTERNATE:
                                    subj, obj_val = obj_val, subj
                                container.add((subj, pred, obj_val))
                                if rec_type == PROV_MENTION:
                                    if record.formal_attributes[2][1]:
                                        used_objects.append(record.formal_attributes[2][0])
                                        obj_val = self.encode_rdf_representation(record.formal_attributes[2][1])
                                        container.add((subj, URIRef(PROV['asInBundle'].uri), obj_val))
                                    has_qualifiers = False
                        if rec_type in [PROV_ALTERNATE]:
                            continue
                        if subj and (has_qualifiers or identifier):
                            qualifier = rec_type._localpart
                            rec_uri = rec_type.uri
                            # A prov:type of Revision / Quotation /
                            # PrimarySource replaces the qualifier name and
                            # the rdf:type of the qualified node.
                            for attr_name, val in record.extra_attributes:
                                if attr_name == PROV['type']:
                                    if PROV['Revision'] == val or \
                                            PROV['Quotation'] == val or \
                                            PROV['PrimarySource'] == val:
                                        qualifier = val._localpart
                                        rec_uri = val.uri
                                        if identifier is not None:
                                            container.remove((identifier,
                                                              RDF.type,
                                                              URIRef(rec_type.uri)))
                            QRole = URIRef(PROV['qualified' + qualifier].uri)
                            if identifier is not None:
                                container.add((subj, QRole, identifier))
                            else:
                                bnode = identifier = BNode()
                                container.add((subj, QRole, identifier))
                                container.add(
                                    (identifier, RDF.type, URIRef(rec_uri))
                                )  # reset identifier to BNode
                    if value is not None and attr not in used_objects:
                        # Choose the predicate for this attribute ...
                        if attr in formal_objects:
                            pred = attr2rdf(attr)
                        elif attr == PROV['role']:
                            pred = URIRef(PROV['hadRole'].uri)
                        elif attr == PROV['plan']:
                            pred = URIRef(PROV['hadPlan'].uri)
                        elif attr == PROV['type']:
                            pred = RDF.type
                        elif attr == PROV['label']:
                            pred = RDFS.label
                        elif isinstance(attr, pm.QualifiedName):
                            pred = URIRef(attr.uri)
                        else:
                            pred = self.encode_rdf_representation(attr)
                        # ... then rewrite it according to the record
                        # type. The tests below are substring checks on
                        # the predicate URI and are applied in sequence.
                        if PROV['plan'].uri in pred:
                            pred = URIRef(PROV['hadPlan'].uri)
                        if PROV['informant'].uri in pred:
                            pred = URIRef(PROV['activity'].uri)
                        if PROV['responsible'].uri in pred:
                            pred = URIRef(PROV['agent'].uri)
                        if rec_type == PROV_DELEGATION and PROV['activity'].uri in pred:
                            pred = URIRef(PROV['hadActivity'].uri)
                        if (rec_type in [PROV_END, PROV_START] and PROV['trigger'].uri in pred) or\
                                (rec_type in [PROV_USAGE] and PROV['used'].uri in pred):
                            pred = URIRef(PROV['entity'].uri)
                        if rec_type in [PROV_GENERATION, PROV_END,
                                        PROV_START, PROV_USAGE,
                                        PROV_INVALIDATION]:
                            if PROV['time'].uri in pred:
                                pred = URIRef(PROV['atTime'].uri)
                            if PROV['ender'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['starter'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['location'].uri in pred:
                                pred = URIRef(PROV['atLocation'].uri)
                        if rec_type in [PROV_ACTIVITY]:
                            if PROV_ATTR_STARTTIME in pred:
                                pred = URIRef(PROV['startedAtTime'].uri)
                            if PROV_ATTR_ENDTIME in pred:
                                pred = URIRef(PROV['endedAtTime'].uri)
                        if rec_type == PROV_DERIVATION:
                            if PROV['activity'].uri in pred:
                                pred = URIRef(PROV['hadActivity'].uri)
                            if PROV['generation'].uri in pred:
                                pred = URIRef(PROV['hadGeneration'].uri)
                            if PROV['usage'].uri in pred:
                                pred = URIRef(PROV['hadUsage'].uri)
                            if PROV['usedEntity'].uri in pred:
                                pred = URIRef(PROV['entity'].uri)
                        container.add((identifier, pred,
                                       self.encode_rdf_representation(value)))
                    # Relations are fully handled above.
                    continue
                # From here on: attributes of non-relation records.
                if value is None:
                    continue
                if isinstance(value, pm.ProvRecord):
                    obj = URIRef(six.text_type(real_or_anon_id(value)))
                else:
                    # Assuming this is a datetime value
                    obj = self.encode_rdf_representation(value)
                if attr == PROV['location']:
                    pred = URIRef(PROV['atLocation'].uri)
                    # NOTE: ``False and ...`` makes the first branch
                    # unreachable; the else branch always runs.
                    if False and isinstance(value, (URIRef, pm.QualifiedName)):
                        if isinstance(value, pm.QualifiedName):
                            value = URIRef(value.uri)
                        container.add((identifier, pred, value))
                    else:
                        container.add((identifier, pred,
                                       self.encode_rdf_representation(obj)))
                    continue
                if attr == PROV['type']:
                    pred = RDF.type
                elif attr == PROV['label']:
                    pred = RDFS.label
                elif attr == PROV_ATTR_STARTTIME:
                    pred = URIRef(PROV['startedAtTime'].uri)
                elif attr == PROV_ATTR_ENDTIME:
                    pred = URIRef(PROV['endedAtTime'].uri)
                else:
                    pred = self.encode_rdf_representation(attr)
                container.add((identifier, pred, obj))
    return container
382
def decode_document(self, content, document):
    """Populate *document* from the parsed RDF *content*.

    All namespaces of *content* are registered on the document first.
    If *content* exposes ``contexts``, each context graph is decoded
    separately: graphs identified by a ``BNode`` go into the document
    itself, every other graph into a bundle named after the graph
    identifier. Without ``contexts`` the content is decoded directly
    into the document.
    """
    for prefix, url in content.namespaces():
        document.add_namespace(prefix, six.text_type(url))

    if not hasattr(content, 'contexts'):
        self.decode_container(content, document)
        return

    for graph in content.contexts():
        if isinstance(graph.identifier, BNode):
            target = document
        else:
            target = document.bundle(six.text_type(graph.identifier))
        self.decode_container(graph, target)
396
def decode_container(self, graph, bundle):
    """Rebuild PROV records in *bundle* from the triples of *graph*.

    The method works in several passes:

    1. every ``rdf:type`` triple is inspected to find subjects that are
       PROV records and to collect other types as ``prov:type``
       attributes;
    2. every triple is visited to create unqualified relations directly
       and to collect formal and other attributes of known records;
    3. one record is created per collected identifier (or one per
       combination when a formal attribute received several values);
    4. remaining collected attributes are attached.

    :param graph: The RDF graph to read.
    :param bundle: Target object; records are created through its
        relation methods and ``new_record``.
    """
    # subject id -> PROV record type found for it
    ids = {}
    # class URI -> base PROV class, built from PROV_BASE_CLS
    PROV_CLS_MAP = {}
    # subject id -> ordered formal attribute values (initially None)
    formal_attributes = {}
    # subject id -> every value seen for each formal attribute
    unique_sets = {}
    for key, val in PROV_BASE_CLS.items():
        PROV_CLS_MAP[key.uri] = PROV_BASE_CLS[key]
    # RDF predicate -> name of the bundle method creating that relation
    relation_mapper = {URIRef(PROV['alternateOf'].uri): 'alternate',
                       URIRef(PROV['actedOnBehalfOf'].uri): 'delegation',
                       URIRef(PROV['specializationOf'].uri): 'specialization',
                       URIRef(PROV['mentionOf'].uri): 'mention',
                       URIRef(PROV['wasAssociatedWith'].uri): 'association',
                       URIRef(PROV['wasDerivedFrom'].uri): 'derivation',
                       URIRef(PROV['wasAttributedTo'].uri): 'attribution',
                       URIRef(PROV['wasInformedBy'].uri): 'communication',
                       URIRef(PROV['wasGeneratedBy'].uri): 'generation',
                       URIRef(PROV['wasInfluencedBy'].uri): 'influence',
                       URIRef(PROV['wasInvalidatedBy'].uri): 'invalidation',
                       URIRef(PROV['wasEndedBy'].uri): 'end',
                       URIRef(PROV['wasStartedBy'].uri): 'start',
                       URIRef(PROV['hadMember'].uri): 'membership',
                       URIRef(PROV['used'].uri): 'usage',
                       }
    # RDF predicate -> PROV attribute it stands for
    predicate_mapper = {RDFS.label: pm.PROV['label'],
                        URIRef(PROV['atLocation'].uri): PROV_LOCATION,
                        URIRef(PROV['startedAtTime'].uri): PROV_ATTR_STARTTIME,
                        URIRef(PROV['endedAtTime'].uri): PROV_ATTR_ENDTIME,
                        URIRef(PROV['atTime'].uri): PROV_ATTR_TIME,
                        URIRef(PROV['hadRole'].uri): PROV_ROLE,
                        URIRef(PROV['hadPlan'].uri): pm.PROV_ATTR_PLAN,
                        URIRef(PROV['hadUsage'].uri): pm.PROV_ATTR_USAGE,
                        URIRef(PROV['hadGeneration'].uri): pm.PROV_ATTR_GENERATION,
                        URIRef(PROV['hadActivity'].uri): pm.PROV_ATTR_ACTIVITY,
                        }
    # subject id -> list of (attribute, value) pairs that are not formal
    other_attributes = {}
    # Pass 1: rdf:type statements.
    for stmt in graph.triples((None, RDF.type, None)):
        id = six.text_type(stmt[0])
        obj = six.text_type(stmt[2])
        if obj in PROV_CLS_MAP:
            # Make sure the document knows the namespace of the subject.
            if not isinstance(stmt[0], BNode) and self.valid_identifier(id) is None:
                prefix, iri, _ = graph.namespace_manager.compute_qname(id)
                self.document.add_namespace(prefix, iri)
            try:
                prov_obj = PROV_CLS_MAP[obj]
            except AttributeError:
                prov_obj = None
            add_attr = True
            isderivation = pm.PROV['Revision'].uri in stmt[2] or \
                pm.PROV['Quotation'].uri in stmt[2] or \
                pm.PROV['PrimarySource'].uri in stmt[2]
            # First PROV type seen for this subject: register the record
            # and prepare empty slots for its formal attributes.
            if id not in ids and prov_obj and (prov_obj.uri == obj or
                                               isderivation or
                                               isinstance(stmt[0], BNode)):
                ids[id] = prov_obj
                klass = pm.PROV_REC_CLS[prov_obj]
                formal_attributes[id] = OrderedDict([(key, None) for key in klass.FORMAL_ATTRIBUTES])
                unique_sets[id] = OrderedDict([(key, []) for key in klass.FORMAL_ATTRIBUTES])
                # Keep the type as an attribute only when it differs from
                # the base class it was mapped to.
                add_attr = False or ((isinstance(stmt[0], BNode) or isderivation) and prov_obj.uri != obj)
            if add_attr:
                if id not in other_attributes:
                    other_attributes[id] = []
                obj_formatted = self.decode_rdf_representation(stmt[2], graph)
                other_attributes[id].append((pm.PROV['type'], obj_formatted))
        else:
            # Not a known PROV class: keep it as a prov:type attribute.
            if id not in other_attributes:
                other_attributes[id] = []
            obj = self.decode_rdf_representation(stmt[2], graph)
            other_attributes[id].append((pm.PROV['type'], obj))
    # Pass 2: all triples.
    for id, pred, obj in graph:
        id = six.text_type(id)
        if id not in other_attributes:
            other_attributes[id] = []
        if pred == RDF.type:
            continue
        if pred in relation_mapper:
            if 'alternateOf' in pred:
                # Arguments are passed in reverse order for alternateOf.
                getattr(bundle, relation_mapper[pred])(obj, id)
            elif 'mentionOf' in pred:
                # The bundle of a mention is stored in a separate triple.
                mentionBundle = None
                for stmt in graph.triples((URIRef(id), URIRef(pm.PROV['asInBundle'].uri), None)):
                    mentionBundle = stmt[2]
                getattr(bundle, relation_mapper[pred])(id, six.text_type(obj), mentionBundle)
            elif 'actedOnBehalfOf' in pred or 'wasAssociatedWith' in pred:
                # If a qualified node exists, only fill its first two
                # formal attributes; the record is created in pass 3.
                qualifier = 'qualified' + relation_mapper[pred].upper()[0] + relation_mapper[pred][1:]
                qualifier_bnode = None
                for stmt in graph.triples((URIRef(id), URIRef(pm.PROV[qualifier].uri), None)):
                    qualifier_bnode = stmt[2]
                if qualifier_bnode is None:
                    getattr(bundle, relation_mapper[pred])(id, six.text_type(obj))
                else:
                    fakeys = list(formal_attributes[six.text_type(qualifier_bnode)].keys())
                    formal_attributes[six.text_type(qualifier_bnode)][fakeys[0]] = id
                    formal_attributes[six.text_type(qualifier_bnode)][fakeys[1]] = six.text_type(obj)
            else:
                getattr(bundle, relation_mapper[pred])(id, six.text_type(obj))
        elif id in ids:
            obj1 = self.decode_rdf_representation(obj, graph)
            if obj is not None and obj1 is None:
                raise ValueError(('Error transforming', obj))
            # Translate the RDF predicate back to a PROV attribute; the
            # record-type specific rules are substring checks applied in
            # sequence.
            pred_new = pred
            if pred in predicate_mapper:
                pred_new = predicate_mapper[pred]
            if ids[id] == PROV_COMMUNICATION and 'activity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_INFORMANT
            if ids[id] == PROV_DELEGATION and 'agent' in six.text_type(pred_new):
                pred_new = PROV_ATTR_RESPONSIBLE
            if ids[id] in [PROV_END, PROV_START] and 'entity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_TRIGGER
            if ids[id] in [PROV_END] and 'activity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_ENDER
            if ids[id] in [PROV_START] and 'activity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_STARTER
            if ids[id] == PROV_DERIVATION and 'entity' in six.text_type(pred_new):
                pred_new = PROV_ATTR_USED_ENTITY
            if six.text_type(pred_new) in [val.uri for val in formal_attributes[id]]:
                qname_key = self.valid_identifier(pred_new)
                formal_attributes[id][qname_key] = obj1
                unique_sets[id][qname_key].append(obj1)
                # Several values for one formal attribute: clear the
                # slot; the combinations are expanded in pass 3.
                if len(unique_sets[id][qname_key]) > 1:
                    formal_attributes[id][qname_key] = None
            else:
                if 'qualified' not in six.text_type(pred_new) and \
                        'asInBundle' not in six.text_type(pred_new):
                    other_attributes[id].append((six.text_type(pred_new), obj1))
        # A "qualified..." link pointing at a known record: the subject
        # of the link becomes that record's first formal attribute.
        local_key = six.text_type(obj)
        if local_key in ids:
            if 'qualified' in pred:
                formal_attributes[local_key][list(formal_attributes[local_key].keys())[0]] = id
    # Pass 3: create the records.
    for id in ids:
        attrs = None
        if id in other_attributes:
            attrs = other_attributes[id]
        items_to_walk = []
        for qname, values in unique_sets[id].items():
            if values and len(values) > 1:
                items_to_walk.append((qname, values))
        if items_to_walk:
            # One record per combination of the multi-valued attributes.
            for subset in list(walk(items_to_walk)):
                for key, value in subset.items():
                    formal_attributes[id][key] = value
                bundle.new_record(ids[id], id, formal_attributes[id], attrs)
        else:
            bundle.new_record(ids[id], id, formal_attributes[id], attrs)
        ids[id] = None
        if attrs is not None:
            other_attributes[id] = []
    # Pass 4: attach attributes that are still pending.
    # NOTE(review): entries of ``ids`` were set to None above and ``ids``
    # is a plain dict, so this looks like it can only skip or fail for a
    # non-empty list -- confirm the intended behaviour.
    for key, val in other_attributes.items():
        if val:
            ids[key].add_attributes(val)
546
547
def walk(children, level=0, path=None, usename=True):
    """Generate all the full paths in a tree, as a dict.

    Each element of *children* is a ``(name, values)`` pair, where
    ``values`` is either an iterable of alternatives or a zero-argument
    callable returning such an iterable. One dict is yielded for every
    combination of alternatives.

    :param children: Sequence of ``(name, values)`` pairs, one per level.
    :param level: Depth of the current call; callers use the default.
    :param path: Dict shared between the recursive calls; callers use the
        default.
    :param usename: Key the yielded dicts by ``name`` when true, by the
        tree level otherwise.
    :return: Generator of dicts (each one an independent copy).

    :Example:

    >>> from prov.serializers.provrdf import walk
    >>> iterables = [('a', lambda: [1, 2]), ('b', lambda: [3, 4])]
    >>> [val['a'] for val in walk(iterables)]
    [1, 1, 2, 2]
    >>> [val['b'] for val in walk(iterables)]
    [3, 4, 3, 4]
    >>> [val['a'] for val in walk([('a', [1, 2]), ('b', [3, 4])])]
    [1, 1, 2, 2]
    """
    # Entry point (also covers a nested call made without a path)
    if level == 0 or path is None:
        path = {}
    # Exit condition
    if not children:
        yield path.copy()
        return
    # Tree recursion
    (name, source), tail = children[0], children[1:]
    # Accept both a ready iterable and a factory producing one.
    values = source() if callable(source) else source
    # We can use the arg name or the tree level as a key
    key = name if usename else level
    for child in values:
        path[key] = child
        # Recurse into the next level
        for child_paths in walk(tail, level + 1, path, usename):
            yield child_paths
579
580
def literal_rdf_representation(literal):
    """Build the rdflib literal for a PROV literal.

    :param literal: Object exposing ``value``, ``langtag`` and
        ``datatype`` (the latter with a ``uri``).
    :return: A language-tagged literal when ``langtag`` is set, otherwise
        a literal typed with the datatype's URI.
    """
    if literal.value:
        value = six.text_type(literal.value)
    else:
        # Falsy value: the literal object itself is used as the value.
        value = literal

    if literal.langtag:
        # a language tag can only go with prov:InternationalizedString
        return RDFLiteral(value, lang=str(literal.langtag))

    datatype = literal.datatype
    if 'base64Binary' in datatype.uri:
        value = (base64.standard_b64encode(value) if six.PY2
                 else literal.value.encode())
    return RDFLiteral(value, datatype=datatype.uri)