comparison env/lib/python3.9/site-packages/rdflib/paths.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 from six import PY3
2
3
4 __doc__ = """
5
6 This module implements the SPARQL 1.1 Property path operators, as
7 defined in:
8
9 http://www.w3.org/TR/sparql11-query/#propertypaths
10
11 In SPARQL the syntax is as follows:
12
13 +--------------------+-------------------------------------------------+
14 |Syntax | Matches |
15 +====================+=================================================+
16 |iri | An IRI. A path of length one. |
17 +--------------------+-------------------------------------------------+
18 |^elt | Inverse path (object to subject). |
19 +--------------------+-------------------------------------------------+
20 |elt1 / elt2 | A sequence path of elt1 followed by elt2. |
21 +--------------------+-------------------------------------------------+
22 |elt1 | elt2 | A alternative path of elt1 or elt2 |
23 | | (all possibilities are tried). |
24 +--------------------+-------------------------------------------------+
25 |elt* | A path that connects the subject and object |
26 | | of the path by zero or more matches of elt. |
27 +--------------------+-------------------------------------------------+
28 |elt+ | A path that connects the subject and object |
29 | | of the path by one or more matches of elt. |
30 +--------------------+-------------------------------------------------+
31 |elt? | A path that connects the subject and object |
32 | | of the path by zero or one matches of elt. |
33 +--------------------+-------------------------------------------------+
34 |!iri or | Negated property set. An IRI which is not one of|
35 |!(iri\ :sub:`1`\ \| | iri\ :sub:`1`...iri\ :sub:`n`. |
36 |... \|iri\ :sub:`n`)| !iri is short for !(iri). |
37 +--------------------+-------------------------------------------------+
38 |!^iri or | Negated property set where the excluded matches |
39 |!(^iri\ :sub:`1`\ \|| are based on reversed path. That is, not one of |
40 |...\|^iri\ :sub:`n`)| iri\ :sub:`1`...iri\ :sub:`n` as reverse paths. |
41 | | !^iri is short for !(^iri). |
42 +--------------------+-------------------------------------------------+
43 |!(iri\ :sub:`1`\ \| | A combination of forward and reverse |
44 |...\|iri\ :sub:`j`\ | properties in a negated property set. |
45 |\|^iri\ :sub:`j+1`\ | |
46 |\|... \|^iri\ | |
47 |:sub:`n`)| | |
48 +--------------------+-------------------------------------------------+
49 |(elt) | A group path elt, brackets control precedence. |
50 +--------------------+-------------------------------------------------+
51
52 This module is used internally be the SPARQL engine, but they property paths
53 can also be used to query RDFLib Graphs directly.
54
55 Where possible the SPARQL syntax is mapped to python operators, and property
56 path objects can be constructed from existing URIRefs.
57
58 >>> from rdflib import Graph, Namespace
59
60 >>> foaf=Namespace('http://xmlns.com/foaf/0.1/')
61
62 >>> ~foaf.knows
63 Path(~http://xmlns.com/foaf/0.1/knows)
64
65 >>> foaf.knows/foaf.name
66 Path(http://xmlns.com/foaf/0.1/knows / http://xmlns.com/foaf/0.1/name)
67
68 >>> foaf.name|foaf.givenName
69 Path(http://xmlns.com/foaf/0.1/name | http://xmlns.com/foaf/0.1/givenName)
70
71 Modifiers (?, *, +) are done using * (the multiplication operator) and
72 the strings '*', '?', '+', also defined as constants in this file.
73
74 >>> foaf.knows*OneOrMore
75 Path(http://xmlns.com/foaf/0.1/knows+)
76
77 The path objects can also be used with the normal graph methods.
78
79 First some example data:
80
81 >>> g=Graph()
82
83 >>> g=g.parse(data='''
84 ... @prefix : <ex:> .
85 ...
86 ... :a :p1 :c ; :p2 :f .
87 ... :c :p2 :e ; :p3 :g .
88 ... :g :p3 :h ; :p2 :j .
89 ... :h :p3 :a ; :p2 :g .
90 ...
91 ... :q :px :q .
92 ...
93 ... ''', format='n3') # doctest: +ELLIPSIS
94
95 >>> e=Namespace('ex:')
96
97 Graph contains:
98 >>> (e.a, e.p1/e.p2, e.e) in g
99 True
100
101 Graph generator functions, triples, subjects, objects, etc. :
102
103 >>> list(g.objects(e.c, (e.p3*OneOrMore)/e.p2)) # doctest: +NORMALIZE_WHITESPACE
104 [rdflib.term.URIRef(u'ex:j'), rdflib.term.URIRef(u'ex:g'),
105 rdflib.term.URIRef(u'ex:f')]
106
107 A more complete set of tests:
108
109 >>> list(evalPath(g, (None, e.p1/e.p2, None)))==[(e.a, e.e)]
110 True
111 >>> list(evalPath(g, (e.a, e.p1|e.p2, None)))==[(e.a,e.c), (e.a,e.f)]
112 True
113 >>> list(evalPath(g, (e.c, ~e.p1, None))) == [ (e.c, e.a) ]
114 True
115 >>> list(evalPath(g, (e.a, e.p1*ZeroOrOne, None))) == [(e.a, e.a), (e.a, e.c)]
116 True
117 >>> list(evalPath(g, (e.c, e.p3*OneOrMore, None))) == [
118 ... (e.c, e.g), (e.c, e.h), (e.c, e.a)]
119 True
120 >>> list(evalPath(g, (e.c, e.p3*ZeroOrMore, None))) == [(e.c, e.c),
121 ... (e.c, e.g), (e.c, e.h), (e.c, e.a)]
122 True
123 >>> list(evalPath(g, (e.a, -e.p1, None))) == [(e.a, e.f)]
124 True
125 >>> list(evalPath(g, (e.a, -(e.p1|e.p2), None))) == []
126 True
127 >>> list(evalPath(g, (e.g, -~e.p2, None))) == [(e.g, e.j)]
128 True
129 >>> list(evalPath(g, (e.e, ~(e.p1/e.p2), None))) == [(e.e, e.a)]
130 True
131 >>> list(evalPath(g, (e.a, e.p1/e.p3/e.p3, None))) == [(e.a, e.h)]
132 True
133
134 >>> list(evalPath(g, (e.q, e.px*OneOrMore, None)))
135 [(rdflib.term.URIRef(u'ex:q'), rdflib.term.URIRef(u'ex:q'))]
136
137 >>> list(evalPath(g, (None, e.p1|e.p2, e.c)))
138 [(rdflib.term.URIRef(u'ex:a'), rdflib.term.URIRef(u'ex:c'))]
139
140 >>> list(evalPath(g, (None, ~e.p1, e.a))) == [ (e.c, e.a) ]
141 True
142 >>> list(evalPath(g, (None, e.p1*ZeroOrOne, e.c))) # doctest: +NORMALIZE_WHITESPACE
143 [(rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:c')),
144 (rdflib.term.URIRef(u'ex:a'), rdflib.term.URIRef(u'ex:c'))]
145
146 >>> list(evalPath(g, (None, e.p3*OneOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE
147 [(rdflib.term.URIRef(u'ex:h'), rdflib.term.URIRef(u'ex:a')),
148 (rdflib.term.URIRef(u'ex:g'), rdflib.term.URIRef(u'ex:a')),
149 (rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:a'))]
150
151 >>> list(evalPath(g, (None, e.p3*ZeroOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE
152 [(rdflib.term.URIRef(u'ex:a'), rdflib.term.URIRef(u'ex:a')),
153 (rdflib.term.URIRef(u'ex:h'), rdflib.term.URIRef(u'ex:a')),
154 (rdflib.term.URIRef(u'ex:g'), rdflib.term.URIRef(u'ex:a')),
155 (rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:a'))]
156
157 >>> list(evalPath(g, (None, -e.p1, e.f))) == [(e.a, e.f)]
158 True
159 >>> list(evalPath(g, (None, -(e.p1|e.p2), e.c))) == []
160 True
161 >>> list(evalPath(g, (None, -~e.p2, e.j))) == [(e.g, e.j)]
162 True
163 >>> list(evalPath(g, (None, ~(e.p1/e.p2), e.a))) == [(e.e, e.a)]
164 True
165 >>> list(evalPath(g, (None, e.p1/e.p3/e.p3, e.h))) == [(e.a, e.h)]
166 True
167
168 >>> list(evalPath(g, (e.q, e.px*OneOrMore, None)))
169 [(rdflib.term.URIRef(u'ex:q'), rdflib.term.URIRef(u'ex:q'))]
170
171 >>> list(evalPath(g, (e.c, (e.p2|e.p3)*ZeroOrMore, e.j)))
172 [(rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:j'))]
173
174 No vars specified:
175
176 >>> sorted(list(evalPath(g, (None, e.p3*OneOrMore, None)))) #doctest: +NORMALIZE_WHITESPACE
177 [(rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:a')),
178 (rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:g')),
179 (rdflib.term.URIRef(u'ex:c'), rdflib.term.URIRef(u'ex:h')),
180 (rdflib.term.URIRef(u'ex:g'), rdflib.term.URIRef(u'ex:a')),
181 (rdflib.term.URIRef(u'ex:g'), rdflib.term.URIRef(u'ex:h')),
182 (rdflib.term.URIRef(u'ex:h'), rdflib.term.URIRef(u'ex:a'))]
183
184 .. versionadded:: 4.0
185
186 """
187
188
189 from rdflib.term import URIRef, Node
190
191
192 # property paths
193
194 ZeroOrMore = '*'
195 OneOrMore = '+'
196 ZeroOrOne = '?'
197
198
199 class Path(object):
200 def eval(self, graph, subj=None, obj=None):
201 raise NotImplementedError()
202
203 def __hash__(self):
204 return hash(repr(self))
205
206 def __eq__(self, other):
207 return repr(self) == repr(other)
208
209 def __lt__(self, other):
210 if not isinstance(other, (Path, Node)):
211 raise TypeError('unorderable types: %s() < %s()' % (
212 repr(self), repr(other)))
213 return repr(self) < repr(other)
214
215 def __le__(self, other):
216 if not isinstance(other, (Path, Node)):
217 raise TypeError('unorderable types: %s() < %s()' % (
218 repr(self), repr(other)))
219 return repr(self) <= repr(other)
220
221 def __ne__(self, other):
222 return not self == other
223
224 def __gt__(self, other):
225 return not self <= other
226
227 def __ge__(self, other):
228 return not self < other
229
230
231 class InvPath(Path):
232
233 def __init__(self, arg):
234 self.arg = arg
235
236 def eval(self, graph, subj=None, obj=None):
237 for s, o in evalPath(graph, (obj, self.arg, subj)):
238 yield o, s
239
240 def __repr__(self):
241 return "Path(~%s)" % (self.arg,)
242
243 def n3(self):
244 return '^%s' % self.arg.n3()
245
246
247 class SequencePath(Path):
248 def __init__(self, *args):
249 self.args = []
250 for a in args:
251 if isinstance(a, SequencePath):
252 self.args += a.args
253 else:
254 self.args.append(a)
255
256 def eval(self, graph, subj=None, obj=None):
257 def _eval_seq(paths, subj, obj):
258 if paths[1:]:
259 for s, o in evalPath(graph, (subj, paths[0], None)):
260 for r in _eval_seq(paths[1:], o, obj):
261 yield s, r[1]
262
263 else:
264 for s, o in evalPath(graph, (subj, paths[0], obj)):
265 yield s, o
266
267 def _eval_seq_bw(paths, subj, obj):
268 if paths[:-1]:
269 for s, o in evalPath(graph, (None, paths[-1], obj)):
270 for r in _eval_seq(paths[:-1], subj, s):
271 yield r[0], o
272
273 else:
274 for s, o in evalPath(graph, (subj, paths[0], obj)):
275 yield s, o
276
277 if subj:
278 return _eval_seq(self.args, subj, obj)
279 elif obj:
280 return _eval_seq_bw(self.args, subj, obj)
281 else: # no vars bound, we can start anywhere
282 return _eval_seq(self.args, subj, obj)
283
284 def __repr__(self):
285 return "Path(%s)" % " / ".join(str(x) for x in self.args)
286
287 def n3(self):
288 return '/'.join(a.n3() for a in self.args)
289
290
291 class AlternativePath(Path):
292 def __init__(self, *args):
293 self.args = []
294 for a in args:
295 if isinstance(a, AlternativePath):
296 self.args += a.args
297 else:
298 self.args.append(a)
299
300 def eval(self, graph, subj=None, obj=None):
301 for x in self.args:
302 for y in evalPath(graph, (subj, x, obj)):
303 yield y
304
305 def __repr__(self):
306 return "Path(%s)" % " | ".join(str(x) for x in self.args)
307
308 def n3(self):
309 return '|'.join(a.n3() for a in self.args)
310
311
312 class MulPath(Path):
313 def __init__(self, path, mod):
314 self.path = path
315 self.mod = mod
316
317 if mod == ZeroOrOne:
318 self.zero = True
319 self.more = False
320 elif mod == ZeroOrMore:
321 self.zero = True
322 self.more = True
323 elif mod == OneOrMore:
324 self.zero = False
325 self.more = True
326 else:
327 raise Exception('Unknown modifier %s' % mod)
328
329 def eval(self, graph, subj=None, obj=None, first=True):
330 if self.zero and first:
331 if subj and obj:
332 if subj == obj:
333 yield (subj, obj)
334 elif subj:
335 yield (subj, subj)
336 elif obj:
337 yield (obj, obj)
338
339 def _fwd(subj=None, obj=None, seen=None):
340 seen.add(subj)
341
342 for s, o in evalPath(graph, (subj, self.path, None)):
343 if not obj or o == obj:
344 yield s, o
345 if self.more:
346 if o in seen:
347 continue
348 for s2, o2 in _fwd(o, obj, seen):
349 yield s, o2
350
351 def _bwd(subj=None, obj=None, seen=None):
352 seen.add(obj)
353
354 for s, o in evalPath(graph, (None, self.path, obj)):
355 if not subj or subj == s:
356 yield s, o
357 if self.more:
358 if s in seen:
359 continue
360
361 for s2, o2 in _bwd(None, s, seen):
362 yield s2, o
363
364 def _all_fwd_paths():
365 if self.zero:
366 seen1 = set()
367 # According to the spec, ALL nodes are possible solutions
368 # (even literals)
369 # we cannot do this without going through ALL triples
370 # unless we keep an index of all terms somehow
371 # but lets just hope this query doesnt happen very often...
372 for s, o in graph.subject_objects(None):
373 if s not in seen1:
374 seen1.add(s)
375 yield s, s
376 if o not in seen1:
377 seen1.add(o)
378 yield o, o
379
380 seen = set()
381 for s, o in evalPath(graph, (None, self.path, None)):
382 if not self.more:
383 yield s, o
384 else:
385 if s not in seen:
386 seen.add(s)
387 f = list(_fwd(s, None, set()))
388 for s1, o1 in f:
389 assert s1 == s
390 yield(s1, o1)
391
392 done = set() # the spec does by defn. not allow duplicates
393 if subj:
394 for x in _fwd(subj, obj, set()):
395 if x not in done:
396 done.add(x)
397 yield x
398 elif obj:
399 for x in _bwd(subj, obj, set()):
400 if x not in done:
401 done.add(x)
402 yield x
403 else:
404 for x in _all_fwd_paths():
405 if x not in done:
406 done.add(x)
407 yield x
408
409 def __repr__(self):
410 return "Path(%s%s)" % (self.path, self.mod)
411
412 def n3(self):
413 return '%s%s' % (self.path.n3(), self.mod)
414
415
416 class NegatedPath(Path):
417 def __init__(self, arg):
418 if isinstance(arg, (URIRef, InvPath)):
419 self.args = [arg]
420 elif isinstance(arg, AlternativePath):
421 self.args = arg.args
422 else:
423 raise Exception(
424 'Can only negate URIRefs, InvPaths or ' +
425 'AlternativePaths, not: %s' % (arg,))
426
427 def eval(self, graph, subj=None, obj=None):
428 for s, p, o in graph.triples((subj, None, obj)):
429 for a in self.args:
430 if isinstance(a, URIRef):
431 if p == a:
432 break
433 elif isinstance(a, InvPath):
434 if (o, a.arg, s) in graph:
435 break
436 else:
437 raise Exception('Invalid path in NegatedPath: %s' % a)
438 else:
439 yield s, o
440
441 def __repr__(self):
442 return "Path(! %s)" % ",".join(str(x) for x in self.args)
443
444 def n3(self):
445 return '!(%s)' % ('|'.join(self.args))
446
447
448 class PathList(list):
449 pass
450
451
452 def path_alternative(self, other):
453 """
454 alternative path
455 """
456 if not isinstance(other, (URIRef, Path)):
457 raise Exception('Only URIRefs or Paths can be in paths!')
458 return AlternativePath(self, other)
459
460
461 def path_sequence(self, other):
462 """
463 sequence path
464 """
465 if not isinstance(other, (URIRef, Path)):
466 raise Exception('Only URIRefs or Paths can be in paths!')
467 return SequencePath(self, other)
468
469
470 def evalPath(graph, t):
471 return ((s, o) for s, p, o in graph.triples(t))
472
473
474 def mul_path(p, mul):
475 """
476 cardinality path
477 """
478 return MulPath(p, mul)
479
480
481 def inv_path(p):
482 """
483 inverse path
484 """
485 return InvPath(p)
486
487
488 def neg_path(p):
489 """
490 negated path
491 """
492 return NegatedPath(p)
493
494
495 if __name__ == '__main__':
496
497 import doctest
498 doctest.testmod()
499 else:
500 # monkey patch
501 # (these cannot be directly in terms.py
502 # as it would introduce circular imports)
503
504 URIRef.__or__ = path_alternative
505 URIRef.__mul__ = mul_path
506 URIRef.__invert__ = inv_path
507 URIRef.__neg__ = neg_path
508 URIRef.__truediv__ = path_sequence
509 if not PY3:
510 URIRef.__div__ = path_sequence
511
512 Path.__invert__ = inv_path
513 Path.__neg__ = neg_path
514 Path.__mul__ = mul_path
515 Path.__or__ = path_alternative
516 Path.__truediv__ = path_sequence
517 if not PY3:
518 Path.__div__ = path_sequence