Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/rdflib/plugins/sparql/algebra.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 | |
| 2 """ | |
| 3 Converting the 'parse-tree' output of pyparsing to a SPARQL Algebra expression | |
| 4 | |
| 5 http://www.w3.org/TR/sparql11-query/#sparqlQuery | |
| 6 | |
| 7 """ | |
| 8 | |
| 9 import functools | |
| 10 import operator | |
| 11 import collections | |
| 12 | |
| 13 from rdflib import Literal, Variable, URIRef, BNode | |
| 14 | |
| 15 from rdflib.plugins.sparql.sparql import Prologue, Query | |
| 16 from rdflib.plugins.sparql.parserutils import CompValue, Expr | |
| 17 from rdflib.plugins.sparql.operators import ( | |
| 18 and_, TrueFilter, simplify as simplifyFilters) | |
| 19 from rdflib.paths import ( | |
| 20 InvPath, AlternativePath, SequencePath, MulPath, NegatedPath) | |
| 21 | |
| 22 from pyparsing import ParseResults | |
| 23 from functools import reduce | |
| 24 | |
| 25 | |
| 26 # --------------------------- | |
| 27 # Some convenience methods | |
| 28 def OrderBy(p, expr): | |
| 29 return CompValue('OrderBy', p=p, expr=expr) | |
| 30 | |
| 31 | |
| 32 def ToMultiSet(p): | |
| 33 return CompValue('ToMultiSet', p=p) | |
| 34 | |
| 35 | |
| 36 def Union(p1, p2): | |
| 37 return CompValue('Union', p1=p1, p2=p2) | |
| 38 | |
| 39 | |
| 40 def Join(p1, p2): | |
| 41 return CompValue('Join', p1=p1, p2=p2) | |
| 42 | |
| 43 | |
| 44 def Minus(p1, p2): | |
| 45 return CompValue('Minus', p1=p1, p2=p2) | |
| 46 | |
| 47 | |
| 48 def Graph(term, graph): | |
| 49 return CompValue('Graph', term=term, p=graph) | |
| 50 | |
| 51 | |
| 52 def BGP(triples=None): | |
| 53 return CompValue('BGP', triples=triples or []) | |
| 54 | |
| 55 | |
| 56 def LeftJoin(p1, p2, expr): | |
| 57 return CompValue('LeftJoin', p1=p1, p2=p2, expr=expr) | |
| 58 | |
| 59 | |
| 60 def Filter(expr, p): | |
| 61 return CompValue('Filter', expr=expr, p=p) | |
| 62 | |
| 63 | |
| 64 def Extend(p, expr, var): | |
| 65 return CompValue('Extend', p=p, expr=expr, var=var) | |
| 66 | |
| 67 def Values(res): | |
| 68 return CompValue('values', res=res) | |
| 69 | |
| 70 def Project(p, PV): | |
| 71 return CompValue('Project', p=p, PV=PV) | |
| 72 | |
| 73 | |
| 74 def Group(p, expr=None): | |
| 75 return CompValue('Group', p=p, expr=expr) | |
| 76 | |
| 77 | |
| 78 def _knownTerms(triple, varsknown, varscount): | |
| 79 return (len([_f for _f in (x not in varsknown and | |
| 80 isinstance( | |
| 81 x, (Variable, BNode)) for x in triple) if _f]), | |
| 82 -sum(varscount.get(x, 0) for x in triple), | |
| 83 not isinstance(triple[2], Literal), | |
| 84 ) | |
| 85 | |
| 86 | |
| 87 def reorderTriples(l): | |
| 88 """ | |
| 89 Reorder triple patterns so that we execute the | |
| 90 ones with most bindings first | |
| 91 """ | |
| 92 | |
| 93 def _addvar(term, varsknown): | |
| 94 if isinstance(term, (Variable, BNode)): | |
| 95 varsknown.add(term) | |
| 96 | |
| 97 l = [(None, x) for x in l] | |
| 98 varsknown = set() | |
| 99 varscount = collections.defaultdict(int) | |
| 100 for t in l: | |
| 101 for c in t[1]: | |
| 102 if isinstance(c, (Variable, BNode)): | |
| 103 varscount[c] += 1 | |
| 104 i = 0 | |
| 105 | |
| 106 # Done in steps, sort by number of bound terms | |
| 107 # the top block of patterns with the most bound terms is kept | |
| 108 # the rest is resorted based on the vars bound after the first | |
| 109 # block is evaluated | |
| 110 | |
| 111 # we sort by decorate/undecorate, since we need the value of the sort keys | |
| 112 | |
| 113 while i < len(l): | |
| 114 l[i:] = sorted((_knownTerms(x[ | |
| 115 1], varsknown, varscount), x[1]) for x in l[i:]) | |
| 116 t = l[i][0][0] # top block has this many terms bound | |
| 117 j = 0 | |
| 118 while i+j < len(l) and l[i+j][0][0] == t: | |
| 119 for c in l[i+j][1]: | |
| 120 _addvar(c, varsknown) | |
| 121 j += 1 | |
| 122 i += 1 | |
| 123 | |
| 124 return [x[1] for x in l] | |
| 125 | |
| 126 | |
| 127 def triples(l): | |
| 128 | |
| 129 l = reduce(lambda x, y: x + y, l) | |
| 130 if (len(l) % 3) != 0: | |
| 131 raise Exception('these aint triples') | |
| 132 return reorderTriples((l[x], l[x + 1], l[x + 2]) | |
| 133 for x in range(0, len(l), 3)) | |
| 134 | |
| 135 | |
| 136 def translatePName(p, prologue): | |
| 137 """ | |
| 138 Expand prefixed/relative URIs | |
| 139 """ | |
| 140 if isinstance(p, CompValue): | |
| 141 if p.name == 'pname': | |
| 142 return prologue.absolutize(p) | |
| 143 if p.name == 'literal': | |
| 144 return Literal(p.string, lang=p.lang, | |
| 145 datatype=prologue.absolutize(p.datatype)) | |
| 146 elif isinstance(p, URIRef): | |
| 147 return prologue.absolutize(p) | |
| 148 | |
| 149 | |
| 150 def translatePath(p): | |
| 151 | |
| 152 """ | |
| 153 Translate PropertyPath expressions | |
| 154 """ | |
| 155 | |
| 156 if isinstance(p, CompValue): | |
| 157 if p.name == 'PathAlternative': | |
| 158 if len(p.part) == 1: | |
| 159 return p.part[0] | |
| 160 else: | |
| 161 return AlternativePath(*p.part) | |
| 162 | |
| 163 elif p.name == 'PathSequence': | |
| 164 if len(p.part) == 1: | |
| 165 return p.part[0] | |
| 166 else: | |
| 167 return SequencePath(*p.part) | |
| 168 | |
| 169 elif p.name == 'PathElt': | |
| 170 if not p.mod: | |
| 171 return p.part | |
| 172 else: | |
| 173 if isinstance(p.part, list): | |
| 174 if len(p.part) != 1: | |
| 175 raise Exception('Denkfehler!') | |
| 176 | |
| 177 return MulPath(p.part[0], p.mod) | |
| 178 else: | |
| 179 return MulPath(p.part, p.mod) | |
| 180 | |
| 181 elif p.name == 'PathEltOrInverse': | |
| 182 if isinstance(p.part, list): | |
| 183 if len(p.part) != 1: | |
| 184 raise Exception('Denkfehler!') | |
| 185 return InvPath(p.part[0]) | |
| 186 else: | |
| 187 return InvPath(p.part) | |
| 188 | |
| 189 elif p.name == 'PathNegatedPropertySet': | |
| 190 if isinstance(p.part, list): | |
| 191 return NegatedPath(AlternativePath(*p.part)) | |
| 192 else: | |
| 193 return NegatedPath(p.part) | |
| 194 | |
| 195 | |
| 196 def translateExists(e): | |
| 197 | |
| 198 """ | |
| 199 Translate the graph pattern used by EXISTS and NOT EXISTS | |
| 200 http://www.w3.org/TR/sparql11-query/#sparqlCollectFilters | |
| 201 """ | |
| 202 | |
| 203 def _c(n): | |
| 204 if isinstance(n, CompValue): | |
| 205 if n.name in ('Builtin_EXISTS', 'Builtin_NOTEXISTS'): | |
| 206 n.graph = translateGroupGraphPattern(n.graph) | |
| 207 if n.graph.name == 'Filter': | |
| 208 # filters inside (NOT) EXISTS can see vars bound outside | |
| 209 n.graph.no_isolated_scope = True | |
| 210 | |
| 211 e = traverse(e, visitPost=_c) | |
| 212 | |
| 213 return e | |
| 214 | |
| 215 | |
| 216 def collectAndRemoveFilters(parts): | |
| 217 | |
| 218 """ | |
| 219 | |
| 220 FILTER expressions apply to the whole group graph pattern in which | |
| 221 they appear. | |
| 222 | |
| 223 http://www.w3.org/TR/sparql11-query/#sparqlCollectFilters | |
| 224 """ | |
| 225 | |
| 226 filters = [] | |
| 227 | |
| 228 i = 0 | |
| 229 while i < len(parts): | |
| 230 p = parts[i] | |
| 231 if p.name == 'Filter': | |
| 232 filters.append(translateExists(p.expr)) | |
| 233 parts.pop(i) | |
| 234 else: | |
| 235 i += 1 | |
| 236 | |
| 237 if filters: | |
| 238 return and_(*filters) | |
| 239 | |
| 240 return None | |
| 241 | |
| 242 | |
| 243 def translateGroupOrUnionGraphPattern(graphPattern): | |
| 244 A = None | |
| 245 | |
| 246 for g in graphPattern.graph: | |
| 247 g = translateGroupGraphPattern(g) | |
| 248 if not A: | |
| 249 A = g | |
| 250 else: | |
| 251 A = Union(A, g) | |
| 252 return A | |
| 253 | |
| 254 | |
| 255 def translateGraphGraphPattern(graphPattern): | |
| 256 return Graph(graphPattern.term, | |
| 257 translateGroupGraphPattern(graphPattern.graph)) | |
| 258 | |
| 259 | |
| 260 def translateInlineData(graphPattern): | |
| 261 return ToMultiSet(translateValues(graphPattern)) | |
| 262 | |
| 263 | |
| 264 def translateGroupGraphPattern(graphPattern): | |
| 265 """ | |
| 266 http://www.w3.org/TR/sparql11-query/#convertGraphPattern | |
| 267 """ | |
| 268 | |
| 269 if graphPattern.name == 'SubSelect': | |
| 270 return ToMultiSet(translate(graphPattern)[0]) | |
| 271 | |
| 272 if not graphPattern.part: | |
| 273 graphPattern.part = [] # empty { } | |
| 274 | |
| 275 filters = collectAndRemoveFilters(graphPattern.part) | |
| 276 | |
| 277 g = [] | |
| 278 for p in graphPattern.part: | |
| 279 if p.name == 'TriplesBlock': | |
| 280 # merge adjacent TripleBlocks | |
| 281 if not (g and g[-1].name == 'BGP'): | |
| 282 g.append(BGP()) | |
| 283 g[-1]["triples"] += triples(p.triples) | |
| 284 else: | |
| 285 g.append(p) | |
| 286 | |
| 287 G = BGP() | |
| 288 for p in g: | |
| 289 if p.name == 'OptionalGraphPattern': | |
| 290 A = translateGroupGraphPattern(p.graph) | |
| 291 if A.name == 'Filter': | |
| 292 G = LeftJoin(G, A.p, A.expr) | |
| 293 else: | |
| 294 G = LeftJoin(G, A, TrueFilter) | |
| 295 elif p.name == 'MinusGraphPattern': | |
| 296 G = Minus(p1=G, p2=translateGroupGraphPattern(p.graph)) | |
| 297 elif p.name == 'GroupOrUnionGraphPattern': | |
| 298 G = Join(p1=G, p2=translateGroupOrUnionGraphPattern(p)) | |
| 299 elif p.name == 'GraphGraphPattern': | |
| 300 G = Join(p1=G, p2=translateGraphGraphPattern(p)) | |
| 301 elif p.name == 'InlineData': | |
| 302 G = Join(p1=G, p2=translateInlineData(p)) | |
| 303 elif p.name == 'ServiceGraphPattern': | |
| 304 G = Join(p1=G, p2=p) | |
| 305 elif p.name in ('BGP', 'Extend'): | |
| 306 G = Join(p1=G, p2=p) | |
| 307 elif p.name == 'Bind': | |
| 308 G = Extend(G, p.expr, p.var) | |
| 309 | |
| 310 else: | |
| 311 raise Exception('Unknown part in GroupGraphPattern: %s - %s' % | |
| 312 (type(p), p.name)) | |
| 313 | |
| 314 if filters: | |
| 315 G = Filter(expr=filters, p=G) | |
| 316 | |
| 317 return G | |
| 318 | |
| 319 | |
| 320 class StopTraversal(Exception): | |
| 321 def __init__(self, rv): | |
| 322 self.rv = rv | |
| 323 | |
| 324 | |
| 325 def _traverse(e, visitPre=lambda n: None, visitPost=lambda n: None): | |
| 326 """ | |
| 327 Traverse a parse-tree, visit each node | |
| 328 | |
| 329 if visit functions return a value, replace current node | |
| 330 """ | |
| 331 _e = visitPre(e) | |
| 332 if _e is not None: | |
| 333 return _e | |
| 334 | |
| 335 if e is None: | |
| 336 return None | |
| 337 | |
| 338 if isinstance(e, (list, ParseResults)): | |
| 339 return [_traverse(x, visitPre, visitPost) for x in e] | |
| 340 elif isinstance(e, tuple): | |
| 341 return tuple([_traverse(x, visitPre, visitPost) for x in e]) | |
| 342 | |
| 343 elif isinstance(e, CompValue): | |
| 344 for k, val in e.items(): | |
| 345 e[k] = _traverse(val, visitPre, visitPost) | |
| 346 | |
| 347 _e = visitPost(e) | |
| 348 if _e is not None: | |
| 349 return _e | |
| 350 | |
| 351 return e | |
| 352 | |
| 353 | |
| 354 def _traverseAgg(e, visitor=lambda n, v: None): | |
| 355 """ | |
| 356 Traverse a parse-tree, visit each node | |
| 357 | |
| 358 if visit functions return a value, replace current node | |
| 359 """ | |
| 360 | |
| 361 res = [] | |
| 362 | |
| 363 if isinstance(e, (list, ParseResults, tuple)): | |
| 364 res = [_traverseAgg(x, visitor) for x in e] | |
| 365 | |
| 366 elif isinstance(e, CompValue): | |
| 367 for k, val in e.items(): | |
| 368 if val != None: | |
| 369 res.append(_traverseAgg(val, visitor)) | |
| 370 | |
| 371 return visitor(e, res) | |
| 372 | |
| 373 | |
| 374 def traverse( | |
| 375 tree, visitPre=lambda n: None, | |
| 376 visitPost=lambda n: None, complete=None): | |
| 377 """ | |
| 378 Traverse tree, visit each node with visit function | |
| 379 visit function may raise StopTraversal to stop traversal | |
| 380 if complete!=None, it is returned on complete traversal, | |
| 381 otherwise the transformed tree is returned | |
| 382 """ | |
| 383 try: | |
| 384 r = _traverse(tree, visitPre, visitPost) | |
| 385 if complete is not None: | |
| 386 return complete | |
| 387 return r | |
| 388 except StopTraversal as st: | |
| 389 return st.rv | |
| 390 | |
| 391 | |
| 392 def _hasAggregate(x): | |
| 393 """ | |
| 394 Traverse parse(sub)Tree | |
| 395 return true if any aggregates are used | |
| 396 """ | |
| 397 | |
| 398 if isinstance(x, CompValue): | |
| 399 if x.name.startswith('Aggregate_'): | |
| 400 raise StopTraversal(True) | |
| 401 | |
| 402 | |
| 403 def _aggs(e, A): | |
| 404 """ | |
| 405 Collect Aggregates in A | |
| 406 replaces aggregates with variable references | |
| 407 """ | |
| 408 | |
| 409 # TODO: nested Aggregates? | |
| 410 | |
| 411 if isinstance(e, CompValue) and e.name.startswith('Aggregate_'): | |
| 412 A.append(e) | |
| 413 aggvar = Variable('__agg_%d__' % len(A)) | |
| 414 e["res"] = aggvar | |
| 415 return aggvar | |
| 416 | |
| 417 | |
| 418 def _findVars(x, res): | |
| 419 """ | |
| 420 Find all variables in a tree | |
| 421 """ | |
| 422 if isinstance(x, Variable): | |
| 423 res.add(x) | |
| 424 if isinstance(x, CompValue): | |
| 425 if x.name == "Bind": | |
| 426 res.add(x.var) | |
| 427 return x # stop recursion and finding vars in the expr | |
| 428 elif x.name == 'SubSelect': | |
| 429 if x.projection: | |
| 430 res.update(v.var or v.evar for v in x.projection) | |
| 431 return x | |
| 432 | |
| 433 | |
| 434 def _addVars(x, children): | |
| 435 """ | |
| 436 find which variables may be bound by this part of the query | |
| 437 """ | |
| 438 if isinstance(x, Variable): | |
| 439 return set([x]) | |
| 440 elif isinstance(x, CompValue): | |
| 441 if x.name == "RelationalExpression": | |
| 442 x["_vars"] = set() | |
| 443 elif x.name == "Extend": | |
| 444 # vars only used in the expr for a bind should not be included | |
| 445 x["_vars"] = reduce(operator.or_, [ child for child,part in zip(children,x) if part!='expr' ], set()) | |
| 446 | |
| 447 else: | |
| 448 x["_vars"] = set(reduce(operator.or_, children, set())) | |
| 449 | |
| 450 if x.name == 'SubSelect': | |
| 451 if x.projection: | |
| 452 s = set(v.var or v.evar for v in x.projection) | |
| 453 else: | |
| 454 s = set() | |
| 455 | |
| 456 return s | |
| 457 | |
| 458 return x["_vars"] | |
| 459 | |
| 460 return reduce(operator.or_, children, set()) | |
| 461 | |
| 462 | |
| 463 def _sample(e, v=None): | |
| 464 """ | |
| 465 For each unaggregated variable V in expr | |
| 466 Replace V with Sample(V) | |
| 467 """ | |
| 468 if isinstance(e, CompValue) and e.name.startswith("Aggregate_"): | |
| 469 return e # do not replace vars in aggregates | |
| 470 if isinstance(e, Variable) and v != e: | |
| 471 return CompValue('Aggregate_Sample', vars=e) | |
| 472 | |
| 473 | |
| 474 def _simplifyFilters(e): | |
| 475 if isinstance(e, Expr): | |
| 476 return simplifyFilters(e) | |
| 477 | |
| 478 | |
| 479 def translateAggregates(q, M): | |
| 480 E = [] | |
| 481 A = [] | |
| 482 | |
| 483 # collect/replace aggs in : | |
| 484 # select expr as ?var | |
| 485 if q.projection: | |
| 486 for v in q.projection: | |
| 487 if v.evar: | |
| 488 v.expr = traverse(v.expr, functools.partial(_sample, v=v.evar)) | |
| 489 v.expr = traverse(v.expr, functools.partial(_aggs, A=A)) | |
| 490 | |
| 491 | |
| 492 # having clause | |
| 493 if traverse(q.having, _hasAggregate, complete=False): | |
| 494 q.having = traverse(q.having, _sample) | |
| 495 traverse(q.having, functools.partial(_aggs, A=A)) | |
| 496 | |
| 497 # order by | |
| 498 if traverse(q.orderby, _hasAggregate, complete=False): | |
| 499 q.orderby = traverse(q.orderby, _sample) | |
| 500 traverse(q.orderby, functools.partial(_aggs, A=A)) | |
| 501 | |
| 502 # sample all other select vars | |
| 503 # TODO: only allowed for vars in group-by? | |
| 504 if q.projection: | |
| 505 for v in q.projection: | |
| 506 if v.var: | |
| 507 rv = Variable('__agg_%d__' % (len(A) + 1)) | |
| 508 A.append(CompValue('Aggregate_Sample', vars=v.var, res=rv)) | |
| 509 E.append((rv, v.var)) | |
| 510 | |
| 511 return CompValue('AggregateJoin', A=A, p=M), E | |
| 512 | |
| 513 | |
| 514 def translateValues(v): | |
| 515 # if len(v.var)!=len(v.value): | |
| 516 # raise Exception("Unmatched vars and values in ValueClause: "+str(v)) | |
| 517 | |
| 518 res = [] | |
| 519 if not v.var: | |
| 520 return res | |
| 521 if not v.value: | |
| 522 return res | |
| 523 if not isinstance(v.value[0], list): | |
| 524 | |
| 525 for val in v.value: | |
| 526 res.append({v.var[0]: val}) | |
| 527 else: | |
| 528 for vals in v.value: | |
| 529 res.append(dict(list(zip(v.var, vals)))) | |
| 530 | |
| 531 return Values(res) | |
| 532 | |
| 533 | |
| 534 def translate(q): | |
| 535 """ | |
| 536 http://www.w3.org/TR/sparql11-query/#convertSolMod | |
| 537 | |
| 538 """ | |
| 539 | |
| 540 _traverse(q, _simplifyFilters) | |
| 541 | |
| 542 q.where = traverse(q.where, visitPost=translatePath) | |
| 543 | |
| 544 # TODO: Var scope test | |
| 545 VS = set() | |
| 546 traverse(q.where, functools.partial(_findVars, res=VS)) | |
| 547 | |
| 548 # all query types have a where part | |
| 549 M = translateGroupGraphPattern(q.where) | |
| 550 | |
| 551 aggregate = False | |
| 552 if q.groupby: | |
| 553 conditions = [] | |
| 554 # convert "GROUP BY (?expr as ?var)" to an Extend | |
| 555 for c in q.groupby.condition: | |
| 556 if isinstance(c, CompValue) and c.name == 'GroupAs': | |
| 557 M = Extend(M, c.expr, c.var) | |
| 558 c = c.var | |
| 559 conditions.append(c) | |
| 560 | |
| 561 M = Group(p=M, expr=conditions) | |
| 562 aggregate = True | |
| 563 elif traverse(q.having, _hasAggregate, complete=False) or \ | |
| 564 traverse(q.orderby, _hasAggregate, complete=False) or \ | |
| 565 any(traverse(x.expr, _hasAggregate, complete=False) | |
| 566 for x in q.projection or [] if x.evar): | |
| 567 # if any aggregate is used, implicit group by | |
| 568 M = Group(p=M) | |
| 569 aggregate = True | |
| 570 | |
| 571 if aggregate: | |
| 572 M, E = translateAggregates(q, M) | |
| 573 else: | |
| 574 E = [] | |
| 575 | |
| 576 # HAVING | |
| 577 if q.having: | |
| 578 M = Filter(expr=and_(*q.having.condition), p=M) | |
| 579 | |
| 580 # VALUES | |
| 581 if q.valuesClause: | |
| 582 M = Join(p1=M, p2=ToMultiSet(translateValues(q.valuesClause))) | |
| 583 | |
| 584 if not q.projection: | |
| 585 # select * | |
| 586 PV = list(VS) | |
| 587 else: | |
| 588 PV = list() | |
| 589 for v in q.projection: | |
| 590 if v.var: | |
| 591 if v not in PV: | |
| 592 PV.append(v.var) | |
| 593 elif v.evar: | |
| 594 if v not in PV: | |
| 595 PV.append(v.evar) | |
| 596 | |
| 597 E.append((v.expr, v.evar)) | |
| 598 else: | |
| 599 raise Exception("I expected a var or evar here!") | |
| 600 | |
| 601 for e, v in E: | |
| 602 M = Extend(M, e, v) | |
| 603 | |
| 604 # ORDER BY | |
| 605 if q.orderby: | |
| 606 M = OrderBy(M, [CompValue('OrderCondition', expr=c.expr, | |
| 607 order=c.order) for c in q.orderby.condition]) | |
| 608 | |
| 609 # PROJECT | |
| 610 M = Project(M, PV) | |
| 611 | |
| 612 if q.modifier: | |
| 613 if q.modifier == 'DISTINCT': | |
| 614 M = CompValue('Distinct', p=M) | |
| 615 elif q.modifier == 'REDUCED': | |
| 616 M = CompValue('Reduced', p=M) | |
| 617 | |
| 618 if q.limitoffset: | |
| 619 offset = 0 | |
| 620 if q.limitoffset.offset != None: | |
| 621 offset = q.limitoffset.offset.toPython() | |
| 622 | |
| 623 if q.limitoffset.limit != None: | |
| 624 M = CompValue('Slice', p=M, start=offset, | |
| 625 length=q.limitoffset.limit.toPython()) | |
| 626 else: | |
| 627 M = CompValue('Slice', p=M, start=offset) | |
| 628 | |
| 629 return M, PV | |
| 630 | |
| 631 | |
| 632 def simplify(n): | |
| 633 """Remove joins to empty BGPs""" | |
| 634 if isinstance(n, CompValue): | |
| 635 if n.name == 'Join': | |
| 636 if n.p1.name == 'BGP' and len(n.p1.triples) == 0: | |
| 637 return n.p2 | |
| 638 if n.p2.name == 'BGP' and len(n.p2.triples) == 0: | |
| 639 return n.p1 | |
| 640 elif n.name == 'BGP': | |
| 641 n["triples"] = reorderTriples(n.triples) | |
| 642 return n | |
| 643 | |
| 644 | |
| 645 def analyse(n, children): | |
| 646 | |
| 647 """ | |
| 648 Some things can be lazily joined. | |
| 649 This propegates whether they can up the tree | |
| 650 and sets lazy flags for all joins | |
| 651 """ | |
| 652 | |
| 653 if isinstance(n, CompValue): | |
| 654 if n.name == 'Join': | |
| 655 n["lazy"] = all(children) | |
| 656 return False | |
| 657 elif n.name in ('Slice', 'Distinct'): | |
| 658 return False | |
| 659 else: | |
| 660 return all(children) | |
| 661 else: | |
| 662 return True | |
| 663 | |
| 664 | |
| 665 def translatePrologue(p, base, initNs=None, prologue=None): | |
| 666 | |
| 667 if prologue is None: | |
| 668 prologue = Prologue() | |
| 669 prologue.base = "" | |
| 670 if base: | |
| 671 prologue.base = base | |
| 672 if initNs: | |
| 673 for k, v in initNs.items(): | |
| 674 prologue.bind(k, v) | |
| 675 | |
| 676 for x in p: | |
| 677 if x.name == 'Base': | |
| 678 prologue.base = x.iri | |
| 679 elif x.name == 'PrefixDecl': | |
| 680 prologue.bind(x.prefix, prologue.absolutize(x.iri)) | |
| 681 | |
| 682 return prologue | |
| 683 | |
| 684 | |
| 685 def translateQuads(quads): | |
| 686 if quads.triples: | |
| 687 alltriples = triples(quads.triples) | |
| 688 else: | |
| 689 alltriples = [] | |
| 690 | |
| 691 allquads = collections.defaultdict(list) | |
| 692 | |
| 693 if quads.quadsNotTriples: | |
| 694 for q in quads.quadsNotTriples: | |
| 695 if q.triples: | |
| 696 allquads[q.term] += triples(q.triples) | |
| 697 | |
| 698 return alltriples, allquads | |
| 699 | |
| 700 | |
| 701 def translateUpdate1(u, prologue): | |
| 702 if u.name in ('Load', 'Clear', 'Drop', 'Create'): | |
| 703 pass # no translation needed | |
| 704 elif u.name in ('Add', 'Move', 'Copy'): | |
| 705 pass | |
| 706 elif u.name in ('InsertData', 'DeleteData', 'DeleteWhere'): | |
| 707 t, q = translateQuads(u.quads) | |
| 708 u["quads"] = q | |
| 709 u["triples"] = t | |
| 710 if u.name in ('DeleteWhere', 'DeleteData'): | |
| 711 pass # TODO: check for bnodes in triples | |
| 712 elif u.name == 'Modify': | |
| 713 if u.delete: | |
| 714 u.delete["triples"], u.delete[ | |
| 715 "quads"] = translateQuads(u.delete.quads) | |
| 716 if u.insert: | |
| 717 u.insert["triples"], u.insert[ | |
| 718 "quads"] = translateQuads(u.insert.quads) | |
| 719 u["where"] = translateGroupGraphPattern(u.where) | |
| 720 else: | |
| 721 raise Exception('Unknown type of update operation: %s' % u) | |
| 722 | |
| 723 u.prologue = prologue | |
| 724 return u | |
| 725 | |
| 726 | |
| 727 def translateUpdate(q, base=None, initNs=None): | |
| 728 """ | |
| 729 Returns a list of SPARQL Update Algebra expressions | |
| 730 """ | |
| 731 | |
| 732 res = [] | |
| 733 prologue = None | |
| 734 if not q.request: | |
| 735 return res | |
| 736 for p, u in zip(q.prologue, q.request): | |
| 737 prologue = translatePrologue(p, base, initNs, prologue) | |
| 738 | |
| 739 # absolutize/resolve prefixes | |
| 740 u = traverse( | |
| 741 u, visitPost=functools.partial(translatePName, prologue=prologue)) | |
| 742 u = _traverse(u, _simplifyFilters) | |
| 743 | |
| 744 u = traverse(u, visitPost=translatePath) | |
| 745 | |
| 746 res.append(translateUpdate1(u, prologue)) | |
| 747 | |
| 748 return res | |
| 749 | |
| 750 | |
| 751 def translateQuery(q, base=None, initNs=None): | |
| 752 """ | |
| 753 Translate a query-parsetree to a SPARQL Algebra Expression | |
| 754 | |
| 755 Return a rdflib.plugins.sparql.sparql.Query object | |
| 756 """ | |
| 757 | |
| 758 # We get in: (prologue, query) | |
| 759 | |
| 760 prologue = translatePrologue(q[0], base, initNs) | |
| 761 | |
| 762 # absolutize/resolve prefixes | |
| 763 q[1] = traverse( | |
| 764 q[1], visitPost=functools.partial(translatePName, prologue=prologue)) | |
| 765 | |
| 766 P, PV = translate(q[1]) | |
| 767 datasetClause = q[1].datasetClause | |
| 768 if q[1].name == 'ConstructQuery': | |
| 769 | |
| 770 template = triples(q[1].template) if q[1].template else None | |
| 771 | |
| 772 res = CompValue(q[1].name, p=P, | |
| 773 template=template, | |
| 774 datasetClause=datasetClause) | |
| 775 else: | |
| 776 res = CompValue(q[1].name, p=P, datasetClause=datasetClause, PV=PV) | |
| 777 | |
| 778 res = traverse(res, visitPost=simplify) | |
| 779 _traverseAgg(res, visitor=analyse) | |
| 780 _traverseAgg(res, _addVars) | |
| 781 | |
| 782 return Query(prologue, res) | |
| 783 | |
| 784 | |
| 785 def pprintAlgebra(q): | |
| 786 def pp(p, ind=" "): | |
| 787 # if isinstance(p, list): | |
| 788 # print "[ " | |
| 789 # for x in p: pp(x,ind) | |
| 790 # print "%s ]"%ind | |
| 791 # return | |
| 792 if not isinstance(p, CompValue): | |
| 793 print(p) | |
| 794 return | |
| 795 print("%s(" % (p.name, )) | |
| 796 for k in p: | |
| 797 print("%s%s =" % (ind, k,), end=' ') | |
| 798 pp(p[k], ind + " ") | |
| 799 print("%s)" % ind) | |
| 800 | |
| 801 try: | |
| 802 pp(q.algebra) | |
| 803 except AttributeError: | |
| 804 # it's update, just a list | |
| 805 for x in q: | |
| 806 pp(x) | |
| 807 | |
| 808 if __name__ == '__main__': | |
| 809 import sys | |
| 810 from rdflib.plugins.sparql import parser | |
| 811 import os.path | |
| 812 | |
| 813 if os.path.exists(sys.argv[1]): | |
| 814 q = file(sys.argv[1]) | |
| 815 else: | |
| 816 q = sys.argv[1] | |
| 817 | |
| 818 pq = parser.parseQuery(q) | |
| 819 print(pq) | |
| 820 tq = translateQuery(pq) | |
| 821 print(pprintAlgebra(tq)) |
