env/lib/python3.7/site-packages/rdflib/plugins/sparql/evaluate.py @ 0:26e78fe6e8c4 (draft)

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"

| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | (none) |
| children | (none) |
| 1 """ | |
| 2 These method recursively evaluate the SPARQL Algebra | |
| 3 | |
| 4 evalQuery is the entry-point, it will setup context and | |
| 5 return the SPARQLResult object | |
| 6 | |
| 7 evalPart is called on each level and will delegate to the right method | |
| 8 | |
| 9 A rdflib.plugins.sparql.sparql.QueryContext is passed along, keeping | |
| 10 information needed for evaluation | |
| 11 | |
| 12 A list of dicts (solution mappings) is returned, apart from GroupBy which may | |
| 13 also return a dict of list of dicts | |
| 14 | |
| 15 """ | |
| 16 | |
| 17 import collections | |
| 18 | |
| 19 from rdflib import Variable, Graph, BNode, URIRef, Literal | |
| 20 | |
| 21 from rdflib.plugins.sparql import CUSTOM_EVALS | |
| 22 from rdflib.plugins.sparql.parserutils import value | |
| 23 from rdflib.plugins.sparql.sparql import ( | |
| 24 QueryContext, AlreadyBound, FrozenBindings, SPARQLError) | |
| 25 from rdflib.plugins.sparql.evalutils import ( | |
| 26 _filter, _eval, _join, _diff, _minus, _fillTemplate, _ebv, _val) | |
| 27 | |
| 28 from rdflib.plugins.sparql.aggregates import Aggregator | |
| 29 from rdflib.plugins.sparql.algebra import Join, ToMultiSet, Values | |
| 30 | |
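

# Illustrative sketch (not part of the original module): CUSTOM_EVALS is the
# extension point consulted first by evalPart below. A registered hook
# receives (ctx, part) and must raise NotImplementedError for algebra parts
# it does not handle, e.g.:
#
#     def example_eval(ctx, part):          # hypothetical hook
#         if part.name != 'BGP':
#             raise NotImplementedError()
#         return evalBGP(ctx, part.triples)
#
#     CUSTOM_EVALS['example'] = example_eval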


def evalBGP(ctx, bgp):
    """
    A basic graph pattern
    """

    if not bgp:
        yield ctx.solution()
        return

    s, p, o = bgp[0]

    _s = ctx[s]
    _p = ctx[p]
    _o = ctx[o]

    for ss, sp, so in ctx.graph.triples((_s, _p, _o)):
        if None in (_s, _p, _o):
            c = ctx.push()
        else:
            c = ctx

        if _s is None:
            c[s] = ss

        try:
            if _p is None:
                c[p] = sp
        except AlreadyBound:
            continue

        try:
            if _o is None:
                c[o] = so
        except AlreadyBound:
            continue

        for x in evalBGP(c, bgp[1:]):
            yield x
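

# For intuition: given the patterns [(?s, rdf:type, foaf:Person),
# (?s, foaf:name, ?n)], evalBGP matches the first pattern against
# ctx.graph, pushes a binding for ?s, and recurses on the remaining
# patterns with that binding in place, so each later pattern is matched
# only against triples consistent with the candidate ?s.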


def evalExtend(ctx, extend):
    # TODO: Deal with dict returned from evalPart from GROUP BY

    for c in evalPart(ctx, extend.p):
        try:
            e = _eval(extend.expr, c.forget(ctx, _except=extend._vars))
            if isinstance(e, SPARQLError):
                raise e

            yield c.merge({extend.var: e})

        except SPARQLError:
            yield c


def evalLazyJoin(ctx, join):
    """
    A lazy join pushes the variables bound in the first part
    into the second part, doing the join implicitly and,
    with luck, evaluating far fewer triples.
    """
    for a in evalPart(ctx, join.p1):
        c = ctx.thaw(a)
        for b in evalPart(c, join.p2):
            yield b.merge(a)  # merge, as some bindings may have been forgotten


def evalJoin(ctx, join):

    # TODO: Deal with dict returned from evalPart from GROUP BY
    # only ever for join.p1

    if join.lazy:
        return evalLazyJoin(ctx, join)
    else:
        a = evalPart(ctx, join.p1)
        b = set(evalPart(ctx, join.p2))
        return _join(a, b)
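

# For intuition, the imported _join helper behaves roughly like the nested
# loop below (a sketch; see rdflib.plugins.sparql.evalutils for the real
# implementation):
#
#     def _join_sketch(a, b):
#         for x in a:
#             for y in b:
#                 if x.compatible(y):   # no conflicting bindings
#                     yield x.merge(y)
#
# evalLazyJoin instead threads the bindings of each p1 solution into the
# evaluation of p2, which can prune the p2 search space dramatically.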


def evalUnion(ctx, union):
    res = set()
    for x in evalPart(ctx, union.p1):
        res.add(x)
        yield x
    for x in evalPart(ctx, union.p2):
        if x not in res:
            yield x


def evalMinus(ctx, minus):
    a = evalPart(ctx, minus.p1)
    b = set(evalPart(ctx, minus.p2))
    return _minus(a, b)


def evalLeftJoin(ctx, join):
    for a in evalPart(ctx, join.p1):
        ok = False
        c = ctx.thaw(a)
        for b in evalPart(c, join.p2):
            if _ebv(join.expr, b.forget(ctx)):
                ok = True
                yield b
        if not ok:
            # we've cheated; the ctx above may contain variables
            # bound outside our scope. Before yielding a solution
            # without the OPTIONAL part, check that we would have had
            # no OPTIONAL matches even without those prior bindings...
            p1_vars = join.p1._vars
            if p1_vars is None or not any(
                    _ebv(join.expr, b)
                    for b in evalPart(ctx.thaw(a.remember(p1_vars)), join.p2)):
                yield a
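

# Why the re-check above matters: in, say,
#
#     SELECT * WHERE { ?s :p ?v . OPTIONAL { ?s :q ?v } }
#
# (prefixes hypothetical), the OPTIONAL group is evaluated with ?v already
# bound. If that finds no match, the match may have been blocked only by a
# binding inherited from outside join.p1's scope, so it is re-run keeping
# just join.p1's own variables before the bare solution `a` is yielded.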


def evalFilter(ctx, part):
    # TODO: Deal with dict returned from evalPart!
    for c in evalPart(ctx, part.p):
        if _ebv(part.expr, c.forget(ctx, _except=part._vars)
                if not part.no_isolated_scope else c):
            yield c


def evalGraph(ctx, part):

    if ctx.dataset is None:
        raise Exception(
            "Non-conjunctive-graph doesn't know about " +
            "graphs. Try a query without GRAPH.")

    ctx = ctx.clone()
    graph = ctx[part.term]
    if graph is None:

        for graph in ctx.dataset.contexts():

            # in SPARQL the default graph is NOT a named graph
            if graph == ctx.dataset.default_context:
                continue

            c = ctx.pushGraph(graph)
            c = c.push()
            graphSolution = [{part.term: graph.identifier}]
            for x in _join(evalPart(c, part.p), graphSolution):
                yield x

    else:
        c = ctx.pushGraph(ctx.dataset.get_context(graph))
        for x in evalPart(c, part.p):
            yield x
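

# For intuition: with GRAPH ?g { ?s ?p ?o }, the variable branch above
# iterates every named graph in the dataset (the default graph is skipped,
# since SPARQL does not treat it as named), evaluates the inner pattern in
# each, and joins in the binding for ?g. With GRAPH <iri> { ... }, the else
# branch evaluates the pattern inside that single named graph.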


def evalValues(ctx, part):
    for r in part.p.res:
        c = ctx.push()
        try:
            for k, v in r.items():
                if v != 'UNDEF':
                    c[k] = v
        except AlreadyBound:
            continue

        yield c.solution()
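

# For intuition: VALUES (?x ?y) { (:a :b) (UNDEF :c) } (terms hypothetical)
# produces one pushed context per row; an UNDEF cell leaves its variable
# unbound, and a row clashing with an existing binding raises AlreadyBound
# and is skipped.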


def evalMultiset(ctx, part):

    if part.p.name == 'values':
        return evalValues(ctx, part)

    return evalPart(ctx, part.p)


def evalPart(ctx, part):

    # try custom evaluation functions
    for name, c in list(CUSTOM_EVALS.items()):
        try:
            return c(ctx, part)
        except NotImplementedError:
            pass  # the given custom function did not handle this part

    if part.name == 'BGP':
        # Reorder triple patterns by the number of unbound nodes in the
        # current ctx, so patterns with more bound nodes are evaluated first
        triples = sorted(
            part.triples, key=lambda t: len([n for n in t if ctx[n] is None]))

        return evalBGP(ctx, triples)
    elif part.name == 'Filter':
        return evalFilter(ctx, part)
    elif part.name == 'Join':
        return evalJoin(ctx, part)
    elif part.name == 'LeftJoin':
        return evalLeftJoin(ctx, part)
    elif part.name == 'Graph':
        return evalGraph(ctx, part)
    elif part.name == 'Union':
        return evalUnion(ctx, part)
    elif part.name == 'ToMultiSet':
        return evalMultiset(ctx, part)
    elif part.name == 'Extend':
        return evalExtend(ctx, part)
    elif part.name == 'Minus':
        return evalMinus(ctx, part)

    elif part.name == 'Project':
        return evalProject(ctx, part)
    elif part.name == 'Slice':
        return evalSlice(ctx, part)
    elif part.name == 'Distinct':
        return evalDistinct(ctx, part)
    elif part.name == 'Reduced':
        return evalReduced(ctx, part)

    elif part.name == 'OrderBy':
        return evalOrderBy(ctx, part)
    elif part.name == 'Group':
        return evalGroup(ctx, part)
    elif part.name == 'AggregateJoin':
        return evalAggregateJoin(ctx, part)

    elif part.name == 'SelectQuery':
        return evalSelectQuery(ctx, part)
    elif part.name == 'AskQuery':
        return evalAskQuery(ctx, part)
    elif part.name == 'ConstructQuery':
        return evalConstructQuery(ctx, part)

    elif part.name == 'ServiceGraphPattern':
        raise Exception('ServiceGraphPattern not implemented')

    elif part.name == 'DescribeQuery':
        raise Exception('DESCRIBE not implemented')

    else:
        raise Exception("I don't know: %s" % part.name)


def evalGroup(ctx, group):
    """
    http://www.w3.org/TR/sparql11-query/#defn_algGroup
    """
    # grouping should be implemented by evalAggregateJoin
    return evalPart(ctx, group.p)


def evalAggregateJoin(ctx, agg):
    p = evalPart(ctx, agg.p)
    # p is always a Group; we always get a dict back

    group_expr = agg.p.expr
    res = collections.defaultdict(lambda: Aggregator(aggregations=agg.A))

    if group_expr is None:
        # no grouping, just e.g. COUNT in the SELECT clause:
        # use a single aggregator
        aggregator = res[True]
        for row in p:
            aggregator.update(row)
    else:
        for row in p:
            # determine the right group aggregator for this row
            k = tuple(_eval(e, row, False) for e in group_expr)
            res[k].update(row)

    # all rows are done; yield aggregated values
    for aggregator in res.values():
        yield FrozenBindings(ctx, aggregator.get_bindings())

    # there were no matches
    if len(res) == 0:
        yield FrozenBindings(ctx)
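

# For intuition: with GROUP BY ?s ?p, group_expr is the list [?s, ?p] and
# each row is routed to the Aggregator keyed by the tuple of those values;
# with no GROUP BY at all, every row feeds the single aggregator stored
# under the key True.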


def evalOrderBy(ctx, part):

    res = evalPart(ctx, part.p)

    for e in reversed(part.expr):

        reverse = bool(e.order and e.order == 'DESC')
        res = sorted(res, key=lambda x: _val(value(x, e.expr, variables=True)),
                     reverse=reverse)

    return res
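

# Note the trick above: sorting once per ORDER BY key, from the last key to
# the first, yields a correct multi-key ordering because Python's sort is
# stable: rows that compare equal on a later (more significant) pass keep
# the relative order established by the earlier passes.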


def evalSlice(ctx, slice):
    res = evalPart(ctx, slice.p)

    # islice avoids calling next() past the end of the iterator, which
    # under PEP 479 (Python 3.7+) would surface as a RuntimeError when it
    # escaped a generator
    return itertools.islice(
        res, slice.start,
        slice.start + slice.length if slice.length is not None else None)
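

# For intuition: OFFSET 10 LIMIT 5 arrives here as slice.start == 10 and
# slice.length == 5, so the call above is islice(res, 10, 15); without a
# LIMIT the stop bound is None and iteration runs to exhaustion.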


def evalReduced(ctx, part):
    """apply REDUCED to result

    REDUCED is not as strict as DISTINCT, but if the incoming rows were sorted
    it should produce the same result with limited extra memory and time per
    incoming row.
    """

    # This implementation uses a most-recently-used strategy and a limited
    # buffer size. It relates to the LRU caching algorithm:
    # https://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used_.28LRU.29
    MAX = 1
    # TODO: add configuration or determine the "best" size for most use cases
    # 0: no reduction
    # 1: compare only with the last row; almost no reduction with
    #    unordered incoming rows
    # N: the greater the buffer size, the greater the reduction, but more
    #    memory and time are needed

    # mixed data structure: set for lookup, deque for append/pop/remove
    mru_set = set()
    mru_queue = collections.deque()

    for row in evalPart(ctx, part.p):
        if row in mru_set:
            # forget the last position of this row
            mru_queue.remove(row)
        else:
            # row seems to be new
            yield row
            mru_set.add(row)
            if len(mru_set) > MAX:
                # drop the least recently used row from the buffer
                mru_set.remove(mru_queue.pop())
        # put the row at the front
        mru_queue.appendleft(row)


def evalDistinct(ctx, part):
    res = evalPart(ctx, part.p)

    done = set()
    for x in res:
        if x not in done:
            yield x
            done.add(x)


def evalProject(ctx, project):
    res = evalPart(ctx, project.p)

    return (row.project(project.PV) for row in res)


def evalSelectQuery(ctx, query):

    res = {}
    res["type_"] = "SELECT"
    res["bindings"] = evalPart(ctx, query.p)
    res["vars_"] = query.PV
    return res


def evalAskQuery(ctx, query):
    res = {}
    res["type_"] = "ASK"
    res["askAnswer"] = False
    for x in evalPart(ctx, query.p):
        res["askAnswer"] = True
        break

    return res


def evalConstructQuery(ctx, query):
    template = query.template

    if not template:
        # a CONSTRUCT WHERE query
        template = query.p.p.triples  # query->project->bgp ...

    graph = Graph()

    for c in evalPart(ctx, query.p):
        graph += _fillTemplate(template, c)

    res = {}
    res["type_"] = "CONSTRUCT"
    res["graph"] = graph

    return res


def evalQuery(graph, query, initBindings, base=None):

    initBindings = dict((Variable(k), v) for k, v in initBindings.items())

    ctx = QueryContext(graph, initBindings=initBindings)

    ctx.prologue = query.prologue
    main = query.algebra

    if main.datasetClause:
        if ctx.dataset is None:
            raise Exception(
                "Non-conjunctive-graph doesn't know about " +
                "graphs! Try a query without FROM (NAMED).")

        ctx = ctx.clone()  # or push/pop?

        firstDefault = False
        for d in main.datasetClause:
            if d.default:

                if not firstDefault:
                    # on the first FROM clause, replace the current default
                    # graph with a fresh one to load into
                    dg = ctx.dataset.get_context(BNode())
                    ctx = ctx.pushGraph(dg)
                    firstDefault = True

                ctx.load(d.default, default=True)

            elif d.named:
                g = d.named
                ctx.load(g, default=False)

    return evalPart(ctx, main)
```
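
`evalQuery` is rarely called directly; `rdflib.Graph.query` parses the query string, compiles it to this algebra, and dispatches here. A minimal sketch of driving the evaluator through that public API (the data and query are invented for illustration):

```python
from rdflib import Graph

g = Graph()
g.parse(data="""
    @prefix foaf: <http://xmlns.com/foaf/0.1/> .
    <http://example.org/alice> foaf:name "Alice" .
""", format="turtle")

# Graph.query() ends up in evalQuery above; for a SELECT the result object
# wraps the dict built by evalSelectQuery (type_, vars_, bindings).
q = """
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
    SELECT ?s ?name WHERE { ?s foaf:name ?name }
"""
for row in g.query(q):
    print(row.s, row.name)
```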
