comparison planemo/lib/python3.7/site-packages/rdflib/plugins/sparql/results/tsvresults.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1
2 """
3 This implements the Tab Separated SPARQL Result Format
4
5 It is implemented with pyparsing, reusing the elements from the SPARQL Parser
6 """
7
8 import codecs
9
10 from pyparsing import (
11 Optional, ZeroOrMore, Literal, ParserElement, ParseException, Suppress,
12 FollowedBy, LineEnd)
13
14 from rdflib.query import Result, ResultParser
15
16 from rdflib.plugins.sparql.parser import (
17 Var, STRING_LITERAL1, STRING_LITERAL2, IRIREF, BLANK_NODE_LABEL,
18 NumericLiteral, BooleanLiteral, LANGTAG)
19 from rdflib.plugins.sparql.parserutils import Comp, Param, CompValue
20
21 from rdflib import Literal as RDFLiteral
22
23 from rdflib.py3compat import bytestype
24
# Columns are separated by tabs (see ROW and HEADER below), so only spaces
# and newlines are registered as skippable whitespace for elements created
# from here on.
ParserElement.setDefaultWhitespaceChars(" \n")


# Either of the two string-literal forms imported from the SPARQL parser.
String = STRING_LITERAL1 | STRING_LITERAL2

# The two alternative suffixes a literal may carry: a language tag, or the
# '^^' marker followed by a datatype IRI.  All pieces are told not to skip
# leading whitespace.
_LANG_SUFFIX = Param('lang', LANGTAG.leaveWhitespace())
_DATATYPE_SUFFIX = (
    Literal('^^').leaveWhitespace()
    + Param('datatype', IRIREF).leaveWhitespace()
)

# A literal is parsed into a 'literal' component holding 'string' and,
# when present, 'lang' or 'datatype'.
RDFLITERAL = Comp(
    'literal',
    Param('string', String) + Optional(_LANG_SUFFIX | _DATATYPE_SUFFIX),
)

# Unique marker emitted for an empty column; TSVResultParser.convertTerm
# maps it to None.
NONE_VALUE = object()

# An empty column is recognised by lookahead only: the next thing is either
# the end of the line or the next tab separator.
EMPTY = (FollowedBy(LineEnd()) | FollowedBy("\t")).setParseAction(
    lambda _tokens: NONE_VALUE)

# Any non-empty column value.
TERM = (
    RDFLITERAL
    | IRIREF
    | BLANK_NODE_LABEL
    | NumericLiteral
    | BooleanLiteral
)

# One result row: a column, then any number of tab-prefixed columns.
# parseWithTabs() keeps the tab characters in the input so the separators
# can actually be matched.
ROW = (
    (EMPTY | TERM) + ZeroOrMore(Suppress("\t") + (EMPTY | TERM))
).parseWithTabs()

# The header row: tab-separated variables.
HEADER = (Var + ZeroOrMore(Suppress("\t") + Var)).parseWithTabs()
47
48
class TSVResultParser(ResultParser):
    """Parser turning tab-separated SPARQL SELECT results into a Result."""

    def parse(self, source):
        """Read a header line and result rows from *source*.

        :param source: object providing ``read()`` and ``readline()``.  If
            ``read(0)`` yields bytes, the stream is wrapped in a UTF-8
            decoding reader first.
        :return: a ``Result('SELECT')`` whose ``vars`` holds the parsed
            header and whose ``bindings`` is a list with one dict per
            non-empty row, mapping each variable to its converted term.
        :raises ParseException: if the header or a row does not match the
            grammar.  The offending line, a caret under the error column
            and the error itself are printed before the exception is
            re-raised.
        """
        if isinstance(source.read(0), bytestype):
            # if reading from source returns bytes do utf-8 decoding
            source = codecs.getreader('utf-8')(source)

        try:
            r = Result('SELECT')

            header = source.readline()

            r.vars = list(HEADER.parseString(header.strip(), parseAll=True))
            r.bindings = []
            while True:
                line = source.readline()
                if not line:
                    # readline() returns an empty string at end of input.
                    break
                line = line.strip('\n')
                if line == "":
                    # Blank lines carry no row; skip them.
                    continue

                row = ROW.parseString(line, parseAll=True)
                r.bindings.append(
                    dict(zip(r.vars, (self.convertTerm(x) for x in row))))

            return r

        except ParseException as err:
            print(err.line)
            print(" " * (err.column - 1) + "^")
            print(err)
            # Previously the error was swallowed here and the method fell
            # through, returning None.  Propagate it so callers see the
            # real failure instead of an unusable result.
            raise

    def convertTerm(self, t):
        """Convert one parsed column value into its final form.

        :param t: a token produced by the ROW grammar.
        :return: ``None`` for the empty-column marker, an ``RDFLiteral``
            for a parsed 'literal' component, and *t* itself otherwise.
        :raises Exception: if *t* is a ``CompValue`` of any other kind.
        """
        if t is NONE_VALUE:
            return None
        if isinstance(t, CompValue):
            if t.name == 'literal':
                return RDFLiteral(t.string, lang=t.lang, datatype=t.datatype)
            else:
                raise Exception("I dont know how to handle this: %s" % (t,))
        else:
            return t
92
if __name__ == '__main__':
    # Ad-hoc command-line check: parse the TSV file named by the first
    # argument and dump the variables and bindings.
    import sys

    # The `file` builtin no longer exists in Python 3; use open().  Binary
    # mode is fine because TSVResultParser.parse decodes bytes as UTF-8,
    # and the `with` block makes sure the handle is closed.
    with open(sys.argv[1], 'rb') as tsv_file:
        r = Result.parse(tsv_file, format='tsv')
    print(r.vars)
    print(r.bindings)
    # print(r.serialize(format='json'))