comparison env/lib/python3.9/site-packages/rdflib_jsonld/parser.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # -*- coding: utf-8 -*-
2 """
3 This parser will interpret a JSON-LD document as an RDF Graph. See:
4
5 http://json-ld.org/
6
7 Example usage::
8
9 >>> from rdflib.plugin import register, Parser
10 >>> register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser')
11
12 >>> from rdflib import Graph, URIRef, Literal
13 >>> test_json = '''
14 ... {
15 ... "@context": {
16 ... "dc": "http://purl.org/dc/terms/",
17 ... "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
18 ... "rdfs": "http://www.w3.org/2000/01/rdf-schema#"
19 ... },
20 ... "@id": "http://example.org/about",
21 ... "dc:title": {
22 ... "@language": "en",
23 ... "@value": "Someone's Homepage"
24 ... }
25 ... }
26 ... '''
27 >>> g = Graph().parse(data=test_json, format='json-ld')
28 >>> list(g) == [(URIRef('http://example.org/about'),
29 ... URIRef('http://purl.org/dc/terms/title'),
30 ... Literal("Someone's Homepage", lang='en'))]
31 True
32
33 """
34 # NOTE: This code reads the entire JSON object into memory before parsing, but
35 # we should consider streaming the input to deal with arbitrarily large graphs.
36
37 import warnings
38 from rdflib.graph import ConjunctiveGraph
39 from rdflib.parser import Parser, URLInputSource
40 from rdflib.namespace import RDF, XSD
41 from rdflib.term import URIRef, BNode, Literal
42
43 from ._compat import str, str
44 from .context import Context, Term, UNDEF
45 from .util import source_to_json, VOCAB_DELIMS, context_from_urlinputsource
46 from .keys import CONTEXT, GRAPH, ID, INDEX, LANG, LIST, REV, SET, TYPE, VALUE, VOCAB
47
__all__ = ['JsonLDParser', 'to_rdf']


# Register the ``jsonld`` file suffix so RDFLib can guess the format from a
# file name. RDFLib builds without ``SUFFIX_FORMAT_MAP`` are tolerated, and an
# existing ``jsonld`` entry is left untouched.
try:
    from rdflib.util import SUFFIX_FORMAT_MAP
except ImportError:
    pass
else:
    if 'jsonld' not in SUFFIX_FORMAT_MAP:
        SUFFIX_FORMAT_MAP['jsonld'] = 'application/ld+json'


# Term substituted by Parser._key_to_graph when a key (or its term id) is TYPE.
TYPE_TERM = Term(str(RDF.type), TYPE, VOCAB)

# Default for Parser.allow_lists_of_lists when the caller passes None.
ALLOW_LISTS_OF_LISTS = True  # NOTE: Not allowed in JSON-LD 1.0
63
64
class JsonLDParser(Parser):
    """RDFLib parser plugin that reads a JSON-LD source into a graph sink."""

    def __init__(self):
        super(JsonLDParser, self).__init__()

    def parse(self, source, sink, **kwargs):
        """Parse the JSON-LD document in *source* and add its triples to *sink*.

        :param source: input source; its public id or system id is used to
            derive the base IRI when no ``base`` keyword is given.
        :param sink: graph receiving the triples. A sink that is not context
            aware is wrapped in a ``ConjunctiveGraph`` sharing its store and
            identifier.
        :param kwargs: optional settings:

            * ``encoding`` -- only used to warn when it is neither ``utf-8``
              nor ``utf-16``;
            * ``base`` -- base IRI overriding the one derived from *source*;
            * ``context`` -- context data loaded before parsing; when absent
              and *source* is a ``URLInputSource``, it is taken from
              ``context_from_urlinputsource(source)``;
            * ``produce_generalized_rdf`` -- forwarded to :func:`to_rdf`
              (defaults to ``False``).
        """
        encoding = kwargs.get('encoding') or 'utf-8'
        if encoding not in ('utf-8', 'utf-16'):
            warnings.warn("JSON should be encoded as unicode. " +
                          "Given encoding was: %s" % encoding)

        base = kwargs.get('base') or sink.absolutize(
            source.getPublicId() or source.getSystemId() or "")
        context_data = kwargs.get('context')
        if not context_data and isinstance(source, URLInputSource):
            context_data = context_from_urlinputsource(source)
        produce_generalized_rdf = kwargs.get('produce_generalized_rdf', False)

        data = source_to_json(source)

        # NOTE: A ConjunctiveGraph parses into a Graph sink, so no sink will be
        # context_aware. Keeping this check in case RDFLib is changed, or
        # someone passes something context_aware to this parser directly.
        if not sink.context_aware:
            conj_sink = ConjunctiveGraph(
                store=sink.store,
                identifier=sink.identifier)
        else:
            conj_sink = sink

        # The flag used to be read from kwargs and then dropped; pass it on so
        # the caller's choice actually reaches the converter.
        to_rdf(data, conj_sink, base, context_data,
               produce_generalized_rdf=produce_generalized_rdf)
96
97
def to_rdf(data, dataset, base=None, context_data=None,
           produce_generalized_rdf=False,
           allow_lists_of_lists=None):
    """Convert decoded JSON-LD *data* to RDF inside *dataset*.

    :param data: the decoded JSON document.
    :param dataset: graph or dataset the triples are added to.
    :param base: base IRI handed to the initial ``Context``.
    :param context_data: extra context loaded into that ``Context`` when it is
        truthy.
    :param produce_generalized_rdf: passed to ``Parser`` as
        ``generalized_rdf``.
    :param allow_lists_of_lists: passed through to ``Parser`` unchanged.
    :return: whatever ``Parser.parse`` returns for these arguments.
    """
    active_context = Context(base=base)
    if context_data:
        active_context.load(context_data)

    converter = Parser(
        generalized_rdf=produce_generalized_rdf,
        allow_lists_of_lists=allow_lists_of_lists,
    )
    return converter.parse(data, active_context, dataset)
108
109
class Parser(object):
    """Walk decoded JSON-LD data and add the resulting triples to a graph.

    Instance attributes (set in ``__init__``):

    * ``generalized_rdf`` -- when false, predicates written as blank node
      identifiers (``_:...``) are skipped and resolved node ids containing no
      ``:`` are rejected.
    * ``allow_lists_of_lists`` -- when false, a list found directly inside
      another list is dropped.
    """

    def __init__(self, generalized_rdf=False, allow_lists_of_lists=None):
        """Store the conversion flags.

        ``allow_lists_of_lists=None`` selects the module-level
        ``ALLOW_LISTS_OF_LISTS`` default.
        """
        self.generalized_rdf = generalized_rdf
        self.allow_lists_of_lists = (
            allow_lists_of_lists
            if allow_lists_of_lists is not None else ALLOW_LISTS_OF_LISTS)

    def parse(self, data, context, dataset):
        """Add every top-level node in *data* to *dataset*.

        :param data: decoded JSON value: a list of nodes or a single dict
            node. Any other value (string, number, ``None``...) yields no
            triples.
        :param context: active context. When *data* is a dict with a truthy
            ``CONTEXT`` entry, that entry is loaded into it.
        :param dataset: target; when it is context aware the triples go to its
            ``default_context``, otherwise to *dataset* itself.
        :return: the graph the top-level nodes were added to.
        """
        topcontext = False

        if isinstance(data, list):
            resources = data
        elif isinstance(data, dict):
            l_ctx = data.get(CONTEXT)
            if l_ctx:
                context.load(l_ctx, context.base)
                topcontext = True
            resources = [data]
        else:
            # A scalar document holds no nodes. Non-dict nodes are ignored by
            # _add_to_graph anyway, so treat this as "nothing to add" instead
            # of failing on an unbound local further down.
            resources = []

        if context.vocab:
            dataset.bind(None, context.vocab)
        for name, term in list(context.terms.items()):
            if term.id and term.id.endswith(VOCAB_DELIMS):
                dataset.bind(name, term.id)

        graph = dataset.default_context if dataset.context_aware else dataset

        for node in resources:
            self._add_to_graph(dataset, graph, context, node, topcontext)

        return graph

    def _add_to_graph(self, dataset, graph, context, node, topcontext=False):
        """Add the dict *node* and everything it references to *graph*.

        :param topcontext: true when the node's ``CONTEXT`` entry was already
            loaded by :meth:`parse` and must not be applied a second time.
        :return: the subject created for *node*, or ``None`` when *node* is
            not a dict, ``context.get_value(node)`` is truthy, or its id
            cannot be turned into an RDF term.
        """
        if not isinstance(node, dict) or context.get_value(node):
            return

        if CONTEXT in node and not topcontext:
            l_ctx = node.get(CONTEXT)
            if l_ctx:
                context = context.subcontext(l_ctx)
            else:
                # A falsy local context starts over from a fresh Context.
                context = Context(base=context.doc_base)

        id_val = context.get_id(node)
        if isinstance(id_val, str):
            subj = self._to_rdf_id(context, id_val)
        else:
            subj = BNode()

        if subj is None:
            return None

        # NOTE: crude way to signify that this node might represent a named graph
        no_id = id_val is None

        for key, obj in list(node.items()):
            if key in (CONTEXT, ID) or key in context.get_keys(ID):
                continue
            if key == REV or key in context.get_keys(REV):
                for rkey, robj in list(obj.items()):
                    self._key_to_graph(dataset, graph, context, subj, rkey,
                                       robj, reverse=True, no_id=no_id)
            else:
                self._key_to_graph(dataset, graph, context, subj, key, obj,
                                   no_id=no_id)

        return subj

    def _key_to_graph(self, dataset, graph, context, subj, key, obj,
                      reverse=False, no_id=False):
        """Add the triples for one ``key: obj`` entry of a node.

        :param subj: RDF term of the node that owns the entry.
        :param reverse: when true, triples are added as ``(object, pred,
            subj)``; a term flagged ``reverse`` flips this.
        :param no_id: true when the owning node had no id; a ``GRAPH`` entry
            then stays in *graph* instead of opening a named graph.
        """
        if isinstance(obj, list):
            obj_nodes = obj
        else:
            obj_nodes = [obj]

        term = context.terms.get(key)
        if term:
            term_id = term.id
            if term.container == LIST:
                obj_nodes = [{LIST: obj_nodes}]
            elif isinstance(obj, dict):
                if term.container == INDEX:
                    # Index map: the keys are dropped, the values flattened.
                    obj_nodes = []
                    for values in obj.values():
                        if isinstance(values, list):
                            obj_nodes.extend(values)
                        else:
                            obj_nodes.append(values)
                elif term.container == LANG:
                    # Language map: remember each value with its language as
                    # a (value, lang) tuple, turned into a Literal later on.
                    obj_nodes = []
                    for lang, values in obj.items():
                        if not isinstance(values, list):
                            values = [values]
                        for v in values:
                            obj_nodes.append((v, lang))
        else:
            term_id = None

        if TYPE in (key, term_id):
            term = TYPE_TERM
        elif GRAPH in (key, term_id):
            if dataset.context_aware and not no_id:
                subgraph = dataset.get_context(subj)
            else:
                subgraph = graph
            for onode in obj_nodes:
                self._add_to_graph(dataset, subgraph, context, onode)
            return
        elif SET in (key, term_id):
            for onode in obj_nodes:
                self._add_to_graph(dataset, graph, context, onode)
            return

        pred_uri = term.id if term else context.expand(key)

        # Replace set objects by their members and splice list values in.
        flattened = []
        for item in obj_nodes:
            if isinstance(item, dict):
                members = context.get_set(item)
                if members is not None:
                    item = members
            if isinstance(item, list):
                flattened += item
                continue
            flattened.append(item)
        obj_nodes = flattened

        if not pred_uri:
            return

        if term and term.reverse:
            reverse = not reverse

        bid = self._get_bnodeid(pred_uri)
        if bid:
            # Blank node predicates are only emitted as generalized RDF.
            if not self.generalized_rdf:
                return
            pred = BNode(bid)
        else:
            pred = URIRef(pred_uri)

        for obj_node in obj_nodes:
            rdf_obj = self._to_object(dataset, graph, context, term, obj_node)
            if rdf_obj is None:
                continue
            if reverse:
                graph.add((rdf_obj, pred, subj))
            else:
                graph.add((subj, pred, rdf_obj))

    def _to_object(self, dataset, graph, context, term, node, inlist=False):
        """Turn one JSON value into an RDF term usable as a triple object.

        :param term: term definition of the key the value came from, or a
            falsy value when there is none.
        :param node: ``None``, a ``(value, lang)`` tuple, a dict, or a scalar.
        :param inlist: true when the value is an element of a list.
        :return: a ``Literal``, a list head or ``RDF.nil``, the subject
            returned by :meth:`_add_to_graph`, or ``None`` when the value
            produces no object.
        """
        if node is None:
            return

        if isinstance(node, tuple):
            value, lang = node
            if value is None:
                return
            return Literal(value, lang=lang)

        if isinstance(node, dict):
            node_list = context.get_list(node)
            if node_list is not None:
                if inlist and not self.allow_lists_of_lists:
                    return
                listref = self._add_list(dataset, graph, context, term,
                                         node_list)
                if listref:
                    return listref

        else:
            # Scalar: either emit a literal right away or expand it into a
            # dict that the code below handles.
            if not term or not term.type:
                if isinstance(node, float):
                    return Literal(node, datatype=XSD.double)
                if term and term.language is not UNDEF:
                    lang = term.language
                else:
                    lang = context.language
                return Literal(node, lang=lang)
            else:
                if term.type == ID:
                    node = {ID: context.resolve(node)}
                elif term.type == VOCAB:
                    node = {ID: context.expand(node)
                            or context.resolve_iri(node)}
                else:
                    node = {TYPE: term.type,
                            VALUE: node}

        lang = context.get_language(node)
        if lang or context.get_key(VALUE) in node or VALUE in node:
            value = context.get_value(node)
            if value is None:
                return None
            if lang:
                return Literal(value, lang=lang)
            # The datatype is only looked up when no language applies.
            datatype = context.get_type(node)
            if datatype:
                return Literal(value, datatype=context.expand(datatype))
            return Literal(value)
        else:
            return self._add_to_graph(dataset, graph, context, node)

    def _to_rdf_id(self, context, id_val):
        """Return the ``BNode`` or ``URIRef`` for the node id *id_val*.

        Returns ``None`` when generalized RDF is off and the resolved id
        contains no ``:``.
        """
        bid = self._get_bnodeid(id_val)
        if bid:
            return BNode(bid)
        else:
            uri = context.resolve(id_val)
            if not self.generalized_rdf and ':' not in uri:
                return None
            return URIRef(uri)

    def _get_bnodeid(self, ref):
        """Return the part of *ref* after a leading ``_:``.

        Returns ``None`` when *ref* does not start with ``_:`` or nothing
        follows the prefix.
        """
        if not ref.startswith('_:'):
            return
        bid = ref.split('_:', 1)[-1]
        return bid or None

    def _add_list(self, dataset, graph, context, term, node_list):
        """Add *node_list* to *graph* as an RDF collection.

        Elements that are ``None`` or convert to no object are left out. A
        list cell is allocated only once an element is known to produce an
        object, so a skipped element in the middle of the list never leaves a
        cell whose ``rdf:rest`` points back at itself.

        :return: the blank node heading the collection, or ``RDF.nil`` when
            no element produced an object.
        """
        if not isinstance(node_list, list):
            node_list = [node_list]

        head = None
        tail = None
        for node in node_list:
            if node is None:
                continue
            obj = self._to_object(dataset, graph, context, term, node,
                                  inlist=True)
            if obj is None:
                continue
            cell = BNode()
            if tail is None:
                head = cell
            else:
                graph.add((tail, RDF.rest, cell))
            graph.add((cell, RDF.first, obj))
            tail = cell

        if tail is None:
            return RDF.nil
        graph.add((tail, RDF.rest, RDF.nil))
        return head