Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/rdflib/plugins/parsers/nquads.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 """ | |
| 2 This is an rdflib plugin for parsing N-Quads files into Conjunctive | |
| 3 graphs that can be used and queried. The store that backs the graph | |
| 4 *must* be able to handle contexts. | |
| 5 | |
| 6 >>> from rdflib import ConjunctiveGraph, URIRef, Namespace | |
| 7 >>> g = ConjunctiveGraph() | |
| 8 >>> data = open("test/nquads.rdflib/example.nquads", "rb") | |
| 9 >>> g.parse(data, format="nquads") # doctest:+ELLIPSIS | |
| 10 <Graph identifier=... (<class 'rdflib.graph.Graph'>)> | |
| 11 >>> assert len(g.store) == 449 | |
| 12 >>> # There should be 16 separate contexts | |
| 13 >>> assert len([x for x in g.store.contexts()]) == 16 | |
| 14 >>> # is the name of entity E10009 "Arco Publications"? | |
| 15 >>> # (in graph http://bibliographica.org/entity/E10009) | |
| 16 >>> # Looking for: | |
| 17 >>> # <http://bibliographica.org/entity/E10009> | |
| 18 >>> # <http://xmlns.com/foaf/0.1/name> | |
| 19 >>> # "Arco Publications" | |
| 20 >>> # <http://bibliographica.org/entity/E10009> | |
| 21 >>> s = URIRef("http://bibliographica.org/entity/E10009") | |
| 22 >>> FOAF = Namespace("http://xmlns.com/foaf/0.1/") | |
| 23 >>> assert(g.value(s, FOAF.name).eq("Arco Publications")) | |
| 24 """ | |
| 25 | |
| 26 from codecs import getreader | |
| 27 | |
| 28 from rdflib.py3compat import b | |
| 29 | |
| 30 from rdflib import ConjunctiveGraph | |
| 31 | |
| 32 # Build up from the NTriples parser: | |
| 33 from rdflib.plugins.parsers.ntriples import NTriplesParser | |
| 34 from rdflib.plugins.parsers.ntriples import ParseError | |
| 35 from rdflib.plugins.parsers.ntriples import r_tail | |
| 36 from rdflib.plugins.parsers.ntriples import r_wspace | |
| 37 from rdflib.plugins.parsers.ntriples import r_wspaces | |
| 38 | |
| 39 __all__ = ['NQuadsParser'] | |
| 40 | |
| 41 | |
class NQuadsParser(NTriplesParser):
    """N-Quads parser built on top of the N-Triples parser.

    Every statement is read as subject, predicate, object and an optional
    fourth term that names the context the triple is added to.
    """

    def parse(self, inputsource, sink, **kwargs):
        """Parse the N-Quads document in *inputsource* into *sink*'s store.

        :param inputsource: object whose ``getByteStream()`` returns a
            file-like byte stream; the stream is decoded as UTF-8.
        :param sink: graph whose store must be context aware. The quads are
            added through a ``ConjunctiveGraph`` created over that store
            with the same identifier.
        :param kwargs: accepted but not used by this method.
        :returns: the ``ConjunctiveGraph`` the quads were added to.
        :raises ParseError: if the byte stream has no ``read`` attribute,
            or if a line cannot be parsed (the offending line is included
            in the message).
        """
        assert sink.store.context_aware, ("NQuadsParser must be given"
                                          " a context aware store.")
        self.sink = ConjunctiveGraph(store=sink.store,
                                     identifier=sink.identifier)

        source = inputsource.getByteStream()

        if not hasattr(source, 'read'):
            raise ParseError("Item to parse must be a file-like object.")

        source = getreader('utf-8')(source)

        self.file = source
        self.buffer = ''
        while True:
            # Keep the untouched text of the line for the error message;
            # self.line is presumably consumed while the line is parsed.
            self.line = raw_line = self.readline()
            if raw_line is None:
                # readline() signals the end of the input with None.
                break
            try:
                self.parseline()
            except ParseError as msg:
                raise ParseError("Invalid line (%s):\n%r" % (msg, raw_line))

        return self.sink

    def parseline(self):
        """Parse the statement held in ``self.line`` and add it to the sink.

        Empty lines and lines starting with ``#`` are skipped.

        :raises ParseError: if text remains on the line after the
            statement's tail has been consumed.
        """
        self.eat(r_wspace)
        if (not self.line) or self.line.startswith('#'):
            return  # The line is empty or a comment

        subject = self.subject()
        self.eat(r_wspace)

        predicate = self.predicate()
        self.eat(r_wspace)

        obj = self.object()
        self.eat(r_wspace)

        # The fourth term is optional: when neither a URI reference nor a
        # node id is found, the sink's own identifier is used as context.
        context = self.uriref() or self.nodeid() or self.sink.identifier
        self.eat(r_tail)

        if self.line:
            raise ParseError("Trailing garbage")
        # Must have a context aware store - add on a normal Graph
        # discards anything where the ctx != graph.identifier
        self.sink.get_context(context).add((subject, predicate, obj))
