Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/trix.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d30785e31577 | 1:56ad4e20f292 |
|---|---|
| 1 """ | |
| 2 A TriX parser for RDFLib | |
| 3 """ | |
| 4 from rdflib.namespace import Namespace | |
| 5 from rdflib.term import URIRef | |
| 6 from rdflib.term import BNode | |
| 7 from rdflib.term import Literal | |
| 8 from rdflib.graph import Graph, ConjunctiveGraph | |
| 9 from rdflib.exceptions import ParserError | |
| 10 from rdflib.parser import Parser | |
| 11 | |
| 12 from xml.sax.saxutils import handler | |
| 13 from xml.sax import make_parser | |
| 14 from xml.sax.handler import ErrorHandler | |
| 15 | |
| 16 __all__ = ['create_parser', 'TriXHandler', 'TriXParser'] | |
| 17 | |
| 18 | |
| 19 TRIXNS = Namespace("http://www.w3.org/2004/03/trix/trix-1/") | |
| 20 XMLNS = Namespace("http://www.w3.org/XML/1998/namespace") | |
| 21 | |
| 22 | |
class TriXHandler(handler.ContentHandler):
    """A SAX content handler that loads a TriX document into a store.

    See http://sw.nokia.com/trix/

    The handler is a small state machine driven by the element events;
    ``self.state`` takes the following values:

    * 0 -- outside of the ``TriX`` root element
    * 1 -- inside ``TriX``, between graphs
    * 2 -- inside a ``graph`` element
    * 3 -- inside the ``uri``/``id`` element that names the graph
    * 4 -- inside a ``triple`` element

    Any element that is not valid for the current state is reported
    through :meth:`error`, which raises ``ParserError``.
    """

    def __init__(self, store):
        """Create a handler that adds the parsed triples to *store*."""
        handler.ContentHandler.__init__(self)
        self.store = store
        self.preserve_bnode_ids = False
        # Filled in by setDocumentLocator(); kept as None until then so
        # that error() still works when no locator was ever supplied.
        self.locator = None
        self.reset()

    def reset(self):
        """Put the handler back into its initial (pre-document) state."""
        self.bnode = {}
        self.graph = None
        self.triple = None
        self.state = 0
        self.lang = None
        self.datatype = None
        # Character buffer of the element currently being read.
        # Initialised here so characters() never hits a missing attribute.
        self.chars = ""

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        """Remember the locator so errors can report a position."""
        self.locator = locator

    def startDocument(self):
        pass

    def startPrefixMapping(self, prefix, namespace):
        pass

    def endPrefixMapping(self, prefix):
        pass

    @staticmethod
    def _xml_lang(attrs):
        """Return the ``xml:lang`` value of *attrs*, or None if absent."""
        try:
            return attrs.getValue((str(XMLNS), "lang"))
        except KeyError:
            # language not required - ignore
            return None

    def startElementNS(self, name, qname, attrs):
        """Advance the state machine for an opening element.

        *name* is the ``(namespace uri, local name)`` pair of the element.
        Raises ``ParserError`` (via :meth:`error`) for elements outside the
        TriX namespace, unknown elements, or elements that are not allowed
        in the current state.
        """
        if name[0] != str(TRIXNS):
            self.error(
                "Only elements in the TriX namespace are allowed. %s!=%s"
                % (name[0], TRIXNS))

        local = name[1]

        if local == "TriX":
            if self.state == 0:
                self.state = 1
            else:
                self.error("Unexpected TriX element")

        elif local == "graph":
            if self.state == 1:
                self.state = 2
            else:
                self.error("Unexpected graph element")

        elif local == "uri":
            if self.state == 2:
                # the context uri
                self.state = 3
            elif self.state == 4:
                # part of a triple
                pass
            else:
                self.error("Unexpected uri element")

        elif local == "triple":
            if self.state == 2:
                if self.graph is None:
                    # anonymous graph, create one with random bnode id
                    self.graph = Graph(store=self.store)
                # start of a triple
                self.triple = []
                self.state = 4
            else:
                self.error("Unexpected triple element")

        elif local == "typedLiteral":
            if self.state == 4:
                # part of triple
                self.datatype = None
                self.lang = self._xml_lang(attrs)
                try:
                    self.datatype = attrs.getValueByQName("datatype")
                except KeyError:
                    self.error("No required attribute 'datatype'")
            else:
                self.error("Unexpected typedLiteral element")

        elif local == "plainLiteral":
            if self.state == 4:
                # part of triple
                self.datatype = None
                self.lang = self._xml_lang(attrs)
            else:
                self.error("Unexpected plainLiteral element")

        elif local == "id":
            if self.state == 2:
                # the context uri
                self.state = 3
            elif self.state == 4:
                # part of triple
                pass
            else:
                self.error("Unexpected id element")

        else:
            self.error("Unknown element %s in TriX namespace" % local)

        # Every accepted start tag begins a fresh character buffer.
        self.chars = ""

    def endElementNS(self, name, qname):
        """Consume the buffered text of a closing element.

        Depending on the element and the current state this names the
        current graph, appends a term to the pending triple, or adds the
        finished triple to the current graph. Raises ``ParserError`` (via
        :meth:`error`) on namespace, state or triple-length violations.
        """
        if name[0] != str(TRIXNS):
            self.error(
                "Only elements in the TriX namespace are allowed. %s!=%s"
                % (name[0], TRIXNS))

        local = name[1]

        if local == "uri":
            if self.state == 3:
                self.graph = Graph(store=self.store,
                                   identifier=URIRef(self.chars.strip()))
                self.state = 2
            elif self.state == 4:
                self.triple.append(URIRef(self.chars.strip()))
            else:
                self.error(
                    "Illegal internal self.state - This should never " +
                    "happen if the SAX parser ensures XML syntax correctness")

        elif local == "id":
            if self.state == 3:
                self.graph = Graph(self.store, identifier=self.get_bnode(
                    self.chars.strip()))
                self.state = 2
            elif self.state == 4:
                self.triple.append(self.get_bnode(self.chars.strip()))
            else:
                self.error(
                    "Illegal internal self.state - This should never " +
                    "happen if the SAX parser ensures XML syntax correctness")

        elif local == "plainLiteral" or local == "typedLiteral":
            if self.state == 4:
                # Literal text is used verbatim (not stripped).
                self.triple.append(Literal(
                    self.chars, lang=self.lang, datatype=self.datatype))
            else:
                self.error(
                    "This should never happen if the SAX parser " +
                    "ensures XML syntax correctness")

        elif local == "triple":
            if self.state == 4:
                if len(self.triple) != 3:
                    self.error("Triple has wrong length, got %d elements: %s" %
                               (len(self.triple), self.triple))

                self.graph.add(self.triple)
                self.state = 2
            else:
                self.error(
                    "This should never happen if the SAX parser " +
                    "ensures XML syntax correctness")

        elif local == "graph":
            self.graph = None
            self.state = 1

        elif local == "TriX":
            self.state = 0

        else:
            self.error("Unexpected close element")

    def get_bnode(self, label):
        """Return the blank node for the document-local *label*.

        With ``preserve_bnode_ids`` set, a ``BNode(label)`` is built on
        every call; otherwise the node is created once per label and then
        served from the ``self.bnode`` cache.
        """
        # NOTE(review): both branches build BNode(label), so the label is
        # kept as the node id either way -- confirm this is intended.
        if self.preserve_bnode_ids:
            return BNode(label)
        if label not in self.bnode:
            self.bnode[label] = BNode(label)
        return self.bnode[label]

    def characters(self, content):
        """Append *content* to the current element's character buffer."""
        self.chars += content

    def ignorableWhitespace(self, content):
        pass

    def processingInstruction(self, target, data):
        pass

    def error(self, message):
        """Raise ``ParserError`` for *message*.

        The message is prefixed with ``system id:line:column: `` when a
        document locator is available.
        """
        locator = self.locator
        if locator is None:
            # No position information available; report the bare message
            # instead of failing with an unrelated AttributeError.
            raise ParserError(message)
        info = "%s:%s:%s: " % (
            locator.getSystemId(),
            locator.getLineNumber(),
            locator.getColumnNumber())
        raise ParserError(info + message)
| 232 | |
| 233 | |
def create_parser(store):
    """Build a namespace-aware SAX parser that feeds *store*.

    The returned parser has a fresh ``TriXHandler`` bound to *store* as
    its content handler and a default ``ErrorHandler`` installed.
    """
    sax_parser = make_parser()
    try:
        # Workaround for bug in expatreader.py. Needed when
        # expatreader is trying to guess a prefix.
        sax_parser.start_namespace_decl(
            "xml", "http://www.w3.org/XML/1998/namespace")
    except AttributeError:
        # Not present in Jython (at least)
        pass
    sax_parser.setFeature(handler.feature_namespaces, 1)
    sax_parser.setContentHandler(TriXHandler(store))
    sax_parser.setErrorHandler(ErrorHandler())
    return sax_parser
| 248 | |
| 249 | |
class TriXParser(Parser):
    """A parser for TriX. See http://sw.nokia.com/trix/"""

    def __init__(self):
        pass

    def parse(self, source, sink, **args):
        """Parse the TriX document *source* into the store of *sink*.

        ``sink.store`` must be context aware. An optional
        ``preserve_bnode_ids`` keyword, when given and not None, is
        copied onto the content handler before parsing starts.
        """
        assert sink.store.context_aware, (
            "TriXParser must be given a context aware store.")

        # A fresh parser/handler pair is built per call, so nothing
        # needs to be reset before use.
        sax_parser = create_parser(sink.store)
        self._parser = sax_parser
        trix_handler = sax_parser.getContentHandler()

        keep_ids = args.get("preserve_bnode_ids")
        if keep_ids is not None:
            trix_handler.preserve_bnode_ids = keep_ids

        sax_parser.parse(source)
