Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/rdflib/plugins/parsers/notation3.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 notation3.py - Standalone Notation3 Parser | |
| 4 Derived from CWM, the Closed World Machine | |
| 5 | |
| 6 Authors of the original suite: | |
| 7 | |
| 8 * Dan Connolly <@@> | |
| 9 * Tim Berners-Lee <@@> | |
| 10 * Yosi Scharf <@@> | |
| 11 * Joseph M. Reagle Jr. <reagle@w3.org> | |
| 12 * Rich Salz <rsalz@zolera.com> | |
| 13 | |
| 14 http://www.w3.org/2000/10/swap/notation3.py | |
| 15 | |
| 16 Copyright 2000-2007, World Wide Web Consortium. | |
| 17 Copyright 2001, MIT. | |
| 18 Copyright 2001, Zolera Systems Inc. | |
| 19 | |
| 20 License: W3C Software License | |
| 21 http://www.w3.org/Consortium/Legal/copyright-software | |
| 22 | |
| 23 Modified by Sean B. Palmer | |
| 24 Copyright 2007, Sean B. Palmer. | |
| 25 | |
| 26 Modified to work with rdflib by Gunnar Aastrand Grimnes | |
| 27 Copyright 2010, Gunnar A. Grimnes | |
| 28 | |
| 29 """ | |
| 30 | |
| 31 # Python standard libraries | |
| 32 import types | |
| 33 import sys | |
| 34 import os | |
| 35 import re | |
| 36 import codecs | |
| 37 import warnings | |
| 38 | |
| 39 from decimal import Decimal | |
| 40 | |
| 41 from uuid import uuid4 | |
| 42 | |
| 43 from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id | |
| 44 from rdflib.graph import QuotedGraph, ConjunctiveGraph, Graph | |
| 45 from rdflib import py3compat | |
| 46 b = py3compat.b | |
| 47 | |
| 48 __all__ = ['BadSyntax', 'N3Parser', 'TurtleParser', | |
| 49 "splitFragP", "join", "base", | |
| 50 "runNamespace", "uniqueURI", "hexify"] | |
| 51 | |
| 52 from rdflib.parser import Parser | |
| 53 | |
| 54 | |
def splitFragP(uriref, punct=0):
    """Split a URI reference just before its fragment identifier.

    The ``#`` punctuation stays attached to the fragment part.  When the
    reference contains no ``#`` the fragment part is the empty string.
    ``punct`` is accepted but not used.

    e.g.

    >>> splitFragP("abc#def")
    ('abc', '#def')

    >>> splitFragP("abcdef")
    ('abcdef', '')

    """
    head, hash_mark, tail = uriref.rpartition("#")
    if not hash_mark:
        # No '#' at all: the whole reference is the non-fragment part.
        return uriref, ''
    # Split at the *last* '#', keeping it on the fragment side.
    return head, hash_mark + tail
| 75 | |
| 76 | |
@py3compat.format_doctest_out
def join(here, there):
    """join an absolute URI and URI reference
    (non-ascii characters are supported/doctested;
    haven't checked the details of the IRI spec though)

    ``here`` is assumed to be absolute.
    ``there`` is URI reference.

    >>> join('http://example/x/y/z', '../abc')
    'http://example/x/abc'

    Raise ValueError if there uses relative path
    syntax but here has no hierarchical path.

    >>> join('mid:foo@example', '../foo') # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
        raise ValueError(here)
    ValueError: Base <mid:foo@example> has no slash
    after colon - with relative '../foo'.

    >>> join('http://example/x/y/z', '')
    'http://example/x/y/z'

    >>> join('mid:foo@example', '#foo')
    'mid:foo@example#foo'

    We grok IRIs

    >>> len(%(u)s'Andr\\xe9')
    5

    >>> join('http://example.org/', %(u)s'#Andr\\xe9')
    %(u)s'http://example.org/#Andr\\xe9'
    """
    # NOTE: the docstring above is passed through the decorator, which
    # fills in the u-prefix placeholders; keep it free of stray percent
    # signs.

    # assert(here.find("#") < 0), \
    #     "Base may not contain hash: '%s'" % here  # why must caller splitFrag?

    slashl = there.find('/')
    colonl = there.find(':')

    # join(base, 'foo:/') -- absolute
    # A colon that comes before any slash means ``there`` carries its own
    # scheme, so it is returned untouched.
    if colonl >= 0 and (slashl < 0 or colonl < slashl):
        return there

    bcolonl = here.find(':')
    assert(bcolonl >= 0), \
        "Base uri '%s' is not absolute" % here  # else it's not absolute

    # Only a fragment (or nothing at all): append it to the base as-is.
    path, frag = splitFragP(there)
    if not path:
        return here + frag

    # join('mid:foo@example', '../foo') bzzt
    # A relative path needs a base whose scheme is followed by '/'.
    if here[bcolonl + 1:bcolonl + 2] != '/':
        raise ValueError(
            ("Base <%s> has no slash after "
             "colon - with relative '%s'.") % (here, there))

    # bpath is the index in ``here`` where the path starts: the first '/'
    # after 'scheme://', or right after the colon when there is no '//'.
    if here[bcolonl + 1:bcolonl + 3] == '//':
        bpath = here.find('/', bcolonl + 3)
    else:
        bpath = bcolonl + 1

    # join('http://xyz', 'foo')
    # No path at all after the authority: give the base an empty root path.
    if bpath < 0:
        bpath = len(here)
        here = here + '/'

    # join('http://xyz/', '//abc') => 'http://abc'
    if there[:2] == '//':
        return here[:bcolonl + 1] + there

    # join('http://xyz/', '/abc') => 'http://xyz/abc'
    if there[:1] == '/':
        return here[:bpath] + there

    # slashr tracks the last '/' of the part of ``here`` that is kept.
    slashr = here.rfind('/')

    # Consume leading './' and '../' segments of the relative path.
    while 1:
        if path[:2] == './':
            path = path[2:]
        if path == '.':
            path = ''
        elif path[:3] == '../' or path == '..':
            path = path[3:]
            # Step the base up one directory, but never above bpath.
            i = here.rfind('/', bpath, slashr)
            if i >= 0:
                here = here[:i + 1]
                slashr = i
        else:
            break

    return here[:slashr + 1] + path + frag
| 172 | |
| 173 | |
def base():
    """Return the base URI for this process - the Web equivalent of the cwd.

    Relative or absolute unix-standard filenames parsed relative to
    this yield the URI of the file.
    If we had a reliable way of getting a computer name,
    we should put it in the hostname just to prevent ambiguity.

    """
    # return "file://" + hostname + os.getcwd() + "/"
    working_dir = _fixslash(os.getcwd())
    return "".join(("file://", working_dir, "/"))
| 185 | |
| 186 | |
| 187 def _fixslash(s): | |
| 188 """ Fix windowslike filename to unixlike - (#ifdef WINDOWS)""" | |
| 189 s = s.replace("\\", "/") | |
| 190 if s[0] != "/" and s[1] == ":": | |
| 191 s = s[2:] # @@@ Hack when drive letter present | |
| 192 return s | |
| 193 | |
| 194 | |
# Indexes of the four parts of a statement quadruple.
CONTEXT, PRED, SUBJ, OBJ = range(4)

PARTS = (PRED, SUBJ, OBJ)
ALL4 = (CONTEXT, PRED, SUBJ, OBJ)

# Tags used as the first element of (tag, value) pairs.
SYMBOL = 0
FORMULA = 1
LITERAL = 2
ANONYMOUS = 3
LITERAL_DT = 21
LITERAL_LANG = 22
XMLLITERAL = 25

# Well-known namespaces and the terms built from them.
Logic_NS = "http://www.w3.org/2000/10/swap/log#"
NODE_MERGE_URI = Logic_NS + "is"  # Pseudo-property indicating node merging
forSomeSym = Logic_NS + "forSome"
forAllSym = Logic_NS + "forAll"

RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
OWL_NS = "http://www.w3.org/2002/07/owl#"
DAML_sameAs_URI = OWL_NS + "sameAs"
parsesTo_URI = Logic_NS + "parsesTo"
RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/"

List_NS = RDF_NS_URI  # From 20030808
_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#"

# List vocabulary, each as a (SYMBOL, uri) pair.
N3_first, N3_rest, N3_li, N3_nil, N3_List, N3_Empty = (
    (SYMBOL, List_NS + local_name)
    for local_name in ("first", "rest", "li", "nil", "List", "Empty")
)
| 232 | |
| 233 | |
runNamespaceValue = None  # computed on first use by runNamespace()


def runNamespace():
    "Return a URI suitable as a namespace for run-local objects"
    # @@@ include hostname (privacy?) (hash it?)
    global runNamespaceValue
    if runNamespaceValue is not None:
        return runNamespaceValue
    # First call: derive the namespace once and remember it.
    runNamespaceValue = join(base(), _unique_id()) + '#'
    return runNamespaceValue
| 244 | |
nextu = 0  # counter behind uniqueURI(); incremented before each use


def uniqueURI():
    "A unique URI"
    global nextu
    nextu = nextu + 1
    # return runNamespace() + "u_" + `nextu`
    suffix = "u_" + str(nextu)
    return runNamespace() + suffix
| 254 | |
| 255 | |
tracking = False
chatty_flag = 50

# from why import BecauseOfData, becauseSubexpression


def BecauseOfData(*args, **kargs):
    """Stand-in for the provenance helper; accepts anything, returns None."""
    # print args, kargs
    return None
| 265 | |
| 266 | |
def becauseSubexpression(*args, **kargs):
    """Stand-in for the provenance helper; accepts anything, returns None."""
    # print args, kargs
    return None
| 270 | |
N3_forSome_URI, N3_forAll_URI = forSomeSym, forAllSym

# Magic resources we know about

# This is the hash on namespace URIs.
ADDED_HASH = "#"  # Stop where we use this in case we want to remove it!

RDF_type = (SYMBOL, RDF_type_URI)
DAML_sameAs = (SYMBOL, DAML_sameAs_URI)

LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies"

# XSD datatype URIs used for literals.
(BOOLEAN_DATATYPE,
 DECIMAL_DATATYPE,
 DOUBLE_DATATYPE,
 FLOAT_DATATYPE,
 INTEGER_DATATYPE) = (
    _XSD_PFX + local_name
    for local_name in ("boolean", "decimal", "double", "float", "integer")
)

option_noregen = 0  # If set, do not regenerate genids on output

# @@ I18n - the notname chars need extending for well known unicode non-text
# characters. The XML spec switched to assuming unknown things were name
# characters.
# _namechars = string.lowercase + string.uppercase + string.digits + '_-'
_notQNameChars = "\t\r\n !\"#$&'()*,+/;<=>?@[\\]^`{|}~"  # else valid qname :-/
_notKeywordsChars = _notQNameChars + "."
_notNameChars = _notQNameChars + ":"  # Assume anything else valid name :-/
_rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

hexChars = 'ABCDEFabcdef0123456789'
escapeChars = "(_~.-!$&'()*+,;=/?#@%)"  # valid for \ escapes in localnames
| 304 | |
def unicodeExpand(m):
    """Return the character named by a matched unicode escape.

    :param m: regex match whose group 1 holds the hexadecimal digits of
        the code point
    :returns: the one-character string for that code point
    :raises Exception: if the digits do not name a valid code point; the
        underlying error is attached as ``__cause__``
    """
    hex_digits = m.group(1)
    try:
        return chr(int(hex_digits, 16))
    # ValueError: not hex, or outside the unicode range.
    # OverflowError: too large for chr() to even consider.
    # Anything else (e.g. KeyboardInterrupt) is not ours to swallow.
    except (ValueError, OverflowError) as err:
        raise Exception(
            "Invalid unicode code point: " + hex_digits) from err
| 310 | |
if py3compat.narrow_build:
    def unicodeExpand(m):
        """Narrow-build variant: warn and fall back to ``unicode_escape``
        decoding when ``chr`` rejects the code point."""
        code_point = int(m.group(1), 16)
        try:
            return chr(code_point)
        except ValueError:
            whole_escape = m.group(0)
            warnings.warn(
                'Encountered a unicode char > 0xFFFF in a narrow python build. '
                'Trying to degrade gracefully, but this can cause problems '
                'later when working with the string:\n%s' % whole_escape)
            return codecs.decode(whole_escape, 'unicode_escape')
| 321 | |
# Backslash-u (4 hex digits) and backslash-U (8 hex digits) escapes;
# group 1 is the hex digits.
unicodeEscape4 = re.compile(r'\\u([0-9a-fA-F]{4})')
unicodeEscape8 = re.compile(r'\\U([0-9a-fA-F]{8})')


N3CommentCharacter = "#"  # For unix script #! compatibility

########################################## Parse string to sink
#
# Regular expressions:

# end of line, poss. w/comment
eol = re.compile(r'[ \t]*(#[^\n]*)?\r?\n')
# end of file, poss. w/comment
eof = re.compile(r'[ \t]*(#[^\n]*)?$')
# Whitespace not including NL
ws = re.compile(r'[ \t]*')
# integer
signed_integer = re.compile(r'[-+]?[0-9]+')
integer_syntax = re.compile(r'[-+]?[0-9]+')
decimal_syntax = re.compile(r'[-+]?[0-9]*\.[0-9]+')
exponent_syntax = re.compile(
    r'[-+]?(?:[0-9]+\.[0-9]*(?:e|E)[-+]?[0-9]+|'
    r'\.[0-9](?:e|E)[-+]?[0-9]+|'
    r'[0-9]+(?:e|E)[-+]?[0-9]+)')
# Unsigned integer
digitstring = re.compile(r'[0-9]+')
interesting = re.compile(r"""[\\\r\n\"\']""")
langcode = re.compile(r'[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*')
| 348 | |
| 349 | |
| 350 class SinkParser: | |
def __init__(self, store, openFormula=None, thisDoc="", baseURI=None,
             genPrefix="", why=None, turtle=False):
    """ note: namespace names should *not* end in # ;
    the # will get added during qname processing """

    # Prefix bindings; the empty prefix defaults to the document itself.
    self._bindings = {}
    if thisDoc != "":
        assert ':' in thisDoc, "Document URI not absolute: <%s>" % thisDoc
        self._bindings[""] = thisDoc + "#"  # default

    self._store = store
    if genPrefix:
        store.setGenPrefix(genPrefix)  # pass it on

    self._thisDoc = thisDoc
    self.lines = 0  # for error handling
    self.startOfLine = 0  # For calculating character number
    self._genPrefix = genPrefix
    self.keywords = ['a', 'this', 'bind', 'has', 'is', 'of',
                     'true', 'false']
    self.keywordsSet = 0  # Then only can others be considered qnames
    # Dict of anon nodes already declared ln: Term
    self._anonymousNodes = {}
    self._variables = {}
    self._parentVariables = {}
    self._reason = why  # Why the parser was asked to parse this

    self.turtle = turtle  # raise exception when encountering N3 extensions
    # Turtle allows single or double quotes around strings, whereas N3
    # only allows double quotes.
    if turtle:
        self.string_delimiters = ('"', "'")
    else:
        self.string_delimiters = ('"',)

    self._reason2 = None  # Why these triples
    # was: diag.tracking
    if tracking:
        self._reason2 = BecauseOfData(
            store.newSymbol(thisDoc), because=self._reason)

    # Explicit base wins, then the document URI, otherwise no base.
    self._baseURI = baseURI or thisDoc or None

    assert not self._baseURI or ':' in self._baseURI

    if not self._genPrefix:
        self._genPrefix = (thisDoc + "#_g") if thisDoc else uniqueURI()

    if openFormula is not None:
        self._formula = openFormula
    elif thisDoc:
        self._formula = store.newFormula(thisDoc + "#_formula")
    else:
        self._formula = store.newFormula()

    self._context = self._formula
    self._parentContext = None
| 415 | |
def here(self, i):
    """Build an identifier string from a position in the file.

    The result is the generated-id prefix followed by the current line
    count and the 1-based character offset of ``i`` from the start of
    the line.  It is used as the id of blank nodes introduced by '[' or
    a path character, so that bnode ids are repeatable and point back
    at the place in the document that introduced them."""

    column = i - self.startOfLine + 1
    return "%s_L%iC%i" % (self._genPrefix, self.lines, column)
| 429 | |
def formula(self):
    """Return the formula this parser writes its statements into."""
    top_level = self._formula
    return top_level
| 432 | |
def loadStream(self, stream):
    """Read ``stream`` to the end and parse it with ``loadBuf``."""
    whole_input = stream.read()  # Not ideal
    return self.loadBuf(whole_input)
| 435 | |
def loadBuf(self, buf):
    """Parse a buffer and return its top level formula.

    Runs ``startDoc``, feeds the whole buffer, then returns whatever
    ``endDoc`` returns."""
    self.startDoc()
    self.feed(buf)
    top_level = self.endDoc()  # self._formula
    return top_level
| 442 | |
def feed(self, octets):
    """Feed an octet stream (or an already decoded string) to the parser.

    Non-``str`` input is decoded as UTF-8 and a leading byte order
    mark is dropped.

    If BadSyntax is raised, the string passed in the exception object
    is the remainder after any statements have been parsed.  So if
    there is more data to feed to the parser, it should be
    straightforward to recover."""

    if isinstance(octets, str):
        text = octets
    else:
        text = octets.decode('utf-8')
        # NB already decoded, so the BOM is the single char \ufeff
        if text[:1] == codecs.BOM_UTF8.decode('utf-8'):
            text = text[1:]

    pos = 0
    while pos >= 0:
        start = self.skipSpace(text, pos)
        if start < 0:
            return  # nothing but white space left

        pos = self.directiveOrStatement(text, start)
        if pos < 0:
            #print("# next char: %s" % text[start])
            self.BadSyntax(text, start,
                           "expected directive or statement")
| 471 | |
def directiveOrStatement(self, argstr, h):
    """Parse one directive or statement starting at or after ``h``.

    Returns the index after the parsed construct, or a negative number
    at end of input or when nothing could be parsed.  In turtle mode a
    SPARQL-style directive is tried first and returned without a dot
    check; ``@``-directives and statements go through ``checkDot``."""

    start = self.skipSpace(argstr, h)
    if start < 0:
        return start  # EOF

    if self.turtle:
        end = self.sparqlDirective(argstr, start)
        if end >= 0:
            return end

    end = self.directive(argstr, start)
    if end < 0:
        end = self.statement(argstr, start)
        if end < 0:
            return end

    return self.checkDot(argstr, end)
| 492 | |
| 493 # @@I18N | |
| 494 # _namechars = string.lowercase + string.uppercase + string.digits + '_-' | |
| 495 | |
def tok(self, tok, argstr, i, colon=False):
    """Check for keyword.  Space must have been stripped on entry and
    we must not be at end of file.

    The keyword is recognised when it is written with a leading ``@``,
    or bare when ``tok`` is in ``self.keywords``.  It must be followed
    by a character that cannot continue a keyword; the end of the input
    counts as such a boundary.

    if colon, then keyword followed by colon is ok
    (@prefix:<blah> is ok, rdf:type shortcut a must be followed by ws)

    Returns the index just past the keyword, or -1 if it is not there.
    """

    assert tok[0] not in _notNameChars  # not for punctuation
    if argstr[i:i + 1] == "@":
        i = i + 1
    elif tok not in self.keywords:
        return -1  # No, this has neither keywords declaration nor "@"

    end = i + len(tok)
    # The keyword text itself must match in every case.  (Previously
    # "A and B or C" let the colon alternative succeed on its own, so
    # any six characters followed by ':' passed for "@prefix:".)
    if argstr[i:end] != tok:
        return -1

    # Slice, don't index: a keyword that is the last thing in the input
    # yields '' here instead of raising IndexError, and '' is "in" every
    # string, so end of input is treated as a boundary.
    following = argstr[end:end + 1]
    if following in _notKeywordsChars or (colon and following == ':'):
        return end
    return -1
| 518 | |
def sparqlTok(self, tok, argstr, i):
    """Check for SPARQL keyword.  Space must have been stripped on entry
    and we must not be at end of file.
    Case insensitive and not preceded by @

    The keyword must be followed by a character that cannot be part of
    a qname; the end of the input counts as such a boundary.

    Returns the index just past the keyword, or -1 if it is not there.
    """

    assert tok[0] not in _notNameChars  # not for punctuation

    end = i + len(tok)
    if argstr[i:end].lower() != tok.lower():
        return -1

    # Slice, don't index: when the keyword is the last thing in the
    # input this yields '' (which is "in" every string) instead of
    # raising IndexError, so the caller can report a proper syntax error.
    if argstr[end:end + 1] in _notQNameChars:
        return end
    return -1
| 533 | |
| 534 | |
def directive(self, argstr, i):
    """Try to parse an ``@``-directive starting at ``argstr[i]``.

    Handles ``bind`` (rejected as obsolete), ``keywords``, ``forAll``,
    ``forSome``, ``prefix`` and ``base``.  ``keywords``, ``forAll`` and
    ``forSome`` are additionally rejected in turtle mode.

    Returns the index just past the directive, the negative
    ``skipSpace`` result at end of input, or -1 when the text is not a
    directive.  Problems are reported through ``self.BadSyntax``.
    """
    j = self.skipSpace(argstr, i)
    if j < 0:
        return j  # eof
    res = []

    j = self.tok('bind', argstr, i)  # implied "#". Obsolete.
    if j > 0:
        self.BadSyntax(argstr, i,
                       "keyword bind is obsolete: use @prefix")

    j = self.tok('keywords', argstr, i)
    if j > 0:
        if self.turtle:
            self.BadSyntax(argstr, i, "Found 'keywords' when in Turtle mode.")

        i = self.commaSeparatedList(argstr, j, res, self.bareWord)
        if i < 0:
            # Report at the start of the list (j); i is negative here.
            self.BadSyntax(argstr, j,
                           "'@keywords' needs comma separated list of words")
        self.setKeywords(res[:])
        return i

    j = self.tok('forAll', argstr, i)
    if j > 0:
        if self.turtle:
            self.BadSyntax(argstr, i, "Found 'forAll' when in Turtle mode.")

        i = self.commaSeparatedList(argstr, j, res, self.uri_ref2)
        if i < 0:
            # Report at the start of the list (j); i is negative here.
            self.BadSyntax(argstr, j,
                           "Bad variable list after @forAll")
        for x in res:
            # self._context.declareUniversal(x)
            if x not in self._variables or x in self._parentVariables:
                self._variables[x] = self._context.newUniversal(x)
        return i

    j = self.tok('forSome', argstr, i)
    if j > 0:
        if self.turtle:
            self.BadSyntax(argstr, i, "Found 'forSome' when in Turtle mode.")

        i = self.commaSeparatedList(argstr, j, res, self.uri_ref2)
        if i < 0:
            # Report at the start of the list (j); i is negative here.
            self.BadSyntax(argstr, j,
                           "Bad variable list after @forSome")
        for x in res:
            self._context.declareExistential(x)
        return i

    j = self.tok('prefix', argstr, i, colon=True)  # no implied "#"
    if j >= 0:
        t = []
        i = self.qname(argstr, j, t)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "expected qname after @prefix")
        j = self.uri_ref2(argstr, i, t)
        if j < 0:
            self.BadSyntax(argstr, i,
                           "expected <uriref> after @prefix _qname_")
        ns = self.uriOf(t[1])

        if self._baseURI:
            ns = join(self._baseURI, ns)
        elif ":" not in ns:
            self.BadSyntax(argstr, j,
                           "With no base URI, cannot use " +
                           "relative URI in @prefix <" + ns + ">")
        assert ':' in ns  # must be absolute
        self._bindings[t[0][0]] = ns
        self.bind(t[0][0], hexify(ns))
        return j

    j = self.tok('base', argstr, i)  # Added 2007/7/7
    if j >= 0:
        t = []
        i = self.uri_ref2(argstr, j, t)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "expected <uri> after @base ")
        ns = self.uriOf(t[0])

        if self._baseURI:
            ns = join(self._baseURI, ns)
        elif ":" not in ns:
            # Only a *relative* URI needs a previous base; an absolute
            # one is accepted, matching the @prefix handling above.
            self.BadSyntax(argstr, j,
                           "With no previous base URI, cannot use " +
                           "relative URI in @base <" + ns + ">")
        assert ':' in ns  # must be absolute
        self._baseURI = ns
        return i

    return -1  # Not a directive, could be something else.
| 630 | |
def sparqlDirective(self, argstr, i):

    """
    turtle and trig support BASE/PREFIX without @ and without
    terminating .

    Returns the index just past the directive, the negative
    ``skipSpace`` result at end of input, or -1 when the text is not a
    SPARQL-style directive.  Problems are reported through
    ``self.BadSyntax``.
    """

    j = self.skipSpace(argstr, i)
    if j < 0:
        return j  # eof

    j = self.sparqlTok('PREFIX', argstr, i)
    if j >= 0:
        t = []
        i = self.qname(argstr, j, t)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "expected qname after @prefix")
        j = self.uri_ref2(argstr, i, t)
        if j < 0:
            self.BadSyntax(argstr, i,
                           "expected <uriref> after @prefix _qname_")
        ns = self.uriOf(t[1])

        if self._baseURI:
            ns = join(self._baseURI, ns)
        elif ":" not in ns:
            self.BadSyntax(argstr, j,
                           "With no base URI, cannot use " +
                           "relative URI in @prefix <" + ns + ">")
        assert ':' in ns  # must be absolute
        self._bindings[t[0][0]] = ns
        self.bind(t[0][0], hexify(ns))
        return j

    j = self.sparqlTok('BASE', argstr, i)
    if j >= 0:
        t = []
        i = self.uri_ref2(argstr, j, t)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "expected <uri> after @base ")
        ns = self.uriOf(t[0])

        if self._baseURI:
            ns = join(self._baseURI, ns)
        elif ":" not in ns:
            # Only a *relative* URI needs a previous base; an absolute
            # one is accepted, matching the PREFIX handling above.
            self.BadSyntax(argstr, j,
                           "With no previous base URI, cannot use " +
                           "relative URI in @base <" + ns + ">")
        assert ':' in ns  # must be absolute
        self._baseURI = ns
        return i

    return -1  # Not a directive, could be something else.
| 686 | |
| 687 | |
def bind(self, qn, uri):
    """Register ``uri`` with the store under prefix ``qn``.

    The empty prefix sets the store's default namespace.  ``uri`` must
    be ``bytes``; anything else fails the assertion."""
    assert isinstance(
        uri, bytes), "Any unicode must be %x-encoded already"
    store = self._store
    if qn != "":
        store.bind(qn, uri)
    else:
        store.setDefaultNamespace(uri)
| 695 | |
def setKeywords(self, k):
    """Takes a list of strings, or None.

    A list replaces ``self.keywords`` and sets ``keywordsSet`` to 1;
    None only resets ``keywordsSet`` to 0."""
    if k is not None:
        self.keywords = k
        self.keywordsSet = 1
        return
    self.keywordsSet = 0
| 703 | |
def startDoc(self):
    """Tell the store that parsing into this parser's formula begins."""
    # was: self._store.startDoc()
    store, top_level = self._store, self._formula
    store.startDoc(top_level)
| 707 | |
def endDoc(self):
    """Signal end of document and stop parsing. returns formula"""
    top_level = self._formula
    self._store.endDoc(top_level)  # don't canonicalize yet
    return self._formula
| 712 | |
def makeStatement(self, quadruple):
    """Hand one parsed quadruple to the store, tagged with the current
    reason."""
    # $$$$$$$$$$$$$$$$$$$$$
    # print "# Parser output: ", `quadruple`
    reason = self._reason2
    self._store.makeStatement(quadruple, why=reason)
| 717 | |
def statement(self, argstr, i):
    """Parse a subject followed by its property list.

    Returns the index after the property list, or the negative result
    of the subject parse when no subject is found."""
    parsed = []
    # Allow literal for subject - extends RDF
    after_subject = self.object(argstr, i, parsed)
    if after_subject < 0:
        return after_subject

    end = self.property_list(argstr, after_subject, parsed[0])
    if end < 0:
        self.BadSyntax(
            argstr, after_subject, "expected propertylist")
    return end
| 731 | |
def subject(self, argstr, i, res):
    """Parse a subject; a subject is parsed exactly like an item."""
    end = self.item(argstr, i, res)
    return end
| 734 | |
def verb(self, argstr, i, res):
    """ has _prop_
    is _prop_ of
    a
    =
    _prop_
    >- prop ->
    <- prop -<
    _operator_

    On success a ``(direction, property)`` pair is appended to ``res``,
    where direction is ``'->'`` or ``'<-'``, and the index after the
    verb is returned.  Returns a negative number at end of input or
    when no verb is found.  N3-only forms are reported through
    ``self.BadSyntax`` in turtle mode."""

    j = self.skipSpace(argstr, i)
    if j < 0:
        return j  # eof

    r = []

    j = self.tok('has', argstr, i)
    if j >= 0:
        if self.turtle:
            self.BadSyntax(argstr, i, "Found 'has' keyword in Turtle mode")

        i = self.prop(argstr, j, r)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "expected property after 'has'")
        res.append(('->', r[0]))
        return i

    j = self.tok('is', argstr, i)
    if j >= 0:
        if self.turtle:
            self.BadSyntax(argstr, i, "Found 'is' keyword in Turtle mode")

        i = self.prop(argstr, j, r)
        if i < 0:
            self.BadSyntax(argstr, j,
                           "expected <property> after 'is'")
        j = self.skipSpace(argstr, i)
        if j < 0:
            self.BadSyntax(argstr, i,
                           "End of file found, expected property after 'is'")
        i = j
        j = self.tok('of', argstr, i)
        if j < 0:
            self.BadSyntax(argstr, i,
                           "expected 'of' after 'is' <prop>")
        res.append(('<-', r[0]))
        return j

    j = self.tok('a', argstr, i)
    if j >= 0:
        res.append(('->', RDF_type))
        return j

    if argstr[i:i + 2] == "<=":
        if self.turtle:
            self.BadSyntax(argstr, i,
                           "Found '<=' in Turtle mode. ")

        res.append(('<-', self._store.newSymbol(Logic_NS + "implies")))
        return i + 2

    if argstr[i:i + 1] == "=":
        if self.turtle:
            self.BadSyntax(argstr, i, "Found '=' in Turtle mode")
        if argstr[i + 1:i + 2] == ">":
            res.append(('->', self._store.newSymbol(Logic_NS + "implies")))
            return i + 2
        res.append(('->', DAML_sameAs))
        return i + 1

    if argstr[i:i + 2] == ":=":
        if self.turtle:
            self.BadSyntax(argstr, i, "Found ':=' in Turtle mode")

        # patch file relates two formulae, uses this @@ really?
        # NOTE(review): unlike "implies" above this appends a plain
        # string, not a store symbol - confirm that is intended.
        res.append(('->', Logic_NS + "becomes"))
        return i + 2

    j = self.prop(argstr, i, r)
    if j >= 0:
        res.append(('->', r[0]))
        return j

    if argstr[i:i + 2] == ">-" or argstr[i:i + 2] == "<-":
        # Report at i: j is negative here (prop() just failed), which
        # would point the error at a bogus position.
        self.BadSyntax(argstr, i,
                       ">- ... -> syntax is obsolete.")

    return -1
| 824 | |
def prop(self, argstr, i, res):
    """Parse a property; a property is parsed exactly like an item."""
    end = self.item(argstr, i, res)
    return end
| 827 | |
def item(self, argstr, i, res):
    """Parse an item; an item is parsed exactly like a path."""
    end = self.path(argstr, i, res)
    return end
| 830 | |
def blankNode(self, uri=None):
    """Ask the store for a new blank node in the current context,
    optionally with the given ``uri``."""
    store = self._store
    return store.newBlankNode(self._context, uri, why=self._reason2)
| 833 | |
def path(self, argstr, i, res):
    """Parse the path production.

    A node or literal, optionally followed by any number of ``!node``
    (forward) or ``^node`` (reverse) steps.  Each step creates a blank
    node, emits one statement linking it to the previous value through
    the step's predicate, and leaves the blank node as the new value.

    The final value is appended to ``res``.  Returns the index after
    the path, or a negative number if no node or literal is found.
    """
    j = self.nodeOrLiteral(argstr, i, res)
    if j < 0:
        return j  # nope

    while True:  # no spaces, must follow exactly (?)
        ch = argstr[j:j + 1]
        # Compare against the two operators explicitly: at end of input
        # ch is '' and '' in "!^" is True, which used to start a bogus
        # path step.
        if ch not in ("!", "^"):
            break
        op_pos = j
        subj = res.pop()
        obj = self.blankNode(uri=self.here(op_pos))
        j = self.node(argstr, op_pos + 1, res)
        if j < 0:
            # Report at the operator; j is negative here.
            self.BadSyntax(argstr, op_pos,
                           "EOF found in middle of path syntax")
        pred = res.pop()
        if ch == "^":  # Reverse traverse
            self.makeStatement((self._context, pred, obj, subj))
        else:
            self.makeStatement((self._context, pred, subj, obj))
        res.append(obj)
    return j
| 856 | |
def anonymousNode(self, ln):
    """Remember or generate a term for one of these _: anonymous nodes

    The same label always maps to the same term; a new blank node is
    requested from the store only the first time a label is seen."""
    known = self._anonymousNodes.get(ln, None)
    if known is None:
        known = self._store.newBlankNode(self._context, why=self._reason2)
        self._anonymousNodes[ln] = known
    return known
| 865 | |
| 866 def node(self, argstr, i, res, subjectAlready=None): | |
| 867 """Parse the <node> production. | |
| 868 Space is now skipped once at the beginning | |
| 869 instead of in multipe calls to self.skipSpace(). | |
| 870 """ | |
| 871 subj = subjectAlready | |
| 872 | |
| 873 j = self.skipSpace(argstr, i) | |
| 874 if j < 0: | |
| 875 return j # eof | |
| 876 i = j | |
| 877 ch = argstr[i:i + 1] # Quick 1-character checks first: | |
| 878 | |
| 879 if ch == "[": | |
| 880 bnodeID = self.here(i) | |
| 881 j = self.skipSpace(argstr, i + 1) | |
| 882 if j < 0: | |
| 883 self.BadSyntax(argstr, i, | |
| 884 "EOF after '['") | |
| 885 # Hack for "is" binding name to anon node | |
| 886 if argstr[j:j + 1] == "=": | |
| 887 if self.turtle: | |
| 888 self.BadSyntax(argstr, j, "Found '[=' or '[ =' when in turtle mode.") | |
| 889 i = j + 1 | |
| 890 objs = [] | |
| 891 j = self.objectList(argstr, i, objs) | |
| 892 if j >= 0: | |
| 893 subj = objs[0] | |
| 894 if len(objs) > 1: | |
| 895 for obj in objs: | |
| 896 self.makeStatement((self._context, | |
| 897 DAML_sameAs, subj, obj)) | |
| 898 j = self.skipSpace(argstr, j) | |
| 899 if j < 0: | |
| 900 self.BadSyntax(argstr, i, | |
| 901 "EOF when objectList expected after [ = ") | |
| 902 if argstr[j:j + 1] == ";": | |
| 903 j = j + 1 | |
| 904 else: | |
| 905 self.BadSyntax(argstr, i, | |
| 906 "objectList expected after [= ") | |
| 907 | |
| 908 if subj is None: | |
| 909 subj = self.blankNode(uri=bnodeID) | |
| 910 | |
| 911 i = self.property_list(argstr, j, subj) | |
| 912 if i < 0: | |
| 913 self.BadSyntax(argstr, j, | |
| 914 "property_list expected") | |
| 915 | |
| 916 j = self.skipSpace(argstr, i) | |
| 917 if j < 0: | |
| 918 self.BadSyntax(argstr, i, | |
| 919 "EOF when ']' expected after [ <propertyList>") | |
| 920 if argstr[j:j + 1] != "]": | |
| 921 self.BadSyntax(argstr, j, | |
| 922 "']' expected") | |
| 923 res.append(subj) | |
| 924 return j + 1 | |
| 925 | |
| 926 if not self.turtle and ch == "{": | |
| 927 # if self.turtle: | |
| 928 # self.BadSyntax(argstr, i, | |
| 929 # "found '{' while in Turtle mode, Formulas not supported!") | |
| 930 ch2 = argstr[i + 1:i + 2] | |
| 931 if ch2 == '$': | |
| 932 # a set | |
| 933 i += 1 | |
| 934 j = i + 1 | |
| 935 List = [] | |
| 936 first_run = True | |
| 937 while 1: | |
| 938 i = self.skipSpace(argstr, j) | |
| 939 if i < 0: | |
| 940 self.BadSyntax(argstr, i, | |
| 941 "needed '$}', found end.") | |
| 942 if argstr[i:i + 2] == '$}': | |
| 943 j = i + 2 | |
| 944 break | |
| 945 | |
| 946 if not first_run: | |
| 947 if argstr[i:i + 1] == ',': | |
| 948 i += 1 | |
| 949 else: | |
| 950 self.BadSyntax( | |
| 951 argstr, i, "expected: ','") | |
| 952 else: | |
| 953 first_run = False | |
| 954 | |
| 955 item = [] | |
| 956 j = self.item( | |
| 957 argstr, i, item) # @@@@@ should be path, was object | |
| 958 if j < 0: | |
| 959 self.BadSyntax(argstr, i, | |
| 960 "expected item in set or '$}'") | |
| 961 List.append(self._store.intern(item[0])) | |
| 962 res.append(self._store.newSet(List, self._context)) | |
| 963 return j | |
| 964 else: | |
| 965 # parse a formula | |
| 966 j = i + 1 | |
| 967 oldParentContext = self._parentContext | |
| 968 self._parentContext = self._context | |
| 969 parentAnonymousNodes = self._anonymousNodes | |
| 970 grandParentVariables = self._parentVariables | |
| 971 self._parentVariables = self._variables | |
| 972 self._anonymousNodes = {} | |
| 973 self._variables = self._variables.copy() | |
| 974 reason2 = self._reason2 | |
| 975 self._reason2 = becauseSubexpression | |
| 976 if subj is None: | |
| 977 subj = self._store.newFormula() | |
| 978 self._context = subj | |
| 979 | |
| 980 while 1: | |
| 981 i = self.skipSpace(argstr, j) | |
| 982 if i < 0: | |
| 983 self.BadSyntax( | |
| 984 argstr, i, "needed '}', found end.") | |
| 985 | |
| 986 if argstr[i:i + 1] == "}": | |
| 987 j = i + 1 | |
| 988 break | |
| 989 | |
| 990 j = self.directiveOrStatement(argstr, i) | |
| 991 if j < 0: | |
| 992 self.BadSyntax( | |
| 993 argstr, i, "expected statement or '}'") | |
| 994 | |
| 995 self._anonymousNodes = parentAnonymousNodes | |
| 996 self._variables = self._parentVariables | |
| 997 self._parentVariables = grandParentVariables | |
| 998 self._context = self._parentContext | |
| 999 self._reason2 = reason2 | |
| 1000 self._parentContext = oldParentContext | |
| 1001 res.append(subj.close()) # No use until closed | |
| 1002 return j | |
| 1003 | |
| 1004 if ch == "(": | |
| 1005 thing_type = self._store.newList | |
| 1006 ch2 = argstr[i + 1:i + 2] | |
| 1007 if ch2 == '$': | |
| 1008 thing_type = self._store.newSet | |
| 1009 i += 1 | |
| 1010 j = i + 1 | |
| 1011 | |
| 1012 List = [] | |
| 1013 while 1: | |
| 1014 i = self.skipSpace(argstr, j) | |
| 1015 if i < 0: | |
| 1016 self.BadSyntax( | |
| 1017 argstr, i, "needed ')', found end.") | |
| 1018 if argstr[i:i + 1] == ')': | |
| 1019 j = i + 1 | |
| 1020 break | |
| 1021 | |
| 1022 item = [] | |
| 1023 j = self.item( | |
| 1024 argstr, i, item) # @@@@@ should be path, was object | |
| 1025 if j < 0: | |
| 1026 self.BadSyntax(argstr, i, | |
| 1027 "expected item in list or ')'") | |
| 1028 List.append(self._store.intern(item[0])) | |
| 1029 res.append(thing_type(List, self._context)) | |
| 1030 return j | |
| 1031 | |
| 1032 j = self.tok('this', argstr, i) # This context | |
| 1033 if j >= 0: | |
| 1034 self.BadSyntax(argstr, i, | |
| 1035 "Keyword 'this' was ancient N3. Now use " + | |
| 1036 "@forSome and @forAll keywords.") | |
| 1037 | |
| 1038 # booleans | |
| 1039 j = self.tok('true', argstr, i) | |
| 1040 if j >= 0: | |
| 1041 res.append(True) | |
| 1042 return j | |
| 1043 j = self.tok('false', argstr, i) | |
| 1044 if j >= 0: | |
| 1045 res.append(False) | |
| 1046 return j | |
| 1047 | |
| 1048 if subj is None: # If this can be a named node, then check for a name. | |
| 1049 j = self.uri_ref2(argstr, i, res) | |
| 1050 if j >= 0: | |
| 1051 return j | |
| 1052 | |
| 1053 return -1 | |
| 1054 | |
def property_list(self, argstr, i, subj):
    """Parse a property list (``verb objectList ; verb objectList ...``).

    Every parsed ``verb``/object pair is emitted through
    ``self.makeStatement`` with ``subj`` on the side indicated by the
    verb's direction marker.  The terminating punctuation is left in the
    buffer.

    :param argstr: the text being parsed
    :param i: index at which to start parsing
    :param subj: the node the properties are attached to
    :return: index of the first character that is not part of the list
    :raises BadSyntax: (via ``self.BadSyntax``) on EOF or malformed input
    """
    while 1:
        while 1:  # skip repeated ';'
            j = self.skipSpace(argstr, i)
            if j < 0:
                self.BadSyntax(
                    argstr, i,
                    "EOF found when expected verb in property list")
            if argstr[j] != ';':
                break
            i = j + 1

        if argstr[j:j + 2] == ":-":
            if self.turtle:
                self.BadSyntax(argstr, j, "Found in ':-' in Turtle mode")
            i = j + 2
            res = []
            j = self.node(argstr, i, res, subj)
            if j < 0:
                self.BadSyntax(argstr, i,
                               "bad {} or () or [] node after :- ")
            i = j
            continue

        i = j
        v = []
        j = self.verb(argstr, i, v)
        if j <= 0:
            return i  # void but valid

        objs = []
        i = self.objectList(argstr, j, objs)
        if i < 0:
            self.BadSyntax(argstr, j, "objectList expected")
        for obj in objs:
            dira, sym = v[0]
            if dira == '->':
                self.makeStatement((self._context, sym, subj, obj))
            else:
                self.makeStatement((self._context, sym, obj, subj))

        j = self.skipSpace(argstr, i)
        if j < 0:
            # Report at i: j is the negative EOF marker and would make the
            # error excerpt point at the wrong place.
            self.BadSyntax(argstr, i, "EOF found in list of objects")
        if argstr[i:i + 1] != ";":
            return i
        i = i + 1  # skip semicolon and continue
| 1104 | |
def commaSeparatedList(self, argstr, j, res, what):
    """Parse a comma separated list of items terminated by ``.``.

    :param argstr: the text being parsed
    :param j: index at which to start parsing
    :param res: list that ``what`` appends the parsed items to
    :param what: callable ``what(argstr, index, res)`` parsing one item and
        returning the new index, or a negative number on failure
    :return: -1 on bad syntax or EOF after an item; otherwise the position
        of the (unswallowed) ``.``, or ``j`` unchanged for an empty list
    :raises BadSyntax: (via ``self.BadSyntax``) on EOF before the list, or
        when the item after a comma cannot be parsed
    """
    i = self.skipSpace(argstr, j)
    if i < 0:
        # i is the negative EOF marker; report at the starting index.
        self.BadSyntax(argstr, j, "EOF found expecting comma sep list")
    if argstr[i] == ".":
        return j  # empty list is OK
    i = what(argstr, i, res)
    if i < 0:
        return -1

    while 1:
        j = self.skipSpace(argstr, i)
        if j < 0:
            return j  # eof
        ch = argstr[j:j + 1]
        if ch != ",":
            if ch != ".":
                return -1
            return j  # Found but not swallowed "."
        i = what(argstr, j + 1, res)
        if i < 0:
            # Report just after the comma, not at the negative result.
            self.BadSyntax(argstr, j + 1, "bad list content")
| 1132 | |
def objectList(self, argstr, i, res):
    """Parse one or more objects separated by commas.

    :param argstr: the text being parsed
    :param i: index at which to start parsing
    :param res: list the parsed objects are appended to (by ``self.object``)
    :return: -1 (or the negative result of ``self.object``) when an object
        cannot be parsed; otherwise the index of the first non-space
        character following the list
    :raises BadSyntax: (via ``self.BadSyntax``) when the input ends right
        after an object
    """
    i = self.object(argstr, i, res)
    if i < 0:
        return -1
    while 1:
        j = self.skipSpace(argstr, i)
        if j < 0:
            # Report at the end of the last object; j is the EOF marker.
            self.BadSyntax(argstr, i, "EOF found after object")
        if argstr[j:j + 1] != ",":
            return j  # Found something else!
        i = self.object(argstr, j + 1, res)
        if i < 0:
            return i
| 1147 | |
def checkDot(self, argstr, i):
    """Check for the end of a statement.

    Skips white space from ``i``.  A ``.`` is consumed (the index after it
    is returned); a ``}`` or ``]`` is left in place (its own index is
    returned).  The negative EOF marker from ``skipSpace`` is passed
    through unchanged.  Anything else is reported through
    ``self.BadSyntax``.
    """
    pos = self.skipSpace(argstr, i)
    if pos < 0:
        return pos  # eof
    terminator = argstr[pos:pos + 1]
    if terminator == ".":
        return pos + 1  # swallow the dot
    if terminator in ("}", "]"):
        return pos  # closing bracket belongs to the caller
    self.BadSyntax(argstr, pos,
                   "expected '.' or '}' or ']' at end of statement")
| 1160 | |
def uri_ref2(self, argstr, i, res):
    """Generate uri from n3 representation.

    Tries, in order: a qname (``pfx:local``), a ``?variable``, a
    ``<uri reference>`` and - when keywords have been declared - a bare
    word.  The resulting term is appended to ``res``.

    Note that the RDF convention of directly concatenating
    NS and local name is now used though I prefer inserting a '#'
    to make the namespaces look more like what XML folks expect.

    :return: the index after the parsed term, or -1 if nothing matched
    :raises BadSyntax: (via ``self.BadSyntax``) for an unbound prefix, an
        unterminated ``<...>`` reference or a keyword used as a name
    """
    qn = []
    j = self.qname(argstr, i, qn)
    if j >= 0:
        pfx, ln = qn[0]
        if pfx is None:
            assert 0, "not used?"
            ns = self._baseURI + ADDED_HASH
        else:
            try:
                ns = self._bindings[pfx]
            except KeyError:
                if pfx == "_":  # Magic prefix 2001/05/30, can be changed
                    res.append(self.anonymousNode(ln))
                    return j
                if not self.turtle and pfx == "":
                    ns = join(self._baseURI or "", "#")
                else:
                    self.BadSyntax(argstr, i,
                                   "Prefix \"%s:\" not bound" % (pfx))
        symb = self._store.newSymbol(ns + ln)
        # A symbol declared as a variable is replaced by that variable.
        res.append(self._variables.get(symb, symb))  # @@@ "#" CONVENTION
        return j

    i = self.skipSpace(argstr, i)
    if i < 0:
        return -1

    if argstr[i] == "?":
        v = []
        j = self.variable(argstr, i, v)
        if j > 0:  # Forget variables as a class, only in context.
            res.append(v[0])
            return j
        return -1

    elif argstr[i] == "<":
        opening = i  # position of '<', kept for error reporting
        st = i + 1
        end = argstr.find(">", st)
        if end < 0:
            # Previously reported at j, which is the negative result of the
            # failed qname() call above; point at the opening '<' instead.
            self.BadSyntax(argstr, opening, "unterminated URI reference")
        uref = argstr[st:end]  # the join should deal with "":

        # expand unicode escapes
        uref = unicodeEscape8.sub(unicodeExpand, uref)
        uref = unicodeEscape4.sub(unicodeExpand, uref)

        if self._baseURI:
            uref = join(self._baseURI, uref)  # was: uripath.join
        else:
            assert ":" in uref, \
                "With no base URI, cannot deal with relative URIs"
        if argstr[end - 1:end] == "#" and not uref[-1:] == "#":
            uref = uref + "#"  # She meant it!  Weirdness in urlparse?
        symb = self._store.newSymbol(uref)
        res.append(self._variables.get(symb, symb))
        return end + 1

    elif self.keywordsSet:
        v = []
        j = self.bareWord(argstr, i, v)
        if j < 0:
            return -1  # Forget variables as a class, only in context.
        if v[0] in self.keywords:
            self.BadSyntax(argstr, i,
                           'Keyword "%s" not allowed here.' % v[0])
        res.append(self._store.newSymbol(self._bindings[""] + v[0]))
        return j
    else:
        return -1
| 1247 | |
def skipSpace(self, argstr, i):
    """Skip white space, newlines and comments.

    Each match of the module-level ``eol`` pattern increments
    ``self.lines`` and moves ``self.startOfLine`` to the position after
    the match.

    return -1 if EOF, else position of first non-ws character
    """
    pos = i
    line_end = eol.match(argstr, pos)
    while line_end is not None:
        self.lines = self.lines + 1
        pos = line_end.end()  # first character after the matched line end
        self.startOfLine = pos
        line_end = eol.match(argstr, pos)

    blank = ws.match(argstr, pos)
    if blank is not None:
        pos = blank.end()

    if eof.match(argstr, pos) is not None:
        return -1
    return pos
| 1265 | |
def variable(self, argstr, i, res):
    """Parse a ``?name`` variable:  ?abc -> variable(:abc)

    The variable is created on first use with ``newUniversal`` and cached:
    in ``self._variables`` / ``self._context`` when there is no parent
    context, otherwise in ``self._parentVariables`` /
    ``self._parentContext``.  The variable is appended to ``res``.

    :return: index after the variable name, or -1 if no ``?`` was found
    :raises BadSyntax: (via ``self.BadSyntax``) if the name starts with a
        digit or ``-``
    """
    j = self.skipSpace(argstr, i)
    if j < 0:
        return -1

    if argstr[j:j + 1] != "?":
        return -1
    j = j + 1
    i = j
    # Slice instead of index: a '?' at the very end of the input used to
    # raise IndexError here.
    first = argstr[j:j + 1]
    if first and first in "0123456789-":
        self.BadSyntax(argstr, j,
                       "Varible name can't start with '%s'" % first)
    while i < len(argstr) and argstr[i] not in _notKeywordsChars:
        i = i + 1

    # At the outermost level the variable belongs to the current context,
    # inside a formula it is quantified in the parent context.
    # @@ was, for the outermost level:
    # self.BadSyntax(argstr, j,
    #     "Can't use ?xxx syntax for variable in outermost level: %s"
    #     % argstr[j-1:i])
    if self._parentContext is None:
        known, context = self._variables, self._context
    else:
        known, context = self._parentVariables, self._parentContext

    varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i])
    if varURI not in known:
        known[varURI] = context.newUniversal(varURI, why=self._reason2)
    res.append(known[varURI])
    return i
| 1300 | |
def bareWord(self, argstr, i, res):
    """Parse a bare word:  abc -> :abc

    After skipping white space, the word runs up to the first character
    found in ``_notKeywordsChars`` (or the end of the input) and is
    appended to ``res`` as a plain string.

    Returns the index after the word, or -1 at EOF or when the first
    character is a digit, ``-`` or one of ``_notKeywordsChars``.
    """
    start = self.skipSpace(argstr, i)
    if start < 0:
        return -1

    first = argstr[start]
    if first in "0123456789-" or first in _notKeywordsChars:
        return -1

    end = start
    limit = len(argstr)
    while end < limit and argstr[end] not in _notKeywordsChars:
        end += 1
    res.append(argstr[start:end])
    return end
| 1315 | |
def qname(self, argstr, i, res):
    """Parse a qualified name and append ``(prefix, localname)`` to ``res``.

    xyz:def -> ('xyz', 'def')
    If not in keywords and keywordsSet: def -> ('', 'def')
    :def -> ('', 'def')

    :return: index after the name, or -1 if no qname starts at ``i``
    :raises BadSyntax: on an illegal backslash escape, a malformed ``%``
        hex escape, or a name ending in a backslash
    """

    i = self.skipSpace(argstr, i)
    if i < 0:
        return -1

    c = argstr[i]
    if c in "0123456789-+.":
        return -1
    if c not in _notNameChars:
        ln = c
        i = i + 1
        while i < len(argstr):
            c = argstr[i]
            if c not in _notNameChars:
                ln = ln + c
                i = i + 1
            else:
                break

        if argstr[i - 1] == ".":  # qname cannot end with "."
            ln = ln[:-1]
            if not ln:
                return -1
            i -= 1

    else:  # First character is non-alpha
        ln = ''  # Was: None - TBL (why? useful?)

    if i < len(argstr) and argstr[i] == ':':
        pfx = ln
        # bnodes names have different rules
        if pfx == '_':
            allowedChars = _notNameChars
        else:
            allowedChars = _notQNameChars

        i = i + 1
        lastslash = False
        # start = i # TODO first char .
        ln = ''
        while i < len(argstr):
            c = argstr[i]
            if not lastslash and c == '\\':
                lastslash = True
                i += 1

            elif lastslash or c not in allowedChars:

                if lastslash:
                    if c not in escapeChars:
                        # self.lines (not the non-existent self.line) is
                        # the line counter maintained by skipSpace.
                        raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                        "illegal escape "+c)
                elif c == '%':
                    # Need two hex digits; also guard against the escape
                    # being cut off by the end of the input.
                    if (i + 2 >= len(argstr)
                            or argstr[i + 1] not in hexChars
                            or argstr[i + 2] not in hexChars):
                        raise BadSyntax(self._thisDoc, self.lines, argstr, i,
                                        "illegal hex escape "+c)

                ln = ln + c
                i = i + 1
                lastslash = False
            else:
                break

        if lastslash:
            raise BadSyntax(
                self._thisDoc, self.lines, argstr, i,
                "qname cannot end with \\")

        if argstr[i - 1] == '.':
            # localname cannot end in .
            ln = ln[:-1]
            if not ln:
                return -1
            i -= 1

        res.append((pfx, ln))
        return i

    else:  # delimiter was not ":"
        if ln and self.keywordsSet and ln not in self.keywords:
            res.append(('', ln))
            return i
        return -1
| 1404 | |
def object(self, argstr, i, res):
    """Parse an object: anything ``self.subject`` accepts, or a string.

    A quoted string (single or triple delimiter from
    ``self.string_delimiters``) becomes a plain literal with neither
    datatype nor language.

    :return: index after the object, or -1 if none could be parsed
    """
    j = self.subject(argstr, i, res)
    if j >= 0:
        return j

    j = self.skipSpace(argstr, i)
    if j < 0:
        return -1
    i = j

    if argstr[i] not in self.string_delimiters:
        return -1

    if argstr[i:i + 3] == argstr[i] * 3:
        delim = argstr[i] * 3
    else:
        delim = argstr[i]
    i = i + len(delim)

    j, s = self.strconst(argstr, i, delim)

    # newLiteral takes (s, dt, lang); the single-argument call that used
    # to be here raised TypeError with RDFSink.
    res.append(self._store.newLiteral(s, None, None))
    return j
| 1429 | |
def nodeOrLiteral(self, argstr, i, res):
    """Parse a node, a numeric literal or a string literal.

    Numbers are appended to ``res`` as Python ``float``, ``Decimal`` or
    ``int`` values (tried in that order); strings are appended as the
    result of ``self._store.newLiteral`` with an optional ``@lang`` tag
    and/or ``^^datatype``.

    :return: index after the parsed term, or -1 if nothing matched
    :raises BadSyntax: for a malformed language tag, or when ``^^`` is not
        followed by a datatype URI
    """
    j = self.node(argstr, i, res)
    startline = self.lines  # Remember where for error messages
    if j >= 0:
        return j

    j = self.skipSpace(argstr, i)
    if j < 0:
        return -1
    i = j

    ch = argstr[i]
    if ch in "-+0987654321.":
        m = exponent_syntax.match(argstr, i)
        if m:
            j = m.end()
            res.append(float(argstr[i:j]))
            return j

        m = decimal_syntax.match(argstr, i)
        if m:
            j = m.end()
            res.append(Decimal(argstr[i:j]))
            return j

        m = integer_syntax.match(argstr, i)
        if m:
            j = m.end()
            res.append(int(argstr[i:j]))
            return j

        # return -1 ## or fall through?

    if argstr[i] not in self.string_delimiters:
        return -1

    if argstr[i:i + 3] == argstr[i] * 3:
        delim = argstr[i] * 3
    else:
        delim = argstr[i]
    i = i + len(delim)

    dt = None
    j, s = self.strconst(argstr, i, delim)
    lang = None
    if argstr[j:j + 1] == "@":  # Language?
        m = langcode.match(argstr, j + 1)
        if m is None:
            raise BadSyntax(
                self._thisDoc, startline, argstr, i,
                "Bad language code syntax on string " +
                "literal, after @")
        i = m.end()
        lang = argstr[j + 1:i]
        j = i
    if argstr[j:j + 2] == "^^":
        res2 = []
        dt_start = j + 2
        j = self.uri_ref2(argstr, dt_start, res2)  # Read datatype URI
        if j < 0 or not res2:
            # uri_ref2 returns -1 without appending anything when no URI
            # follows; res2[0] used to raise IndexError in that case.
            self.BadSyntax(argstr, dt_start,
                           "expected datatype URI after ^^")
        dt = res2[0]
    res.append(self._store.newLiteral(s, dt, lang))
    return j
| 1492 | |
def uriOf(self, sym):
    """Return the URI for ``sym``.

    A tuple (old system for --pipe) carries the URI as its second
    element; any other value is returned unchanged.
    """
    # return sym.uriref() # cwm api
    return sym[1] if isinstance(sym, tuple) else sym
| 1498 | |
def strconst(self, argstr, i, delim):
    """Parse an N3 string constant delimited by ``delim``.

    :param argstr: the text being parsed
    :param i: index of the first character after the opening delimiter
    :param delim: the opening delimiter: one quote character, or the same
        character repeated three times for a long string
    :return: ``(index, value)`` - the index after the closing delimiter
        and the unescaped string
    :raises BadSyntax: on a newline inside a short string, a bad or
        truncated escape sequence, or an unterminated string
    """
    delim1 = delim[0]
    delim2, delim3, delim4, delim5 = (
        delim1 * 2, delim1 * 3, delim1 * 4, delim1 * 5)

    j = i
    ustr = ""  # Empty unicode string
    startline = self.lines  # Remember where for error messages
    while j < len(argstr):
        if argstr[j] == delim1:
            if delim == delim1:  # done when delim is " or '
                i = j + 1
                return i, ustr
            if delim == delim3:
                # done when delim is """ or ''' and, respectively ...
                if argstr[j:j + 5] == delim5:  # ... we have "" or '' before
                    i = j + 5
                    ustr = ustr + delim2
                    return i, ustr
                if argstr[j:j + 4] == delim4:  # ... we have " or ' before
                    i = j + 4
                    ustr = ustr + delim1
                    return i, ustr
                if argstr[j:j + 3] == delim3:
                    # current " or ' is part of delim
                    i = j + 3
                    return i, ustr

            # we are inside of the string and current char is " or '
            j = j + 1
            ustr = ustr + delim1
            continue

        m = interesting.search(argstr, j)  # was argstr[j:].
        # Note for pos param to work, MUST be compiled ... re bug?
        if m is None:
            # Nothing of interest left, so the closing quote is missing.
            # This used to be an ``assert``, which is stripped under -O
            # and surfaced as AssertionError instead of a syntax error.
            self.BadSyntax(argstr, j, "unterminated string literal")

        i = m.start()
        try:
            ustr = ustr + argstr[j:i]
        except UnicodeError:
            err = ""
            for c in argstr[j:i]:
                err = err + (" %02x" % ord(c))
            streason = sys.exc_info()[1].__str__()
            raise BadSyntax(
                self._thisDoc, startline, argstr, j,
                "Unicode error appending characters" +
                " %s to string, because\n\t%s"
                % (err, streason))

        ch = argstr[i]
        if ch == delim1:
            j = i
            continue
        elif ch in ('"', "'") and ch != delim1:
            ustr = ustr + ch
            j = i + 1
            continue
        elif ch in "\r\n":
            if delim == delim1:
                raise BadSyntax(
                    self._thisDoc, startline, argstr, i,
                    "newline found in string literal")
            self.lines = self.lines + 1
            ustr = ustr + ch
            j = i + 1
            self.startOfLine = j

        elif ch == "\\":
            j = i + 1
            ch = argstr[j:j + 1]  # Will be empty if string ends
            if not ch:
                raise BadSyntax(
                    self._thisDoc, startline, argstr, i,
                    "unterminated string literal (2)")
            # The two strings below are parallel: escape letter -> value.
            # \' is included; it was previously rejected as a bad escape.
            k = 'abfrtvn\\"\''.find(ch)
            if k >= 0:
                uch = '\a\b\f\r\t\v\n\\"\''[k]
                ustr = ustr + uch
                j = j + 1
            elif ch == "u":
                j, ch = self.uEscape(argstr, j + 1, startline)
                ustr = ustr + ch
            elif ch == "U":
                j, ch = self.UEscape(argstr, j + 1, startline)
                ustr = ustr + ch
            else:
                self.BadSyntax(argstr, i,
                               "bad escape")

    self.BadSyntax(argstr, i,
                   "unterminated string literal")
| 1595 | |
def _unicodeEscape(self, argstr, i, startline, reg, n, prefix):
    """Expand one unicode escape whose hex digits start at index ``i``.

    The escape is rebuilt as backslash + ``prefix`` + the next ``n``
    characters and expanded by substituting ``unicodeExpand`` for matches
    of the compiled pattern ``reg``.

    :return: ``(i + n, expanded_text)``
    :raises BadSyntax: if fewer than ``n`` characters remain, or if the
        expansion fails
    """
    if len(argstr) < i + n:
        raise BadSyntax(
            self._thisDoc, startline, argstr, i,
            "unterminated string literal(3)")
    try:
        return i + n, reg.sub(unicodeExpand, '\\' + prefix + argstr[i:i + n])
    except Exception:
        # Was a bare ``except:``, which also converted KeyboardInterrupt
        # and SystemExit into syntax errors.
        raise BadSyntax(
            self._thisDoc, startline, argstr, i,
            "bad string literal hex escape: " + argstr[i:i + n])
| 1607 | |
def uEscape(self, argstr, i, startline):
    """Expand a ``\\u`` escape: four characters starting at index ``i``.

    Delegates to ``_unicodeEscape`` with the ``unicodeEscape4`` pattern.
    """
    digit_count = 4
    return self._unicodeEscape(
        argstr, i, startline, unicodeEscape4, digit_count, 'u')
| 1610 | |
def UEscape(self, argstr, i, startline):
    """Expand a ``\\U`` escape: eight characters starting at index ``i``.

    Delegates to ``_unicodeEscape`` with the ``unicodeEscape8`` pattern.
    """
    digit_count = 8
    return self._unicodeEscape(
        argstr, i, startline, unicodeEscape8, digit_count, 'U')
| 1613 | |
def BadSyntax(self, argstr, i, msg):
    """Raise a ``BadSyntax`` error for position ``i`` of ``argstr``.

    The document identifier (``self._thisDoc``) and the current line
    counter (``self.lines``) are filled in from the parser state.
    """
    error = BadSyntax(self._thisDoc, self.lines, argstr, i, msg)
    raise error
| 1616 | |
| 1617 # If we are going to do operators then they should generate | |
| 1618 # [ is operator:plus of ( \1 \2 ) ] | |
| 1619 | |
| 1620 | |
class BadSyntax(SyntaxError):
    """Syntax error raised by the N3/Turtle parser.

    The message shows up to 60 characters of the input on either side of
    the error position, with ``^`` marking the position itself.
    """

    def __init__(self, uri, lines, argstr, i, why):
        """
        :param uri: identifier of the document being parsed
        :param lines: zero-based line counter at the time of the error
        :param argstr: the text being parsed
        :param i: character index of the error within ``argstr``
        :param why: human readable description of the problem
        """
        # Keep the input as text.  Encoding it to UTF-8 bytes (as was done
        # before) makes the message show b'...' reprs on Python 3 and
        # turns the character index ``i`` into a wrong byte offset for
        # non-ASCII input.
        if isinstance(argstr, bytes):
            argstr = argstr.decode('utf-8', 'replace')
        self._str = argstr
        self._i = i
        self._why = why
        self.lines = lines
        self._uri = uri

    def __str__(self):
        argstr = self._str
        i = self._i
        st = 0
        if i > 60:
            pre = "..."
            st = i - 60
        else:
            pre = ""
        if len(argstr) - i > 60:
            post = "..."
        else:
            post = ""

        return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' \
            % (self.lines + 1, self._uri, self._why, pre,
               argstr[st:i], argstr[i:i + 60], post)

    @property
    def message(self):
        """The formatted error message (same as ``str(self)``)."""
        return str(self)
| 1651 | |
| 1652 | |
| 1653 | |
| 1654 ############################################################################### | |
class Formula(object):
    """A quoted formula, backed by a ``QuotedGraph`` in the parent's store."""

    # Class-wide count of formulas created so far; every instance takes
    # the next value as its own ``number``.
    number = 0

    def __init__(self, parent):
        Formula.number += 1
        self.number = Formula.number
        self.uuid = uuid4().hex
        self.counter = 0  # blank nodes generated by this formula
        self.existentials = {}
        self.universals = {}

        identifier = self.id()
        self.quotedgraph = QuotedGraph(
            store=parent.store, identifier=identifier)

    def __str__(self):
        return '_:Formula%s' % self.number

    def id(self):
        """Return the blank node that identifies this formula."""
        return BNode('_:Formula%s' % self.number)

    def newBlankNode(self, uri=None, why=None):
        """Return a blank node.

        Without ``uri`` a fresh label is built from this formula's uuid
        and counter; with ``uri`` the label is the text after the last
        '#', with every '_' replaced by 'b'.
        """
        if uri is not None:
            label = uri.split('#')[-1].replace('_', 'b')
            return BNode(label)
        self.counter += 1
        return BNode('f%sb%s' % (self.uuid, self.counter))

    def newUniversal(self, uri, why=None):
        """Return a ``Variable`` named by the text after the last '#'."""
        name = uri.split('#')[-1]
        return Variable(name)

    def declareExistential(self, x):
        """Map ``x`` to a freshly generated blank node."""
        self.existentials[x] = self.newBlankNode()

    def close(self):
        """Return the underlying quoted graph."""
        return self.quotedgraph
| 1692 | |
| 1693 | |
# Compiled pattern capturing a single character in the range \x80-\xff.
| 1694 r_hibyte = re.compile(r'([\x80-\xff])') | |
| 1695 | |
| 1696 | |
class RDFSink(object):
    """Adapter through which the parser creates terms and adds statements
    to an rdflib graph."""

    def __init__(self, graph):
        self.rootFormula = None  # set by startDoc()
        self.counter = 0  # number of blank nodes generated by this sink
        self.graph = graph

    def newFormula(self):
        """Create a new ``Formula`` bound to the sink's graph."""
        assert self.graph.store.formula_aware
        f = Formula(self.graph)
        return f

    def newGraph(self, identifier):
        """Return a ``Graph`` with ``identifier`` on the sink's store."""
        return Graph(self.graph.store, identifier)

    def newSymbol(self, *args):
        """Return a ``URIRef`` for the first positional argument."""
        return URIRef(args[0])

    def newBlankNode(self, arg=None, uri=None, why=None):
        """Return a blank node.

        A ``Formula`` argument delegates to that formula; a ``Graph`` or
        ``None`` yields a sink-numbered node (``n1``, ``n2`` ...); for any
        other argument the label is derived from ``arg[0]``.
        """
        if isinstance(arg, Formula):
            return arg.newBlankNode(uri)
        elif isinstance(arg, Graph) or arg is None:
            self.counter += 1
            bn = BNode('n' + str(self.counter))
        else:
            bn = BNode(str(arg[0]).split('#').pop().replace('_', 'b'))
        return bn

    def newLiteral(self, s, dt=None, lang=None):
        """Return a ``Literal`` for ``s``.

        A truthy datatype takes precedence and the language is then
        ignored.  ``dt`` and ``lang`` default to ``None`` so that a plain
        literal can be created from the string alone.
        """
        if dt:
            return Literal(s, datatype=dt)
        else:
            return Literal(s, lang=lang)

    def newList(self, n, f):
        """Build an ``rdf:first``/``rdf:rest`` list for the items ``n`` in
        context ``f`` and return its head (``rdf:nil`` when ``n`` is
        empty)."""
        if not n:
            return self.newSymbol(
                'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'
            )

        a = self.newBlankNode(f)
        first = self.newSymbol(
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#first'
        )
        rest = self.newSymbol(
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#rest')
        self.makeStatement((f, first, a, n[0]))
        self.makeStatement((f, rest, a, self.newList(n[1:], f)))
        return a

    def newSet(self, *args):
        """Return a ``set`` of the positional arguments."""
        # NOTE(review): every argument must be hashable; a caller passing
        # a list as one of the arguments would get TypeError - confirm
        # against callers.
        return set(args)

    def setDefaultNamespace(self, *args):
        """Return the ``repr`` of each argument joined by ':'."""
        return ':'.join(repr(n) for n in args)

    def makeStatement(self, quadruple, why=None):
        """Add a statement given as ``(context, predicate, subject,
        object)``.

        Terms are normalised first.  The triple goes to the sink's graph
        when the context is the root formula, to the formula's quoted
        graph when the context is a ``Formula``, and to the context itself
        otherwise.
        """
        f, p, s, o = quadruple

        if hasattr(p, 'formula'):
            raise Exception("Formula used as predicate")

        s = self.normalise(f, s)
        p = self.normalise(f, p)
        o = self.normalise(f, o)

        if f == self.rootFormula:
            self.graph.add((s, p, o))
        elif isinstance(f, Formula):
            f.quotedgraph.add((s, p, o))
        else:
            f.add((s, p, o))

    def normalise(self, f, n):
        """Convert a parser value into an rdflib term.

        Tuples become ``URIRef`` (from element 1); Python ``bool``,
        ``int``, ``Decimal`` and ``float`` values become typed literals;
        inside a ``Formula`` a declared existential is replaced by its
        blank node.  Anything else is returned unchanged.
        """
        if isinstance(n, tuple):
            return URIRef(str(n[1]))

        # bool must be tested before int: bool is a subclass of int.
        if isinstance(n, bool):
            s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE)
            return s

        if isinstance(n, int):
            s = Literal(str(n), datatype=INTEGER_DATATYPE)
            return s

        if isinstance(n, Decimal):
            value = str(n)
            if value == '-0':
                value = '0'
            s = Literal(value, datatype=DECIMAL_DATATYPE)
            return s

        if isinstance(n, float):
            s = Literal(str(n), datatype=DOUBLE_DATATYPE)
            return s

        if isinstance(f, Formula):
            if n in f.existentials:
                return f.existentials[n]

        return n

    def intern(self, something):
        """Return ``something`` unchanged."""
        return something

    def bind(self, pfx, uri):
        """Do nothing."""
        pass

    def startDoc(self, formula):
        """Record ``formula`` as the root formula of the document."""
        self.rootFormula = formula

    def endDoc(self, formula):
        """Do nothing."""
        pass
| 1818 | |
| 1819 | |
| 1820 ################################################### | |
| 1821 # | |
| 1822 # Utilities | |
| 1823 # | |
| 1824 | |
| 1825 | |
@py3compat.format_doctest_out
def hexify(ustr):
    """Use URL encoding to return an ASCII string
    corresponding to the given UTF8 string

    >>> hexify("http://example/a b")
    %(b)s'http://example/a%%20b'

    """
    # Characters with code points 33..126 pass through unchanged; every
    # other character is replaced by '%' plus its code point in upper-case
    # hex (at least two digits).
    pieces = []
    for ch in ustr:
        code = ord(ch)
        if 33 <= code <= 126:
            pieces.append(ch)
        else:
            pieces.append("%%%02X" % code)
    return b("".join(pieces))
| 1844 | |
| 1845 | |
class TurtleParser(Parser):

    """
    An RDFLib parser for Turtle

    See http://www.w3.org/TR/turtle/
    """

    def __init__(self):
        pass

    def parse(self, source, graph, encoding="utf-8", turtle=True):
        """Parse ``source`` into ``graph``.

        :param source: input source providing ``getPublicId``,
            ``getSystemId`` and ``getByteStream``
        :param graph: graph that receives the triples and, afterwards,
            the prefix bindings collected by the parser
        :param encoding: must be ``None`` or ``"utf-8"``
        :param turtle: passed on to ``SinkParser`` as its ``turtle`` flag
        :raises Exception: if ``encoding`` is anything other than ``None``
            or ``"utf-8"``
        """
        if encoding not in [None, "utf-8"]:
            # The message used to be a tuple of two strings, so the '%'
            # formatting raised TypeError instead of this exception.
            raise Exception(
                "N3/Turtle files are always utf-8 encoded, "
                "I was passed: %s" % (encoding,))

        sink = RDFSink(graph)

        baseURI = graph.absolutize(
            source.getPublicId() or source.getSystemId() or "")
        p = SinkParser(sink, baseURI=baseURI, turtle=turtle)

        p.loadStream(source.getByteStream())

        for prefix, namespace in list(p._bindings.items()):
            graph.bind(prefix, namespace)
| 1874 | |
| 1875 | |
class N3Parser(TurtleParser):

    """
    An RDFLib parser for Notation3

    See http://www.w3.org/DesignIssues/Notation3.html

    """

    def __init__(self):
        pass

    def parse(self, source, graph, encoding="utf-8"):
        """Parse Notation3 from ``source`` into ``graph``.

        The graph's store must be both context aware and formula aware.
        Parsing runs through ``TurtleParser.parse`` with ``turtle=False``
        on a ``ConjunctiveGraph`` that shares the store and namespace
        manager and uses ``graph`` as its default context.
        """
        # we're currently being handed a Graph, not a ConjunctiveGraph
        store = graph.store
        assert store.context_aware  # is this implied by formula_aware
        assert store.formula_aware

        # TODO: CG __init__ should have a default_context arg
        # TODO: update N3Processor so that it can use conj_graph as the sink
        wrapper = ConjunctiveGraph(store=store)
        wrapper.default_context = graph
        wrapper.namespace_manager = graph.namespace_manager

        TurtleParser.parse(self, source, wrapper, encoding, turtle=False)
| 1900 | |
| 1901 | |
def _test():  # pragma: no cover
    """Run this module's doctests."""
    from doctest import testmod
    testmod()
| 1905 | |
| 1906 | |
| 1907 # if __name__ == '__main__': | |
| 1908 # _test() | |
| 1909 | |
def main():  # pragma: no cover
    """Parse the file named by ``sys.argv[1]`` and print every quad."""
    g = ConjunctiveGraph()

    sink = RDFSink(g)
    path = sys.argv[1]
    base_uri = 'file://' + os.path.join(os.getcwd(), path)

    p = SinkParser(sink, baseURI=base_uri)
    p._bindings[''] = p._baseURI + '#'
    p.startDoc()

    # The context manager closes the file even if reading fails.
    with open(path, 'rb') as f:
        rdbytes = f.read()

    p.feed(rdbytes)
    p.endDoc()
    for t in g.quads((None, None, None)):
        print(t)


if __name__ == '__main__':
    main()
| 1932 | |
| 1933 # ends |
