Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/rdflib/plugins/serializers/turtle.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 """ | |
| 2 Turtle RDF graph serializer for RDFLib. | |
| 3 See <http://www.w3.org/TeamSubmission/turtle/> for syntax specification. | |
| 4 """ | |
| 5 | |
| 6 from collections import defaultdict | |
| 7 | |
| 8 from rdflib.compat import cmp_to_key | |
| 9 from rdflib.term import BNode, Literal, URIRef | |
| 10 from rdflib.exceptions import Error | |
| 11 from rdflib.serializer import Serializer | |
| 12 from rdflib.namespace import RDF, RDFS | |
| 13 | |
# Names exported by `from ... import *` for this module.
__all__ = ['RecursiveSerializer', 'TurtleSerializer']
| 15 | |
| 16 def _object_comparator(a,b): | |
| 17 """ | |
| 18 for nice clean output we sort the objects of triples, | |
| 19 some of them are literals, | |
| 20 these are sorted according to the sort order of the underlying python objects | |
| 21 in py3 not all things are comparable. | |
| 22 This falls back on comparing string representations when not. | |
| 23 """ | |
| 24 | |
| 25 try: | |
| 26 if a>b: return 1 | |
| 27 if a<b: return -1 | |
| 28 return 0 | |
| 29 | |
| 30 except TypeError: | |
| 31 a = str(a) | |
| 32 b = str(b) | |
| 33 return (a > b) - (a < b) | |
| 34 | |
| 35 | |
class RecursiveSerializer(Serializer):
    """
    Serializer base class that keeps the bookkeeping needed to write a
    graph subject by subject: which subjects exist, how often each node is
    referenced as an object, which subjects were already written, and the
    current nesting depth used for indentation.
    """

    # Members of these classes are ordered first by orderSubjects().
    topClasses = [RDFS.Class]
    # Predicates listed here come first in sortProperties(), in this order.
    predicateOrder = [RDF.type, RDFS.label]
    maxDepth = 10
    # NOTE(review): whitespace in this listing appears collapsed; confirm the
    # exact width of this indent string against the original file.
    indentString = " "

    def __init__(self, store):
        """Create the serializer for `store` and initialise its state."""
        super(RecursiveSerializer, self).__init__(store)
        self.stream = None
        self.reset()

    def addNamespace(self, prefix, uri):
        """
        Bind `prefix` to `uri`.

        Raises Exception if `prefix` is already bound to a different uri.
        """
        if prefix in self.namespaces:
            bound = self.namespaces[prefix]
            if bound != uri:
                raise Exception("Trying to override namespace prefix %s => %s, but it's already bound to %s" % (prefix, uri, bound))
        self.namespaces[prefix] = uri

    def checkSubject(self, subject):
        """Check to see if the subject should be serialized yet"""
        if self.isDone(subject):
            return False
        if subject not in self._subjects:
            return False
        # Top-level subjects are not nested below depth 1.
        if subject in self._topLevels and self.depth > 1:
            return False
        if isinstance(subject, URIRef) and self.depth >= self.maxDepth:
            return False
        return True

    def isDone(self, subject):
        """Return true if subject is serialized"""
        return subject in self._serialized

    def orderSubjects(self):
        """
        Return every known subject in output order.

        Sorted members of each class in `topClasses` come first (and are
        recorded as top-level); the remaining subjects follow, sorted by
        (is-blank-node, reference count, subject).
        """
        ordered = []
        placed = set()

        for classURI in self.topClasses:
            for member in sorted(self.store.subjects(RDF.type, classURI)):
                ordered.append(member)
                self._topLevels[member] = True
                placed.add(member)

        remaining = sorted(
            (isinstance(subject, BNode), self._references[subject], subject)
            for subject in self._subjects if subject not in placed)
        ordered.extend(entry[2] for entry in remaining)

        return ordered

    def preprocess(self):
        """Run preprocessTriple() over every triple in the store."""
        for triple in self.store.triples((None, None, None)):
            self.preprocessTriple(triple)

    def preprocessTriple(self, xxx_todo_changeme):
        """Record the triple's subject and count a reference to its object."""
        subject, _predicate, obj = xxx_todo_changeme
        self._references[obj] += 1
        self._subjects[subject] = True

    def reset(self):
        """Clear all per-run state and re-register the store's namespaces."""
        self.depth = 0
        self.lists = {}
        self.namespaces = {}
        self._references = defaultdict(int)
        self._serialized = {}
        self._subjects = {}
        self._topLevels = {}

        for prefix, ns in self.store.namespaces():
            self.addNamespace(prefix, ns)

    def buildPredicateHash(self, subject):
        """
        Build a hash key by predicate to a list of objects for the given
        subject
        """
        grouped = {}
        for _s, predicate, obj in self.store.triples((subject, None, None)):
            grouped.setdefault(predicate, []).append(obj)
        return grouped

    def sortProperties(self, properties):
        """Take a hash from predicate uris to lists of values.
        Sort the lists of values. Return a sorted list of properties."""
        # Sort each object list in place.
        comparator_key = cmp_to_key(_object_comparator)
        for objects in properties.values():
            objects.sort(key=comparator_key)

        # Preferred predicates first, then everything else in sorted order;
        # each predicate is emitted once.
        ordered = []
        emitted = set()
        for prop in list(self.predicateOrder) + sorted(properties):
            if prop in properties and prop not in emitted:
                ordered.append(prop)
                emitted.add(prop)
        return ordered

    def subjectDone(self, subject):
        """Mark a subject as done."""
        self._serialized[subject] = True

    def indent(self, modifier=0):
        """Returns indent string multiplied by the depth"""
        return self.indentString * (self.depth + modifier)

    def write(self, text):
        """Write text in given encoding."""
        encoded = text.encode(self.encoding, 'replace')
        self.stream.write(encoded)
| 158 | |
# Position of a node within a triple; passed as the `position` argument
# to TurtleSerializer.path() / label() / p_default() / p_squared().
SUBJECT = 0
VERB = 1
OBJECT = 2

# Passed as gen_prefix to TurtleSerializer.getQName() for literal datatypes.
_GEN_QNAME_FOR_DT = False
# Default for TurtleSerializer._spacious (extra blank lines after the prefix
# block and at the end of the document).
_SPACIOUS_OUTPUT = False
| 165 | |
| 166 | |
class TurtleSerializer(RecursiveSerializer):
    """
    Turtle serializer built on RecursiveSerializer.

    Writes @prefix declarations followed by one statement per subject,
    inlining blank nodes as ``[ ... ]`` and well-formed RDF lists as
    ``( ... )`` where possible.
    """

    short_name = "turtle"
    # NOTE(review): whitespace in this listing appears collapsed; confirm the
    # exact width of this indent string against the original file.
    indentString = ' '

    def __init__(self, store):
        """Create the serializer for `store` and initialise its state."""
        # The base constructor calls reset(), which registers namespaces via
        # addNamespace(); that method reads _ns_rewrite, so it must exist
        # before super().__init__ runs.
        self._ns_rewrite = {}
        super(TurtleSerializer, self).__init__(store)
        # Predicates that are written as a keyword instead of a name.
        self.keywords = {
            RDF.type: 'a'
        }
        self.reset()
        self.stream = None
        self._spacious = _SPACIOUS_OUTPUT

    def addNamespace(self, prefix, namespace):
        """
        Bind `prefix` to `namespace`, rewriting the prefix if necessary.

        Returns the prefix that was actually bound, which differs from the
        argument when a rewrite took place.
        """
        # Turtle does not support prefixes that start with _; if they occur
        # in the graph, rewrite to p_blah.  This is more complicated since
        # we need to make sure p_blah does not already exist.  Namespaces
        # are registered as we go, i.e. we may first see a triple with
        # prefix _9 - rewrite it to p_9 - and then later find a triple with
        # a "real" p_9 prefix, so we keep track of the rewrites made so far.
        if (prefix > '' and prefix[0] == '_') \
                or self.namespaces.get(prefix, namespace) != namespace:

            if prefix not in self._ns_rewrite:
                p = "p" + prefix
                while p in self.namespaces:
                    p = "p" + p
                self._ns_rewrite[prefix] = p

            prefix = self._ns_rewrite.get(prefix, prefix)

        super(TurtleSerializer, self).addNamespace(prefix, namespace)
        return prefix

    def reset(self):
        """Clear base-class state plus the Turtle-specific bookkeeping."""
        super(TurtleSerializer, self).reset()
        self._shortNames = {}
        self._started = False
        self._ns_rewrite = {}

    def serialize(self, stream, base=None, encoding=None,
                  spacious=None, **args):
        """
        Write the whole store to `stream` as Turtle.

        `stream` receives bytes.  `base` is stored on the serializer.
        `spacious`, when not None, overrides the spacious-output setting.
        `encoding` and any extra keyword arguments are accepted but not used
        by this method.
        """
        self.reset()
        self.stream = stream
        self.base = base

        if spacious is not None:
            self._spacious = spacious

        self.preprocess()
        subjects_list = self.orderSubjects()

        self.startDocument()

        for subject in subjects_list:
            # Subjects already written inline (nested blank nodes, list
            # cells) are skipped.
            if self.isDone(subject):
                continue
            # A newline follows every statement, the first one included.
            if self.statement(subject):
                self.write('\n')

        self.endDocument()
        stream.write("\n".encode('ascii'))

    def preprocessTriple(self, triple):
        """
        Do the base-class bookkeeping and register the namespaces needed
        for every node (and literal datatype) of `triple`.
        """
        super(TurtleSerializer, self).preprocessTriple(triple)
        for i, node in enumerate(triple):
            if node in self.keywords:
                continue
            # Don't use generated prefixes for subjects and objects
            self.getQName(node, gen_prefix=(i == VERB))
            if isinstance(node, Literal) and node.datatype:
                self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT)
        p = triple[1]
        if isinstance(p, BNode):  # hmm - when is P ever a bnode?
            self._references[p] += 1

    def getQName(self, uri, gen_prefix=True):
        """
        Return ``prefix:local`` for `uri`, registering the namespace.

        Returns None when `uri` is not a URIRef, when no qname can be
        derived for it, or when the local part ends with a dot.
        """
        if not isinstance(uri, URIRef):
            return None

        try:
            parts = self.store.compute_qname(uri, generate=gen_prefix)
        except Exception:
            # Any failure to split the URI is treated as "no qname", but
            # KeyboardInterrupt / SystemExit are allowed to propagate.

            # is the uri a namespace in itself?
            pfx = self.store.store.prefix(uri)
            if pfx is None:
                # nothing worked
                return None
            parts = (pfx, uri, '')

        prefix, namespace, local = parts

        # QName cannot end with .
        if local.endswith("."):
            return None

        prefix = self.addNamespace(prefix, namespace)

        return '%s:%s' % (prefix, local)

    def startDocument(self):
        """Write the sorted @prefix declarations."""
        self._started = True
        ns_list = sorted(self.namespaces.items())
        for prefix, uri in ns_list:
            self.write(self.indent() + '@prefix %s: <%s> .\n' % (prefix, uri))
        if ns_list and self._spacious:
            self.write('\n')

    def endDocument(self):
        """Write the trailing blank line used in spacious mode."""
        if self._spacious:
            self.write('\n')

    def statement(self, subject):
        """Mark `subject` as done and write its statement."""
        self.subjectDone(subject)
        return self.s_squared(subject) or self.s_default(subject)

    def s_default(self, subject):
        """Write ``subject predicate-list .`` and return True."""
        self.write('\n' + self.indent())
        self.path(subject, SUBJECT)
        self.predicateList(subject)
        self.write(' .')
        return True

    def s_squared(self, subject):
        """
        Write an unreferenced blank-node subject as ``[] predicate-list .``.

        Returns False, writing nothing, when `subject` is referenced
        elsewhere or is not a blank node.
        """
        if (self._references[subject] > 0) or not isinstance(subject, BNode):
            return False
        self.write('\n' + self.indent() + '[]')
        self.predicateList(subject)
        self.write(' .')
        return True

    def path(self, node, position, newline=False):
        """
        Write `node` at `position`, inline if possible.

        Raises Error if neither p_squared() nor p_default() wrote the node.
        """
        if not (self.p_squared(node, position, newline)
                or self.p_default(node, position, newline)):
            raise Error("Cannot serialize node '%s'" % (node, ))

    def p_default(self, node, position, newline=False):
        """Write the node's label, preceded by a space unless it starts a line."""
        if position != SUBJECT and not newline:
            self.write(' ')
        self.write(self.label(node, position))
        return True

    def label(self, node, position):
        """Return the Turtle text for `node` at the given `position`."""
        if node == RDF.nil:
            return '()'
        # Value comparison: `position` is a plain int constant, so identity
        # (`is`) would only work through interpreter int caching.
        if position == VERB and node in self.keywords:
            return self.keywords[node]
        if isinstance(node, Literal):
            return node._literal_n3(
                use_plain=True,
                qname_callback=lambda dt: self.getQName(
                    dt, _GEN_QNAME_FOR_DT))

        node = self.relativize(node)
        return self.getQName(node, position == VERB) or node.n3()

    def p_squared(self, node, position, newline=False):
        """
        Write a blank node inline as ``( ... )`` or ``[ ... ]``.

        Returns False, writing nothing, unless `node` is a not-yet-written
        blank node referenced at most once and not in subject position.
        """
        if (not isinstance(node, BNode)
                or node in self._serialized
                or self._references[node] > 1
                or position == SUBJECT):
            return False

        if not newline:
            self.write(' ')

        if self.isValidList(node):
            # this is a list
            self.write('(')
            self.depth += 1
            self.doList(node)
            self.depth -= 1
            self.write(' )')
        else:
            self.subjectDone(node)
            # The bracket is opened two levels in; the nested predicate list
            # is then written one level in.
            self.depth += 2
            self.write('[')
            self.depth -= 1
            self.predicateList(node, newline=False)
            self.write(' ]')
            self.depth -= 1

        return True

    def isValidList(self, l):
        """
        Checks if l is a valid RDF list, i.e. no nodes have other properties.
        """
        try:
            if self.store.value(l, RDF.first) is None:
                return False
        except Exception:
            # A lookup failure means "not a list"; interpreter-exit
            # exceptions are deliberately not swallowed.
            return False
        while l:
            # Every cell except rdf:nil must carry exactly two properties.
            if l != RDF.nil and len(
                    list(self.store.predicate_objects(l))) != 2:
                return False
            l = self.store.value(l, RDF.rest)
        return True

    def doList(self, l):
        """Write the items of the RDF list starting at `l`, marking cells done."""
        while l:
            item = self.store.value(l, RDF.first)
            if item is not None:
                self.path(item, OBJECT)
                self.subjectDone(l)
            l = self.store.value(l, RDF.rest)

    def predicateList(self, subject, newline=False):
        """Write all predicates of `subject` with their objects, ``;``-separated."""
        properties = self.buildPredicateHash(subject)
        propList = self.sortProperties(properties)
        if not propList:
            return
        first = propList[0]
        self.verb(first, newline=newline)
        self.objectList(properties[first])
        for predicate in propList[1:]:
            self.write(' ;\n' + self.indent(1))
            self.verb(predicate, newline=True)
            self.objectList(properties[predicate])

    def verb(self, node, newline=False):
        """Write `node` in predicate position."""
        self.path(node, VERB, newline)

    def objectList(self, objects):
        """Write the objects of one predicate, ``,``-separated."""
        if not objects:
            return
        # NOTE(review): this was historically `(count == 1) and 0 or 1`,
        # which suggests a single object was meant to get no extra indent.
        # Because 0 is falsy that expression always evaluated to 1, so the
        # depth is bumped for every object list.  The value is kept as-is so
        # serialized output stays byte-stable.
        depthmod = 1
        self.depth += depthmod
        self.path(objects[0], OBJECT)
        for obj in objects[1:]:
            self.write(',\n' + self.indent(1))
            self.path(obj, OBJECT, newline=True)
        self.depth -= depthmod
