Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/rdflib/plugins/serializers/rdfxml.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 from rdflib.plugins.serializers.xmlwriter import XMLWriter | |
| 2 | |
| 3 from rdflib.namespace import Namespace, RDF, RDFS # , split_uri | |
| 4 | |
| 5 from rdflib.term import URIRef, Literal, BNode | |
| 6 from rdflib.util import first, more_than | |
| 7 from rdflib.collection import Collection | |
| 8 from rdflib.serializer import Serializer | |
| 9 | |
| 10 # from rdflib.exceptions import Error | |
| 11 | |
| 12 from rdflib.py3compat import b | |
| 13 | |
| 14 from xml.sax.saxutils import quoteattr, escape | |
| 15 import xml.dom.minidom | |
| 16 | |
| 17 from .xmlwriter import ESCAPE_ENTITIES | |
| 18 | |
| 19 __all__ = ['fix', 'XMLSerializer', 'PrettyXMLSerializer'] | |
| 20 | |
| 21 | |
class XMLSerializer(Serializer):
    """Serialize the store as flat RDF/XML.

    Each subject is written as one ``rdf:Description`` element directly
    under ``rdf:RDF``; each of its predicate/object pairs becomes a child
    element of that description.
    """

    def __init__(self, store):
        super(XMLSerializer, self).__init__(store)

    def __bindings(self):
        """Yield ``(prefix, namespace)`` for every predicate in the store.

        The ``rdf`` prefix is always included. If the store already binds
        ``rdf`` for one of its predicates, it must map to the RDF syntax
        namespace.
        """
        store = self.store
        nm = store.namespace_manager
        bindings = {}

        for predicate in set(store.predicates()):
            prefix, namespace, _name = nm.compute_qname(predicate)
            bindings[prefix] = URIRef(namespace)

        RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#")

        if "rdf" in bindings:
            assert bindings["rdf"] == RDFNS
        else:
            bindings["rdf"] = RDFNS

        for prefix, namespace in bindings.items():
            yield prefix, namespace

    def serialize(self, stream, base=None, encoding=None, **args):
        """Write the store to *stream* as encoded RDF/XML.

        :param stream: object with a ``write`` method accepting encoded
            bytes.
        :param base: stored on ``self.base`` before any URI is relativized.
        :param encoding: output encoding; when omitted, ``self.encoding``
            is used. The same value is announced in the XML declaration.
        :param args: optional ``xml_base`` adds an ``xml:base`` attribute
            to the root element.
        """
        self.base = base
        self.__stream = stream
        self.__serialized = {}
        # Honour an explicitly requested encoding instead of discarding it,
        # so the declaration and the encoded bytes always agree.
        encoding = encoding or self.encoding

        def write(uni):
            return stream.write(uni.encode(encoding, 'replace'))

        self.write = write

        # startDocument
        write('<?xml version="1.0" encoding="%s"?>\n' % encoding)

        # startRDF
        write('<rdf:RDF\n')

        # If provided, write xml:base attribute for the RDF.
        # "%s" % value coerces non-string values before quoting.
        if "xml_base" in args:
            write('   xml:base=%s\n' % quoteattr("%s" % args['xml_base']))

        for prefix, namespace in sorted(self.__bindings()):
            # quoteattr escapes &, < and quotes so the declaration stays
            # well-formed whatever the namespace URI contains.
            if prefix:
                write('   xmlns:%s=%s\n' % (prefix, quoteattr(namespace)))
            else:
                write('   xmlns=%s\n' % quoteattr(namespace))
        write('>\n')

        # write out triples by subject
        for subject in self.store.subjects():
            self.subject(subject, 1)

        # endRDF
        write("</rdf:RDF>\n")

        # Drop the bookkeeping dict so that its memory can be reclaimed.
        del self.__serialized

    def subject(self, subject, depth=1):
        """Write the description element for *subject* at most once.

        Subjects that are neither ``BNode`` nor ``URIRef`` are recorded as
        seen but produce no output.
        """
        if subject in self.__serialized:
            return
        self.__serialized[subject] = 1

        if not isinstance(subject, (BNode, URIRef)):
            return

        write = self.write
        indent = "  " * depth
        element_name = "rdf:Description"

        if isinstance(subject, BNode):
            write('%s<%s rdf:nodeID="%s"' % (
                indent, element_name, subject))
        else:
            uri = quoteattr(self.relativize(subject))
            write("%s<%s rdf:about=%s" % (indent, element_name, uri))

        if (subject, None, None) in self.store:
            write(">\n")

            for predicate, object in self.store.predicate_objects(subject):
                self.predicate(predicate, object, depth + 1)
            write("%s</%s>\n" % (indent, element_name))
        else:
            # No statements about this subject: emit an empty element.
            write("/>\n")

    def predicate(self, predicate, object, depth=1):
        """Write one property element for the *predicate*/*object* pair.

        Literals become element text, with ``xml:lang`` / ``rdf:datatype``
        attributes when set. Blank nodes are referenced via ``rdf:nodeID``
        and everything else via ``rdf:resource``.
        """
        write = self.write
        indent = "  " * depth
        qname = self.store.namespace_manager.qname(predicate)

        if isinstance(object, Literal):
            attributes = ""

            # Attribute values are quoted/escaped so that unusual language
            # tags or datatype URIs cannot produce malformed XML.
            if object.language:
                attributes += ' xml:lang=%s' % quoteattr(object.language)

            if object.datatype:
                attributes += ' rdf:datatype=%s' % quoteattr(object.datatype)

            write("%s<%s%s>%s</%s>\n" %
                  (indent, qname, attributes,
                   escape(object, ESCAPE_ENTITIES), qname))
        elif isinstance(object, BNode):
            write('%s<%s rdf:nodeID="%s"/>\n' %
                  (indent, qname, object))
        else:
            write("%s<%s rdf:resource=%s/>\n" %
                  (indent, qname, quoteattr(self.relativize(object))))
| 139 | |
# Identifiers handed to the XML writer's ``attribute()`` call for the
# ``xml:lang`` and ``xml:base`` attributes: the XML namespace URI with the
# attribute's local name appended directly (no separator).
XMLLANG = "http://www.w3.org/XML/1998/namespacelang"
XMLBASE = "http://www.w3.org/XML/1998/namespacebase"
# OWL vocabulary namespace; used below to look up ``OWL_NS.Class``.
OWL_NS = Namespace('http://www.w3.org/2002/07/owl#')
| 143 | |
| 144 | |
| 145 # TODO: | |
def fix(val):
    """Return *val* without a leading ``_:`` marker.

    The marker is removed from nodeIDs because it is not valid in an
    NCName; any other value is returned unchanged.
    """
    return val[2:] if val.startswith("_:") else val
| 152 | |
| 153 | |
class PrettyXMLSerializer(Serializer):
    """Serialize the store as nested ("pretty") RDF/XML.

    Resources are written inline inside the property element that refers
    to them, down to ``max_depth`` levels. Typed nodes use their
    ``rdf:type`` as element name when it has a qname, and RDF lists are
    written with ``rdf:parseType="Collection"``.
    """

    def __init__(self, store, max_depth=3):
        """
        :param store: graph to serialize.
        :param max_depth: default nesting limit, used when ``serialize``
            is called without a ``max_depth`` keyword argument.
        """
        super(PrettyXMLSerializer, self).__init__(store)
        # URIRef collection members that must be emitted as a bare
        # rdf:Description/rdf:about element (see ``predicate``).
        self.forceRDFAbout = set()
        # Keep the constructor argument so that it is no longer ignored.
        self.__default_max_depth = max_depth

    def serialize(self, stream, base=None, encoding=None, **args):
        """Write the store to *stream* as RDF/XML.

        :param stream: byte stream handed to ``XMLWriter``.
        :param base: stored on ``self.base`` before any URI is relativized.
        :param encoding: passed through to ``XMLWriter``.
        :param args: optional ``max_depth`` (must be > 0) overrides the
            constructor default; optional ``xml_base`` adds an
            ``xml:base`` attribute to the root element.
        """
        self.__serialized = {}
        store = self.store
        self.base = base
        self.max_depth = args.get("max_depth", self.__default_max_depth)
        assert self.max_depth > 0, "max_depth must be greater than 0"

        self.nm = nm = store.namespace_manager
        self.writer = writer = XMLWriter(stream, nm, encoding)
        namespaces = {}

        # Predicates and rdf:type objects are the terms that may be used
        # as element names, so their namespaces are declared.
        possible = set(store.predicates()).union(
            store.objects(None, RDF.type))

        for predicate in possible:
            prefix, namespace, _local = nm.compute_qname(predicate)
            namespaces[prefix] = namespace

        namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"

        writer.push(RDF.RDF)

        if "xml_base" in args:
            writer.attribute(XMLBASE, args["xml_base"])

        writer.namespaces(iter(namespaces.items()))

        # Write out subjects that can not be inline: those never used as
        # an object, and those that refer to themselves.
        for subject in store.subjects():
            if (None, None, subject) in store:
                if (subject, None, subject) in store:
                    self.subject(subject, 1)
            else:
                self.subject(subject, 1)

        # write out anything that has not yet been reached
        # write out BNodes last (to ensure they can be inlined where possible)
        bnodes = set()

        for subject in store.subjects():
            if isinstance(subject, BNode):
                bnodes.add(subject)
                continue
            self.subject(subject, 1)

        # now serialize only those BNodes that have not been serialized yet
        for bnode in bnodes:
            if bnode not in self.__serialized:
                # Must be ``bnode``: passing the stale ``subject`` loop
                # variable here would leave these blank nodes unwritten.
                self.subject(bnode, 1)

        writer.pop(RDF.RDF)
        stream.write(b("\n"))

        # Set to None so that the memory can get garbage collected.
        self.__serialized = None

    def subject(self, subject, depth=1):
        """Write the node element for *subject* unless already written.

        A subject listed in ``forceRDFAbout`` is written as an empty
        ``rdf:Description`` carrying only ``rdf:about`` and is then
        removed from that set.
        """
        store = self.store
        writer = self.writer

        if subject in self.forceRDFAbout:
            writer.push(RDF.Description)
            writer.attribute(RDF.about, self.relativize(subject))
            writer.pop(RDF.Description)
            self.forceRDFAbout.remove(subject)
            return

        if subject in self.__serialized:
            return
        self.__serialized[subject] = 1

        rdf_type = first(store.objects(subject, RDF.type))

        try:
            self.nm.qname(rdf_type)
        except Exception:
            # No type, or a type without a usable qname: fall back to
            # rdf:Description below.
            rdf_type = None

        element = rdf_type or RDF.Description
        writer.push(element)

        if isinstance(subject, BNode):
            # The blank node label is always written; no attempt is made
            # to omit labels that are referenced only once.
            writer.attribute(RDF.nodeID, fix(subject))
        else:
            writer.attribute(RDF.about, self.relativize(subject))

        if (subject, None, None) in store:
            for predicate, object in store.predicate_objects(subject):
                # The rdf:type used as element name is not repeated as a
                # property.
                if not (predicate == RDF.type and object == rdf_type):
                    self.predicate(predicate, object, depth + 1)

        writer.pop(element)

    def predicate(self, predicate, object, depth=1):
        """Write the property element for *predicate* with value *object*.

        Depending on *object* the value is written as literal text, as a
        reference (``rdf:resource`` / ``rdf:nodeID``), as a collection,
        or as a nested node element produced by ``subject``.
        """
        writer = self.writer
        store = self.store
        writer.push(predicate)

        if isinstance(object, Literal):
            if object.language:
                writer.attribute(XMLLANG, object.language)

            if (object.datatype == RDF.XMLLiteral and
                    isinstance(object.value, xml.dom.minidom.Document)):
                writer.attribute(RDF.parseType, "Literal")
                writer.text("")
                # The literal is written to the underlying stream as-is,
                # bypassing writer.text().
                writer.stream.write(object)
            else:
                if object.datatype:
                    writer.attribute(RDF.datatype, object.datatype)
                writer.text(object)

        elif object in self.__serialized or (object, None, None) not in store:
            # Already written elsewhere, or nothing is stated about it:
            # emit a reference only.
            if isinstance(object, BNode):
                # NOTE(review): presumably true when at least one triple
                # has this node as its object -- confirm against
                # rdflib.util.more_than.
                if more_than(store.triples((None, None, object)), 0):
                    writer.attribute(RDF.nodeID, fix(object))
            else:
                writer.attribute(RDF.resource, self.relativize(object))

        elif first(store.objects(object, RDF.first)):  # may not have type
            # RDF.List

            self.__serialized[object] = 1

            # Warn that any assertions on object other than
            # RDF.first and RDF.rest are ignored... including RDF.List
            import warnings
            warnings.warn(
                "Assertions on %s other than RDF.first " % repr(object) +
                "and RDF.rest are ignored ... including RDF.List",
                UserWarning, stacklevel=2)
            writer.attribute(RDF.parseType, "Collection")

            col = Collection(store, object)

            for item in col:

                if isinstance(item, URIRef):
                    # Makes subject() emit a bare rdf:about element.
                    self.forceRDFAbout.add(item)
                self.subject(item)

                if not isinstance(item, URIRef):
                    self.__serialized[item] = 1

        elif first(store.triples_choices(
                (object, RDF.type, [OWL_NS.Class, RDFS.Class]))) \
                and isinstance(object, URIRef):
            # URIRefs typed as owl:Class / rdfs:Class are referenced
            # rather than nested.
            writer.attribute(RDF.resource, self.relativize(object))

        elif depth <= self.max_depth:
            self.subject(object, depth + 1)

        elif isinstance(object, BNode):

            if object not in self.__serialized \
                    and (object, None, None) in store \
                    and len(list(store.subjects(object=object))) == 1:
                # inline blank nodes if they haven't been serialized yet
                # and are only referenced once (regardless of depth)
                self.subject(object, depth + 1)
            else:
                writer.attribute(RDF.nodeID, fix(object))

        else:
            writer.attribute(RDF.resource, self.relativize(object))

        writer.pop(predicate)
