diff env/lib/python3.7/site-packages/rdflib/plugins/parsers/rdfxml.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
line wrap: on
line diff
--- a/env/lib/python3.7/site-packages/rdflib/plugins/parsers/rdfxml.py	Thu May 14 16:47:39 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,578 +0,0 @@
-"""
-An RDF/XML parser for RDFLib
-"""
-
-from xml.sax import make_parser
-from xml.sax.handler import ErrorHandler
-from xml.sax.saxutils import handler, quoteattr, escape
-from urllib.parse import urljoin, urldefrag
-
-from rdflib.namespace import RDF, is_ncname
-from rdflib.term import URIRef
-from rdflib.term import BNode
-from rdflib.term import Literal
-from rdflib.exceptions import ParserError, Error
-from rdflib.parser import Parser
-
-__all__ = ['create_parser', 'BagID', 'ElementHandler',
-           'RDFXMLHandler', 'RDFXMLParser']
-
-RDFNS = RDF
-
-# http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI
-# A mapping from unqualified terms to their qualified version.
-UNQUALIFIED = {"about": RDF.about,
-               "ID": RDF.ID,
-               "type": RDF.type,
-               "resource": RDF.resource,
-               "parseType": RDF.parseType}
-
-# http://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms
-CORE_SYNTAX_TERMS = [RDF.RDF, RDF.ID, RDF.about, RDF.parseType,
-                     RDF.resource, RDF.nodeID, RDF.datatype]
-
-# http://www.w3.org/TR/rdf-syntax-grammar/#syntaxTerms
-SYNTAX_TERMS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li]
-
-# http://www.w3.org/TR/rdf-syntax-grammar/#oldTerms
-OLD_TERMS = [
-    URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"),
-    URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"),
-    URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID")]
-
-NODE_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.li, ] + OLD_TERMS
-NODE_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.nodeID, RDF.about]
-
-PROPERTY_ELEMENT_EXCEPTIONS = \
-    CORE_SYNTAX_TERMS + [RDF.Description, ] + OLD_TERMS
-PROPERTY_ATTRIBUTE_EXCEPTIONS = \
-    CORE_SYNTAX_TERMS + [RDF.Description, RDF.li] + OLD_TERMS
-PROPERTY_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.resource, RDF.nodeID]
-
-XMLNS = "http://www.w3.org/XML/1998/namespace"
-BASE = (XMLNS, "base")
-LANG = (XMLNS, "lang")
-
-
-class BagID(URIRef):
-    __slots__ = ['li']
-
-    def __init__(self, val):
-        super(URIRef, self).__init__(val)
-        self.li = 0
-
-    def next_li(self):
-        self.li += 1
-        return RDFNS[self.li]
-
-
-class ElementHandler(object):
-    __slots__ = ['start', 'char', 'end', 'li', 'id',
-                 'base', 'subject', 'predicate', 'object',
-                 'list', 'language', 'datatype', 'declared', 'data']
-
-    def __init__(self):
-        self.start = None
-        self.char = None
-        self.end = None
-        self.li = 0
-        self.id = None
-        self.base = None
-        self.subject = None
-        self.object = None
-        self.list = None
-        self.language = None
-        self.datatype = None
-        self.declared = None
-        self.data = None
-
-    def next_li(self):
-        self.li += 1
-        return RDFNS[self.li]
-
-
-class RDFXMLHandler(handler.ContentHandler):
-
-    def __init__(self, store):
-        self.store = store
-        self.preserve_bnode_ids = False
-        self.reset()
-
-    def reset(self):
-        document_element = ElementHandler()
-        document_element.start = self.document_element_start
-        document_element.end = lambda name, qname: None
-        self.stack = [None, document_element, ]
-        self.ids = {}  # remember IDs we have already seen
-        self.bnode = {}
-        self._ns_contexts = [{}]  # contains uri -> prefix dicts
-        self._current_context = self._ns_contexts[-1]
-
-    # ContentHandler methods
-
-    def setDocumentLocator(self, locator):
-        self.locator = locator
-
-    def startDocument(self):
-        pass
-
-    def startPrefixMapping(self, prefix, namespace):
-        self._ns_contexts.append(self._current_context.copy())
-        self._current_context[namespace] = prefix
-        self.store.bind(prefix, namespace or "", override=False)
-
-    def endPrefixMapping(self, prefix):
-        self._current_context = self._ns_contexts[-1]
-        del self._ns_contexts[-1]
-
-    def startElementNS(self, name, qname, attrs):
-        stack = self.stack
-        stack.append(ElementHandler())
-        current = self.current
-        parent = self.parent
-        base = attrs.get(BASE, None)
-        if base is not None:
-            base, frag = urldefrag(base)
-            if parent and parent.base:
-                base = urljoin(parent.base, base)
-            else:
-                systemId = self.locator.getPublicId() \
-                    or self.locator.getSystemId()
-                if systemId:
-                    base = urljoin(systemId, base)
-        else:
-            if parent:
-                base = parent.base
-            if base is None:
-                systemId = self.locator.getPublicId() \
-                    or self.locator.getSystemId()
-                if systemId:
-                    base, frag = urldefrag(systemId)
-        current.base = base
-        language = attrs.get(LANG, None)
-        if language is None:
-            if parent:
-                language = parent.language
-        current.language = language
-        current.start(name, qname, attrs)
-
-    def endElementNS(self, name, qname):
-        self.current.end(name, qname)
-        self.stack.pop()
-
-    def characters(self, content):
-        char = self.current.char
-        if char:
-            char(content)
-
-    def ignorableWhitespace(self, content):
-        pass
-
-    def processingInstruction(self, target, data):
-        pass
-
-    def add_reified(self, sid, xxx_todo_changeme):
-        (s, p, o) = xxx_todo_changeme
-        self.store.add((sid, RDF.type, RDF.Statement))
-        self.store.add((sid, RDF.subject, s))
-        self.store.add((sid, RDF.predicate, p))
-        self.store.add((sid, RDF.object, o))
-
-    def error(self, message):
-        locator = self.locator
-        info = "%s:%s:%s: " % (locator.getSystemId(),
-                               locator.getLineNumber(),
-                               locator.getColumnNumber())
-        raise ParserError(info + message)
-
-    def get_current(self):
-        return self.stack[-2]
-    # Create a read only property called current so that self.current
-    # give the current element handler.
-    current = property(get_current)
-
-    def get_next(self):
-        return self.stack[-1]
-    # Create a read only property that gives the element handler to be
-    # used for the next element.
-    next = property(get_next)
-
-    def get_parent(self):
-        return self.stack[-3]
-    # Create a read only property that gives the current parent
-    # element handler
-    parent = property(get_parent)
-
-    def absolutize(self, uri):
-        result = urljoin(self.current.base, uri, allow_fragments=1)
-        if uri and uri[-1] == "#" and result[-1] != "#":
-            result = "%s#" % result
-        return URIRef(result)
-
-    def convert(self, name, qname, attrs):
-        if name[0] is None:
-            name = URIRef(name[1])
-        else:
-            name = URIRef("".join(name))
-        atts = {}
-        for (n, v) in list(attrs.items()):  # attrs._attrs.iteritems(): #
-            if n[0] is None:
-                att = n[1]
-            else:
-                att = "".join(n)
-            if att.startswith(XMLNS) or att[0:3].lower() == "xml":
-                pass
-            elif att in UNQUALIFIED:
-                # if not RDFNS[att] in atts:
-                atts[RDFNS[att]] = v
-            else:
-                atts[URIRef(att)] = v
-        return name, atts
-
-    def document_element_start(self, name, qname, attrs):
-        if name[0] and URIRef("".join(name)) == RDF.RDF:
-            # Cheap hack so 2to3 doesn't turn it into __next__
-            next = getattr(self, 'next')
-            next.start = self.node_element_start
-            next.end = self.node_element_end
-        else:
-            self.node_element_start(name, qname, attrs)
-            # self.current.end = self.node_element_end
-            # TODO... set end to something that sets start such that
-            # another element will cause error
-
-    def node_element_start(self, name, qname, attrs):
-        name, atts = self.convert(name, qname, attrs)
-        current = self.current
-        absolutize = self.absolutize
-
-        # Cheap hack so 2to3 doesn't turn it into __next__
-        next = getattr(self, 'next')
-        next.start = self.property_element_start
-        next.end = self.property_element_end
-
-        if name in NODE_ELEMENT_EXCEPTIONS:
-            self.error("Invalid node element URI: %s" % name)
-
-        if RDF.ID in atts:
-            if RDF.about in atts or RDF.nodeID in atts:
-                self.error(
-                    "Can have at most one of rdf:ID, rdf:about, and rdf:nodeID"
-                )
-
-            id = atts[RDF.ID]
-            if not is_ncname(id):
-                self.error("rdf:ID value is not a valid NCName: %s" % id)
-            subject = absolutize("#%s" % id)
-            if subject in self.ids:
-                self.error(
-                    "two elements cannot use the same ID: '%s'" % subject)
-            self.ids[subject] = 1  # IDs can only appear once within a document
-        elif RDF.nodeID in atts:
-            if RDF.ID in atts or RDF.about in atts:
-                self.error(
-                    "Can have at most one of rdf:ID, rdf:about, and rdf:nodeID"
-                )
-            nodeID = atts[RDF.nodeID]
-            if not is_ncname(nodeID):
-                self.error(
-                    "rdf:nodeID value is not a valid NCName: %s" % nodeID)
-            if self.preserve_bnode_ids is False:
-                if nodeID in self.bnode:
-                    subject = self.bnode[nodeID]
-                else:
-                    subject = BNode()
-                    self.bnode[nodeID] = subject
-            else:
-                subject = BNode(nodeID)
-        elif RDF.about in atts:
-            if RDF.ID in atts or RDF.nodeID in atts:
-                self.error(
-                    "Can have at most one of rdf:ID, rdf:about, and rdf:nodeID"
-                )
-            subject = absolutize(atts[RDF.about])
-        else:
-            subject = BNode()
-
-        if name != RDF.Description:  # S1
-            self.store.add((subject, RDF.type, absolutize(name)))
-
-        language = current.language
-        for att in atts:
-            if not att.startswith(str(RDFNS)):
-                predicate = absolutize(att)
-                try:
-                    object = Literal(atts[att], language)
-                except Error as e:
-                    self.error(e.msg)
-            elif att == RDF.type:  # S2
-                predicate = RDF.type
-                object = absolutize(atts[RDF.type])
-            elif att in NODE_ELEMENT_ATTRIBUTES:
-                continue
-            elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS:  # S3
-                self.error("Invalid property attribute URI: %s" % att)
-                continue  # for when error does not throw an exception
-            else:
-                predicate = absolutize(att)
-                try:
-                    object = Literal(atts[att], language)
-                except Error as e:
-                    self.error(e.msg)
-            self.store.add((subject, predicate, object))
-
-        current.subject = subject
-
-    def node_element_end(self, name, qname):
-        # repeat node-elements are only allowed
-        # at at top-level
-
-        if self.parent.object and self.current != self.stack[2]:
-
-            self.error("Repeat node-elements inside property elements: %s"%"".join(name))
-
-        self.parent.object = self.current.subject
-
-    def property_element_start(self, name, qname, attrs):
-        name, atts = self.convert(name, qname, attrs)
-        current = self.current
-        absolutize = self.absolutize
-
-        # Cheap hack so 2to3 doesn't turn it into __next__
-        next = getattr(self, 'next')
-        object = None
-        current.data = None
-        current.list = None
-
-        if not name.startswith(str(RDFNS)):
-            current.predicate = absolutize(name)
-        elif name == RDF.li:
-            current.predicate = current.next_li()
-        elif name in PROPERTY_ELEMENT_EXCEPTIONS:
-            self.error("Invalid property element URI: %s" % name)
-        else:
-            current.predicate = absolutize(name)
-
-        id = atts.get(RDF.ID, None)
-        if id is not None:
-            if not is_ncname(id):
-                self.error("rdf:ID value is not a value NCName: %s" % id)
-            current.id = absolutize("#%s" % id)
-        else:
-            current.id = None
-
-        resource = atts.get(RDF.resource, None)
-        nodeID = atts.get(RDF.nodeID, None)
-        parse_type = atts.get(RDF.parseType, None)
-        if resource is not None and nodeID is not None:
-            self.error(
-                "Property element cannot have both rdf:nodeID and rdf:resource"
-            )
-        if resource is not None:
-            object = absolutize(resource)
-            next.start = self.node_element_start
-            next.end = self.node_element_end
-        elif nodeID is not None:
-            if not is_ncname(nodeID):
-                self.error(
-                    "rdf:nodeID value is not a valid NCName: %s" % nodeID)
-            if self.preserve_bnode_ids is False:
-                if nodeID in self.bnode:
-                    object = self.bnode[nodeID]
-                else:
-                    subject = BNode()
-                    self.bnode[nodeID] = subject
-                    object = subject
-            else:
-                object = subject = BNode(nodeID)
-            next.start = self.node_element_start
-            next.end = self.node_element_end
-        else:
-            if parse_type is not None:
-                for att in atts:
-                    if att != RDF.parseType and att != RDF.ID:
-                        self.error("Property attr '%s' now allowed here" % att)
-                if parse_type == "Resource":
-                    current.subject = object = BNode()
-                    current.char = self.property_element_char
-                    next.start = self.property_element_start
-                    next.end = self.property_element_end
-                elif parse_type == "Collection":
-                    current.char = None
-                    object = current.list = RDF.nil  # BNode()
-                                                     # self.parent.subject
-                    next.start = self.node_element_start
-                    next.end = self.list_node_element_end
-                else:  # if parse_type=="Literal":
-                     # All other values are treated as Literal
-                     # See: http://www.w3.org/TR/rdf-syntax-grammar/
-                                # parseTypeOtherPropertyElt
-                    object = Literal("", datatype=RDF.XMLLiteral)
-                    current.char = self.literal_element_char
-                    current.declared = {XMLNS: 'xml'}
-                    next.start = self.literal_element_start
-                    next.char = self.literal_element_char
-                    next.end = self.literal_element_end
-                current.object = object
-                return
-            else:
-                object = None
-                current.char = self.property_element_char
-                next.start = self.node_element_start
-                next.end = self.node_element_end
-
-        datatype = current.datatype = atts.get(RDF.datatype, None)
-        language = current.language
-        if datatype is not None:
-            # TODO: check that there are no atts other than datatype and id
-            datatype = absolutize(datatype)
-        else:
-            for att in atts:
-                if not att.startswith(str(RDFNS)):
-                    predicate = absolutize(att)
-                elif att in PROPERTY_ELEMENT_ATTRIBUTES:
-                    continue
-                elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS:
-                    self.error("""Invalid property attribute URI: %s""" % att)
-                else:
-                    predicate = absolutize(att)
-
-                if att == RDF.type:
-                    o = URIRef(atts[att])
-                else:
-                    if datatype is not None:
-                        language = None
-                    o = Literal(atts[att], language, datatype)
-
-                if object is None:
-                    object = BNode()
-                self.store.add((object, predicate, o))
-        if object is None:
-            current.data = ""
-            current.object = None
-        else:
-            current.data = None
-            current.object = object
-
-    def property_element_char(self, data):
-        current = self.current
-        if current.data is not None:
-            current.data += data
-
-    def property_element_end(self, name, qname):
-        current = self.current
-        if current.data is not None and current.object is None:
-            literalLang = current.language
-            if current.datatype is not None:
-                literalLang = None
-            current.object = Literal(
-                current.data, literalLang, current.datatype)
-            current.data = None
-        if self.next.end == self.list_node_element_end:
-            if current.object != RDF.nil:
-                self.store.add((current.list, RDF.rest, RDF.nil))
-        if current.object is not None:
-            self.store.add(
-                (self.parent.subject, current.predicate, current.object))
-            if current.id is not None:
-                self.add_reified(current.id, (self.parent.subject,
-                                 current.predicate, current.object))
-        current.subject = None
-
-    def list_node_element_end(self, name, qname):
-        current = self.current
-        if self.parent.list == RDF.nil:
-            list = BNode()
-            # Removed between 20030123 and 20030905
-            # self.store.add((list, RDF.type, LIST))
-            self.parent.list = list
-            self.store.add((self.parent.list, RDF.first, current.subject))
-            self.parent.object = list
-            self.parent.char = None
-        else:
-            list = BNode()
-            # Removed between 20030123 and 20030905
-            # self.store.add((list, RDF.type, LIST))
-            self.store.add((self.parent.list, RDF.rest, list))
-            self.store.add((list, RDF.first, current.subject))
-            self.parent.list = list
-
-    def literal_element_start(self, name, qname, attrs):
-        current = self.current
-        self.next.start = self.literal_element_start
-        self.next.char = self.literal_element_char
-        self.next.end = self.literal_element_end
-        current.declared = self.parent.declared.copy()
-        if name[0]:
-            prefix = self._current_context[name[0]]
-            if prefix:
-                current.object = "<%s:%s" % (prefix, name[1])
-            else:
-                current.object = "<%s" % name[1]
-            if not name[0] in current.declared:
-                current.declared[name[0]] = prefix
-                if prefix:
-                    current.object += (' xmlns:%s="%s"' % (prefix, name[0]))
-                else:
-                    current.object += (' xmlns="%s"' % name[0])
-        else:
-            current.object = "<%s" % name[1]
-
-        for (name, value) in list(attrs.items()):
-            if name[0]:
-                if not name[0] in current.declared:
-                    current.declared[name[0]] = self._current_context[name[0]]
-                name = current.declared[name[0]] + ":" + name[1]
-            else:
-                name = name[1]
-            current.object += (' %s=%s' % (name, quoteattr(value)))
-        current.object += ">"
-
-    def literal_element_char(self, data):
-        self.current.object += escape(data)
-
-    def literal_element_end(self, name, qname):
-        if name[0]:
-            prefix = self._current_context[name[0]]
-            if prefix:
-                end = "</%s:%s>" % (prefix, name[1])
-            else:
-                end = "</%s>" % name[1]
-        else:
-            end = "</%s>" % name[1]
-        self.parent.object += self.current.object + end
-
-
-def create_parser(target, store):
-    parser = make_parser()
-    try:
-        # Workaround for bug in expatreader.py. Needed when
-        # expatreader is trying to guess a prefix.
-        parser.start_namespace_decl(
-            "xml", "http://www.w3.org/XML/1998/namespace")
-    except AttributeError:
-        pass  # Not present in Jython (at least)
-    parser.setFeature(handler.feature_namespaces, 1)
-    rdfxml = RDFXMLHandler(store)
-    rdfxml.setDocumentLocator(target)
-    # rdfxml.setDocumentLocator(_Locator(self.url, self.parser))
-    parser.setContentHandler(rdfxml)
-    parser.setErrorHandler(ErrorHandler())
-    return parser
-
-
-class RDFXMLParser(Parser):
-
-    def __init__(self):
-        pass
-
-    def parse(self, source, sink, **args):
-        self._parser = create_parser(source, sink)
-        content_handler = self._parser.getContentHandler()
-        preserve_bnode_ids = args.get("preserve_bnode_ids", None)
-        if preserve_bnode_ids is not None:
-            content_handler.preserve_bnode_ids = preserve_bnode_ids
-        # # We're only using it once now
-        # content_handler.reset()
-        # self._parser.reset()
-        self._parser.parse(source)