Mercurial > repos > guerler > springsuite
annotate planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/pyRdfa/embeddedRDF.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler | 
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 1 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 1 # -*- coding: utf-8 -*- | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 2 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 3 Extracting possible embedded RDF/XML content from the file and parse it separately into the Graph. This is used, for example | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 4 by U{SVG 1.2 Tiny<http://www.w3.org/TR/SVGMobile12/>}. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 5 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 6 @author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 7 @license: This software is available for use under the | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 8 U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 9 @contact: Ivan Herman, ivan@w3.org | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 10 @version: $Id: embeddedRDF.py,v 1.15 2012/11/16 17:51:53 ivan Exp $ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 11 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 12 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 13 # Python 3 foolproof way... | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 14 try : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 15 from io import StringIO | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 16 except : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 17 from io import StringIO | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 18 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 19 from .host import HostLanguage, accept_embedded_rdf_xml, accept_embedded_turtle | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 20 from .utils import return_XML | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 21 import re, sys | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 22 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 23 def handle_embeddedRDF(node, graph, state) : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 24 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 25 Handles embedded RDF. There are two possibilities: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 26 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 27 - the file is one of the XML dialects that allows for an embedded RDF/XML portion. See the L{host.accept_embedded_rdf_xml} for those (a typical example is SVG). | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 28 - the file is HTML and there is a turtle portion in the C{<script>} element with type text/turtle. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 29 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 30 @param node: a DOM node for the top level element | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 31 @param graph: target rdf graph | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 32 @type graph: RDFLib's Graph object instance | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 33 @param state: the inherited state (namespaces, lang, etc) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 34 @type state: L{state.ExecutionContext} | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 35 @return: whether an RDF/XML or turtle content has been detected or not. If TRUE, the RDFa processing should not occur on the node and its descendents. | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 36 @rtype: Boolean | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 37 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 38 #def _get_prefixes_in_turtle() : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 39 # retval = "" | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 40 # for key in state.term_or_curie.ns : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 41 # retval += "@prefix %s: <%s> .\n" % (key, state.term_or_curie.ns[key]) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 42 # retval += '\n' | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 43 # return retval | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 44 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 45 # This feature is optional! | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 46 def _get_literal(Pnode): | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 47 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 48 Get the full text | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 49 @param Pnode: DOM Node | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 50 @return: string | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 51 """ | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 52 rc = "" | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 53 for node in Pnode.childNodes: | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 54 if node.nodeType in [node.TEXT_NODE, node.CDATA_SECTION_NODE] : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 55 rc = rc + node.data | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 56 # Sigh... the HTML5 parser does not recognize the CDATA escapes, ie, it just passes on the <![CDATA[ and ]]> strings:-( | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 57 return rc.replace("<![CDATA[","").replace("]]>","") | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 58 | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 59 if state.options.embedded_rdf : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 60 # Embedded turtle, per the latest Turtle draft | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 61 if state.options.host_language in accept_embedded_turtle and node.nodeName.lower() == "script" : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 62 if node.hasAttribute("type") and node.getAttribute("type") == "text/turtle" : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 63 #prefixes = _get_prefixes_in_turtle() | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 64 #content = _get_literal(node) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 65 #rdf = StringIO(prefixes + content) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 66 content = _get_literal(node) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 67 rdf = StringIO(content) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 68 try : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 69 graph.parse(rdf, format="n3", publicID = state.base) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 70 state.options.add_info("The output graph includes triples coming from an embedded Turtle script") | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 71 except : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 72 (type,value,traceback) = sys.exc_info() | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 73 state.options.add_error("Embedded Turtle content could not be parsed (problems with %s?); ignored" % value) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 74 return True | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 75 elif state.options.host_language in accept_embedded_rdf_xml and node.localName == "RDF" and node.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#" : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 76 rdf = StringIO(return_XML(state, node)) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 77 try : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 78 graph.parse(rdf) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 79 state.options.add_info("The output graph includes triples coming from an embedded RDF/XML subtree") | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 80 except : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 81 (type,value,traceback) = sys.exc_info() | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 82 state.options.add_error("Embedded RDF/XML content could not parsed (problems with %s?); ignored" % value) | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 83 return True | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 84 else : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 85 return False | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 86 else : | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 87 return False | 
| 
56ad4e20f292
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
 guerler parents: diff
changeset | 88 | 
