comparison planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/pyRdfa/embeddedRDF.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 # -*- coding: utf-8 -*-
2 """
3 Extracting possible embedded RDF/XML content from the file and parse it separately into the Graph. This is used, for example
4 by U{SVG 1.2 Tiny<http://www.w3.org/TR/SVGMobile12/>}.
5
6 @author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
7 @license: This software is available for use under the
8 U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
9 @contact: Ivan Herman, ivan@w3.org
10 @version: $Id: embeddedRDF.py,v 1.15 2012/11/16 17:51:53 ivan Exp $
11 """
12
13 # Python 3 foolproof way...
14 try :
15 from io import StringIO
16 except :
17 from io import StringIO
18
19 from .host import HostLanguage, accept_embedded_rdf_xml, accept_embedded_turtle
20 from .utils import return_XML
21 import re, sys
22
def handle_embeddedRDF(node, graph, state):
    """
    Handles embedded RDF. There are two possibilities:

     - the host language is one of those in L{host.accept_embedded_rdf_xml} and the node is an C{rdf:RDF} element: the subtree is serialized with C{return_XML} and parsed into the graph (a typical example is SVG).
     - the host language is one of those in L{host.accept_embedded_turtle} and the node is a C{<script>} element: if its type is C{text/turtle}, its text content is parsed into the graph.

    Nothing is done (and False is returned) when C{state.options.embedded_rdf} is not set; this feature is optional.
    Parsing problems are not propagated: they are recorded through C{state.options.add_error} and the embedded content is ignored.

    @param node: a DOM node for the top level element
    @param graph: target rdf graph
    @type graph: RDFLib's Graph object instance
    @param state: the inherited state (namespaces, lang, etc)
    @type state: L{state.ExecutionContext}
    @return: whether an RDF/XML or turtle content has been detected or not. If True, the RDFa processing should not occur on the node and its descendants.
    @rtype: Boolean
    """
    def _get_literal(Pnode):
        """
        Get the full text
        @param Pnode: DOM Node
        @return: string
        """
        text = "".join(
            child.data
            for child in Pnode.childNodes
            if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
        )
        # Sigh... the HTML5 parser does not recognize the CDATA escapes, ie, it just
        # passes on the <![CDATA[ and ]]> strings, so they are stripped by hand.
        return text.replace("<![CDATA[", "").replace("]]>", "")

    if not state.options.embedded_rdf:
        return False

    host_language = state.options.host_language

    # Embedded turtle, per the latest Turtle draft
    if host_language in accept_embedded_turtle and node.nodeName.lower() == "script":
        if node.hasAttribute("type") and node.getAttribute("type") == "text/turtle":
            rdf = StringIO(_get_literal(node))
            try:
                graph.parse(rdf, format="n3", publicID=state.base)
                state.options.add_info("The output graph includes triples coming from an embedded Turtle script")
            except Exception as err:
                # Only genuine errors are reported and ignored; KeyboardInterrupt
                # and SystemExit are deliberately left to propagate.
                state.options.add_error("Embedded Turtle content could not be parsed (problems with %s?); ignored" % err)
        # NOTE(review): a <script> element is reported as handled whatever its
        # type is -- confirm against the upstream indentation of this return.
        return True

    if (host_language in accept_embedded_rdf_xml
            and node.localName == "RDF"
            and node.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#"):
        rdf = StringIO(return_XML(state, node))
        try:
            graph.parse(rdf)
            state.options.add_info("The output graph includes triples coming from an embedded RDF/XML subtree")
        except Exception as err:
            state.options.add_error("Embedded RDF/XML content could not be parsed (problems with %s?); ignored" % err)
        return True

    return False
88