Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/pyRdfa/property.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d30785e31577 | 1:56ad4e20f292 |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 """ | |
| 3 Implementation of the C{@property} value handling. | |
| 4 | |
| 5 RDFa 1.0 and RDFa 1.1 are fairly different. RDFa 1.0 generates only literals, see | |
| 6 U{RDFa Task Force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>} for the details. | |
| 7 On the other hand, RDFa 1.1, beyond literals, can also generate URI references. Hence the duplicate method in the L{ProcessProperty} class, one for RDFa 1.0 and the other for RDFa 1.1. | |
| 8 | |
| 9 @summary: RDFa Literal generation | |
| 10 @requires: U{RDFLib package<http://rdflib.net>} | |
| 11 @organization: U{World Wide Web Consortium<http://www.w3.org>} | |
| 12 @author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} | |
| 13 @license: This software is available for use under the | |
| 14 U{W3C® SOFTWARE NOTICE AND LICENSE<href="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231">} | |
| 15 """ | |
| 16 | |
| 17 """ | |
| 18 $Id: property.py,v 1.11 2012/06/12 11:47:11 ivan Exp $ | |
| 19 $Date: 2012/06/12 11:47:11 $ | |
| 20 """ | |
| 21 | |
| 22 import re, sys | |
| 23 | |
| 24 import rdflib | |
| 25 from rdflib import BNode | |
| 26 from rdflib import Literal, URIRef, Namespace | |
| 27 if rdflib.__version__ >= "3.0.0" : | |
| 28 from rdflib import RDF as ns_rdf | |
| 29 from rdflib.term import XSDToPython | |
| 30 else : | |
| 31 from rdflib.RDF import RDFNS as ns_rdf | |
| 32 from rdflib.Literal import XSDToPython | |
| 33 | |
| 34 from . import IncorrectBlankNodeUsage, IncorrectLiteral, err_no_blank_node, ns_xsd | |
| 35 from .utils import has_one_of_attributes, return_XML | |
| 36 from .host.html5 import handled_time_types | |
| 37 | |
| 38 XMLLiteral = ns_rdf["XMLLiteral"] | |
| 39 HTMLLiteral = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML") | |
| 40 | |
class ProcessProperty:
    """Generate the value for C{@property} taking into account datatype, etc.
    Note: this class is created only if the C{@property} is indeed present, no need to check.

    @ivar node: DOM element node
    @ivar graph: the (RDF) graph to add the properties to
    @ivar subject: the RDFLib URIRef serving as a subject for the generated triples
    @ivar state: the current state to be used for the CURIE-s
    @type state: L{state.ExecutionContext}
    @ivar typed_resource: Typically the bnode generated by a @typeof
    """
    def __init__(self, node, graph, subject, state, typed_resource=None):
        """
        @param node: DOM element node
        @param graph: the (RDF) graph to add the properties to
        @param subject: the RDFLib URIRef serving as a subject for the generated triples
        @param state: the current state to be used for the CURIE-s
        @type state: L{state.ExecutionContext}
        @param typed_resource: Typically the bnode generated by a @typeof; in RDFa 1.1, that becomes the object for C{@property}
        """
        self.node = node
        self.graph = graph
        self.subject = subject
        self.state = state
        self.typed_resource = typed_resource

    def generate(self):
        """
        Common entry point for the RDFa 1.0 and RDFa 1.1 versions; bifurcates based on the
        RDFa version, as retrieved from the state object.
        """
        # The version is compared as a string, exactly as stored on the state object.
        if self.state.rdfa_version >= "1.1":
            self.generate_1_1()
        else:
            self.generate_1_0()

    def generate_1_1(self):
        """Generate the property object, 1.1 version.

        The object is, in order of precedence: the resource referred to by
        C{@resource}/C{@href}/C{@src}, the typed resource created by a C{@typeof},
        or a literal built from C{@content} or from the element's content.
        """
        node = self.node

        #########################################################################
        # See if the target is _not_ a literal
        irirefs = ("resource", "href", "src")
        noiri = ("content", "datatype", "rel", "rev")
        notypediri = ("content", "datatype", "rel", "rev", "about", "about_pruned")

        if has_one_of_attributes(node, irirefs) and not has_one_of_attributes(node, noiri):
            # @href/@resource/@src takes the lead here...
            value = self.state.getResource(irirefs)
        elif (node.hasAttribute("typeof")
              and not has_one_of_attributes(node, notypediri)
              and self.typed_resource is not None):
            # a @typeof creates a special branch in case the typed resource was set during parsing
            value = self.typed_resource
        else:
            # We have to generate a literal
            dtset, datatype = self._read_datatype()

            # Suppressing the language is possible in case some elements explicitly want to
            # suppress the effect of language. There were discussions, for example, that the
            # <time> element should do so. Although, after all, this was reversed, the
            # functionality is kept in the code in case another element might need it...
            # NOTE: the explicit comparison with False is kept on purpose; the type of
            # supress_lang is defined outside this class.
            if self.state.lang is not None and self.state.supress_lang == False:
                lang = self.state.lang
            else:
                lang = ''

            if node.hasAttribute("content"):
                # The simple case: separate @content attribute
                value = self._literal_from_content(dtset, datatype, lang)
            elif dtset:
                # there *is* a datatype (even if it is empty!)
                value = self._literal_from_datatype(datatype, lang)
            else:
                value = self._create_Literal(self._get_literal(node), lang=lang)

        if value is not None:
            self._add_property_triples(value, allow_inlist=True)

    def generate_1_0(self):
        """Generate the property object, 1.0 version.

        RDFa 1.0 always produces a literal: from C{@content} if present, otherwise from
        the element's content (as an XML Literal if that content contains markup and no
        C{@datatype} controls the outcome).
        """
        node = self.node

        #########################################################################
        # We have to generate a literal indeed.
        dtset, datatype = self._read_datatype()
        lang = self.state.lang if self.state.lang is not None else ''

        if node.hasAttribute("content"):
            # The simple case: separate @content attribute
            value = self._literal_from_content(dtset, datatype, lang)
        elif dtset:
            # The Literal content is the pure text part of the current element, unless
            # the specified datatype is, in fact, an explicit XML (or HTML) Literal
            value = self._literal_from_datatype(datatype, lang)
        elif any(child.nodeType == node.ELEMENT_NODE for child in node.childNodes):
            # no controlling @datatype, but there is markup in the contained element:
            # an XML Literal should be generated
            value = self._create_Literal(self._get_XML_literal(node), datatype=XMLLiteral)
        else:
            value = self._create_Literal(self._get_literal(node), lang=lang)

        self._add_property_triples(value, allow_inlist=False)

    ######################################################################################################################################

    def _read_datatype(self):
        """Look up the C{@datatype} attribute of the current node.

        @return: a C{(dtset, datatype)} pair; C{dtset} tells whether the attribute is present
        at all, C{datatype} is the resolved datatype or C{''} if the attribute is absent or empty
        @rtype: tuple
        """
        if not self.node.hasAttribute("datatype"):
            return False, ''
        if self.node.getAttribute("datatype") != "":
            return True, self.state.getURI("datatype")
        # An empty @datatype is still significant for the callers: it is "set"
        return True, ''

    def _literal_from_content(self, dtset, datatype, lang):
        """Build the literal from the C{@content} attribute of the current node.

        @param dtset: whether a C{@datatype} attribute is present on the node
        @param datatype: the resolved datatype (or C{''})
        @param lang: the language tag to use (or C{''})
        @return: Literal
        """
        val = self.node.getAttribute("content")
        if not dtset:
            return Literal(val, lang=lang)
        # The value of datatype has been set, and the keyword parameters take care of the rest
        return self._create_Literal(val, datatype=datatype, lang=lang)

    def _literal_from_datatype(self, datatype, lang):
        """Build the literal from the element content when a C{@datatype} attribute is present.

        @param datatype: the resolved datatype (C{''} for an empty C{@datatype})
        @param lang: the language tag to use (or C{''})
        @return: Literal
        """
        if datatype == XMLLiteral:
            return Literal(self._get_XML_literal(self.node), datatype=XMLLiteral)
        if datatype == HTMLLiteral:
            markup = self._get_HTML_literal(self.node)
            if sys.version_info[0] >= 3:
                return Literal(markup, datatype=HTMLLiteral)
            # Python 2 only: going through an intermediate XML Literal is a hack kept from
            # the original implementation; without it an encoding error occurs.
            return Literal(Literal(markup, datatype=XMLLiteral), datatype=HTMLLiteral)
        return self._create_Literal(self._get_literal(self.node), datatype=datatype, lang=lang)

    def _add_property_triples(self, value, allow_inlist):
        """Emit C{value} as the object of every predicate listed in C{@property}.

        Blank node predicates are not allowed; a warning is registered for each of them.

        @param value: the object of the generated triples
        @param allow_inlist: if true (RDFa 1.1), a node with an C{@inlist} attribute adds the
        value to the list mapping of the state instead of generating a triple directly
        """
        for prop in self.state.getURI("property"):
            if isinstance(prop, BNode):
                self.state.options.add_warning(err_no_blank_node % "property", warning_type=IncorrectBlankNodeUsage, node=self.node.nodeName)
            elif allow_inlist and self.node.hasAttribute("inlist"):
                self.state.add_to_list_mapping(prop, value)
            else:
                self.graph.add((self.subject, prop, value))

    def _putBackEntities(self, str):
        """Put 'back' entities for the '&','<', and '>' characters, to produce a proper XML string.
        Used by the XML Literal extraction.
        @param str: string to be converted
        @return: string with entities
        @rtype: string
        """
        # '&' has to be handled first, otherwise the ampersands of the entities
        # generated for '<' and '>' would be escaped a second time.
        return str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    def _get_literal(self, Pnode):
        """
        Get (recursively) the full text from a DOM Node.

        Unless the C{space_preserve} option is set, runs of white space are collapsed
        into a single space and the result is stripped; this happens at every level
        of the recursion.

        @param Pnode: DOM Node
        @return: string
        """
        parts = []
        for child in Pnode.childNodes:
            if child.nodeType == child.TEXT_NODE:
                parts.append(child.data)
            elif child.nodeType == child.ELEMENT_NODE:
                parts.append(self._get_literal(child))
        text = "".join(parts)

        # The decision of the group in February 2008 is not to normalize the result by default.
        # This is reflected in the default value of the option
        if self.state.options.space_preserve:
            return text
        return re.sub(r'(\r| |\n|\t)+', " ", text).strip()

    def _serialize_children(self, Pnode, **xml_options):
        """Serialize the children of a DOM Node: text nodes get their entities put back,
        element nodes are serialized through C{return_XML}; other node types are ignored.

        @param Pnode: DOM Node
        @param xml_options: keyword arguments handed over to C{return_XML}
        @return: string
        """
        parts = []
        for child in Pnode.childNodes:
            if child.nodeType == child.TEXT_NODE:
                parts.append(self._putBackEntities(child.data))
            elif child.nodeType == child.ELEMENT_NODE:
                parts.append(return_XML(self.state, child, **xml_options))
        return "".join(parts)

    def _get_XML_literal(self, Pnode):
        """
        Get (recursively) the XML Literal content of a DOM Node.

        @param Pnode: DOM Node
        @return: string
        """
        return self._serialize_children(Pnode, base=False)

    def _get_HTML_literal(self, Pnode):
        """
        Get (recursively) the HTML Literal content of a DOM Node. Same as the XML Literal
        variant, except that C{return_XML} is called with C{xmlns = False}.

        @param Pnode: DOM Node
        @return: string
        """
        return self._serialize_children(Pnode, base=False, xmlns=False)

    def _create_Literal(self, val, datatype='', lang=''):
        """
        Create a literal, taking into account the datatype and language. The language
        is used only if there is no datatype. If the lexical value cannot be converted
        by the conversion function registered for the datatype, a warning is registered
        but the literal is created nevertheless.

        @param val: lexical value of the literal
        @param datatype: datatype of the literal; C{None} or C{''} means a plain literal
        @param lang: language tag, used for plain literals only
        @return: Literal
        """
        if datatype is None or datatype == '':
            return Literal(val, lang=lang)

        # This is a bit convoluted... the default setup of rdflib does not gracefully react if the
        # datatype cannot properly be converted to Python, hence the explicit check here.
        # To make things worse: rdflib 3.1.0 does not handle the various xsd date types properly, ie,
        # the conversion function below would generate errors. Ie, the check is skipped for those.
        if ("%s" % datatype) in handled_time_types and rdflib.__version__ < "3.2.0":
            convFunc = None
        else:
            convFunc = XSDToPython.get(datatype, None)

        if convFunc:
            try:
                # Only the success of the conversion matters, the converted value is not used
                convFunc(val)
            except Exception:
                self.state.options.add_warning("Incompatible value (%s) and datatype (%s) in Literal definition." % (val, datatype), warning_type=IncorrectLiteral, node=self.node.nodeName)
        return Literal(val, datatype=datatype)
