diff planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/pyRdfa/property.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/pyRdfa/property.py	Fri Jul 31 00:32:28 2020 -0400
@@ -0,0 +1,311 @@
+# -*- coding: utf-8 -*-
+"""
+Implementation of the C{@property} value handling.
+
+RDFa 1.0 and RDFa 1.1 are fairly different. RDFa 1.0 generates only literals, see
+U{RDFa Task Force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>} for the details.
+On the other hand, RDFa 1.1, beyond literals, can also generate URI references. Hence the duplicate method in the L{ProcessProperty} class, one for RDFa 1.0 and the other for RDFa 1.1.
+
+@summary: RDFa Literal generation
+@requires: U{RDFLib package<http://rdflib.net>}
+@organization: U{World Wide Web Consortium<http://www.w3.org>}
+@author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
+@license: This software is available for use under the
+U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
+"""
+
+"""
+$Id: property.py,v 1.11 2012/06/12 11:47:11 ivan Exp $
+$Date: 2012/06/12 11:47:11 $
+"""
+
+import re, sys
+
+import rdflib
+from rdflib	import BNode
+from rdflib	import Literal, URIRef, Namespace
+if rdflib.__version__ >= "3.0.0" :
+	from rdflib	     import RDF as ns_rdf
+	from rdflib.term import XSDToPython
+else :
+	from rdflib.RDF	    import RDFNS as ns_rdf
+	from rdflib.Literal import XSDToPython
+
+from .	         import IncorrectBlankNodeUsage, IncorrectLiteral, err_no_blank_node, ns_xsd
+from .utils      import has_one_of_attributes, return_XML
+from .host.html5 import handled_time_types
+
+XMLLiteral  = ns_rdf["XMLLiteral"]
+HTMLLiteral = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#HTML")
+
+class ProcessProperty :
+	"""Generate the value for C{@property} taking into account datatype, etc.
+	Note: this class is created only if the C{@property} is indeed present, no need to check.
+
+	@ivar node: DOM element node
+	@ivar graph: the (RDF) graph to add the properties to
+	@ivar subject: the RDFLib URIRef serving as a subject for the generated triples
+	@ivar state: the current state to be used for the CURIE-s
+	@type state: L{state.ExecutionContext}
+	@ivar typed_resource: Typically the bnode generated by a @typeof
+	"""
+	def __init__(self, node, graph, subject, state, typed_resource = None) :
+		"""
+		@param node: DOM element node
+		@param graph: the (RDF) graph to add the properties to
+		@param subject: the RDFLib URIRef serving as a subject for the generated triples
+		@param state: the current state to be used for the CURIE-s
+		@type state: L{state.ExecutionContext}
+		@param typed_resource: Typically the bnode generated by a @typeof; in RDFa 1.1, that becomes the object for C{@property}
+		"""
+		self.node           = node
+		self.graph          = graph
+		self.subject        = subject
+		self.state          = state
+		self.typed_resource = typed_resource
+
+	def generate(self) :
+		"""
+		Common entry point for the RDFa 1.0 and RDFa 1.1 versions; bifurcates based on the RDFa version, as retrieved from the state object.
+		"""
+		if self.state.rdfa_version >= "1.1" :
+			self.generate_1_1()
+		else :
+			self.generate_1_0()
+
+	def generate_1_1(self) :
+		"""Generate the property object, 1.1 version"""
+
+		#########################################################################
+		# See if the target is _not_ a literal
+		irirefs      = ("resource", "href", "src")
+		noiri        = ("content", "datatype", "rel", "rev")
+		notypediri   = ("content", "datatype", "rel", "rev", "about", "about_pruned")
+		if has_one_of_attributes(self.node, irirefs) and not has_one_of_attributes(self.node, noiri) :
+			# @href/@resource/@src takes the lead here...
+			object = self.state.getResource(irirefs)
+		elif self.node.hasAttribute("typeof") and not has_one_of_attributes(self.node, notypediri) and self.typed_resource != None :
+				# a @typeof creates a special branch in case the typed resource was set during parsing
+				object = self.typed_resource
+		else :
+			# We have to generate a literal
+
+			# Get, if exists, the value of @datatype
+			datatype = ''
+			dtset    = False
+			if self.node.hasAttribute("datatype") :
+				dtset = True
+				dt = self.node.getAttribute("datatype")
+				if dt != "" :
+					datatype = self.state.getURI("datatype")
+
+			# The supress_lang flag is set in case some elements explicitly want to suppress the effect of language.
+			# There were discussions, for example, that the <time> element should do so. Although
+			# this was eventually reversed, the functionality is kept in the code in case another
+			# element might need it...
+			if self.state.lang != None and self.state.supress_lang == False :
+				lang = self.state.lang
+			else :
+				lang = ''
+
+			# The simple case: separate @content attribute
+			if self.node.hasAttribute("content") :
+				val = self.node.getAttribute("content")
+				# Handling the automatic uri conversion case
+				if dtset == False :
+					object = Literal(val, lang=lang)
+				else :
+					object = self._create_Literal(val, datatype=datatype, lang=lang)
+				# The value of datatype has been set, and the keyword parameters take care of the rest
+			else :
+				# see if there *is* a datatype (even if it is empty!)
+				if dtset :
+					if datatype == XMLLiteral :
+						litval = self._get_XML_literal(self.node)
+						object = Literal(litval,datatype=XMLLiteral)
+					elif datatype == HTMLLiteral :
+						# I am not sure why this hack is necessary, but otherwise an encoding error occurs
+						# In Python3 all this should become moot, due to the unicode everywhere approach...
+						if sys.version_info[0] >= 3 :
+							object = Literal(self._get_HTML_literal(self.node), datatype=HTMLLiteral)
+						else :
+							litval = self._get_HTML_literal(self.node)
+							o = Literal(litval, datatype=XMLLiteral)
+							object = Literal(o, datatype=HTMLLiteral)
+					else :
+						object = self._create_Literal(self._get_literal(self.node), datatype=datatype, lang=lang)
+				else :
+					object = self._create_Literal(self._get_literal(self.node), lang=lang)
+
+		if object != None :
+			for prop in self.state.getURI("property") :
+				if not isinstance(prop, BNode) :
+					if self.node.hasAttribute("inlist") :
+						self.state.add_to_list_mapping(prop, object)
+					else :
+						self.graph.add( (self.subject, prop, object) )
+				else :
+					self.state.options.add_warning(err_no_blank_node % "property", warning_type=IncorrectBlankNodeUsage, node=self.node.nodeName)
+
+		# return
+
+	def generate_1_0(self) :
+		"""Generate the property object, 1.0 version"""
+
+		#########################################################################
+		# We have to generate a literal indeed.
+		# Get, if exists, the value of @datatype
+		datatype = ''
+		dtset    = False
+		if self.node.hasAttribute("datatype") :
+			dtset = True
+			dt = self.node.getAttribute("datatype")
+			if dt != "" :
+				datatype = self.state.getURI("datatype")
+
+		if self.state.lang != None :
+			lang = self.state.lang
+		else :
+			lang = ''
+
+		# The simple case: separate @content attribute
+		if self.node.hasAttribute("content") :
+			val = self.node.getAttribute("content")
+			# Handling the automatic uri conversion case
+			if dtset == False :
+				object = Literal(val, lang=lang)
+			else :
+				object = self._create_Literal(val, datatype=datatype, lang=lang)
+			# The value of datatype has been set, and the keyword parameters take care of the rest
+		else :
+			# see if there *is* a datatype (even if it is empty!)
+			if dtset :
+				# yep. The Literal content is the pure text part of the current element:
+				# We have to check whether the specified datatype is, in fact, an
+				# explicit XML Literal
+				if datatype == XMLLiteral :
+					litval = self._get_XML_literal(self.node)
+					object = Literal(litval,datatype=XMLLiteral)
+				elif datatype == HTMLLiteral :
+					# I am not sure why this hack is necessary, but otherwise an encoding error occurs
+					# In Python3 all this should become moot, due to the unicode everywhere approach...
+					if sys.version_info[0] >= 3 :
+						object = Literal(self._get_HTML_literal(self.node), datatype=HTMLLiteral)
+					else :
+						litval = self._get_HTML_literal(self.node)
+						o = Literal(litval, datatype=XMLLiteral)
+						object = Literal(o, datatype=HTMLLiteral)
+				else :
+					object = self._create_Literal(self._get_literal(self.node), datatype=datatype, lang=lang)
+			else :
+				# no controlling @datatype. We have to see if there is markup in the contained
+				# element
+				if True in [ n.nodeType == self.node.ELEMENT_NODE for n in self.node.childNodes ] :
+					# yep, an XML Literal should be generated
+					object = self._create_Literal(self._get_XML_literal(self.node), datatype=XMLLiteral)
+				else :
+					# At this point, there might be entities in the string that are returned as real characters by the dom
+					# implementation. That should be turned back
+					object = self._create_Literal(self._get_literal(self.node), lang=lang)
+
+		for prop in self.state.getURI("property") :
+			if not isinstance(prop,BNode) :
+				self.graph.add( (self.subject,prop,object) )
+			else :
+				self.state.options.add_warning(err_no_blank_node % "property", warning_type=IncorrectBlankNodeUsage, node=self.node.nodeName)
+
+		# return
+
+	######################################################################################################################################
+
+
+	def _putBackEntities(self, str) :
+		"""Put 'back' entities for the '&','<', and '>' characters, to produce a proper XML string.
+		Used by the XML Literal extraction.
+		@param str: string to be converted
+		@return: string with entities
+		@rtype: string
+		"""
+		return str.replace('&','&amp;').replace('<','&lt;').replace('>','&gt;')
+
+	def _get_literal(self, Pnode):
+		"""
+		Get (recursively) the full text from a DOM Node.
+
+		@param Pnode: DOM Node
+		@return: string
+		"""
+		rc = ""
+		for node in Pnode.childNodes:
+			if node.nodeType == node.TEXT_NODE:
+				rc = rc + node.data
+			elif node.nodeType == node.ELEMENT_NODE :
+				rc = rc + self._get_literal(node)
+
+		# The decision of the group in February 2008 is not to normalize the result by default.
+		# This is reflected in the default value of the option
+
+		if self.state.options.space_preserve :
+			return rc
+		else :
+			return re.sub(r'(\r| |\n|\t)+'," ",rc).strip()
+	# end _get_literal
+
+	def _get_XML_literal(self, Pnode) :
+		"""
+		Get (recursively) the XML Literal content of a DOM Node.
+
+		@param Pnode: DOM Node
+		@return: string
+		"""
+		rc = ""
+		for node in Pnode.childNodes:
+			if node.nodeType == node.TEXT_NODE:
+				rc = rc + self._putBackEntities(node.data)
+			elif node.nodeType == node.ELEMENT_NODE :
+				rc = rc + return_XML(self.state, node, base = False)
+		return rc
+	# end _get_XML_literal
+
+	def _get_HTML_literal(self, Pnode) :
+		"""
+		Get (recursively) the HTML Literal content of a DOM Node.
+
+		@param Pnode: DOM Node
+		@return: string
+		"""
+		rc = ""
+		for node in Pnode.childNodes:
+			if node.nodeType == node.TEXT_NODE:
+				rc = rc + self._putBackEntities(node.data)
+			elif node.nodeType == node.ELEMENT_NODE :
+				rc = rc + return_XML(self.state, node, base = False, xmlns = False )
+		return rc
+	# end _get_HTML_literal
+
+	def _create_Literal(self, val, datatype = '', lang = '') :
+		"""
+		Create a literal, taking into account the datatype and language.
+		@return: Literal
+		"""
+		if datatype == None or datatype == '' :
+			return Literal(val, lang=lang)
+		#elif datatype == ns_xsd["string"] :
+		#	return Literal(val)
+		else :
+			# This is a bit convoluted... the default setup of rdflib does not gracefully react if the
+			# datatype cannot properly be converted to Python. I have to copy and reuse some of the
+			# rdflib code to get this working...
+			# To make things worse: rdflib 3.1.0 does not handle the various xsd date types properly, i.e.,
+			# the conversion function below will generate errors. Hence the check should be skipped for those
+			if ("%s" % datatype) in handled_time_types and rdflib.__version__ < "3.2.0" :
+				convFunc = False
+			else :
+				convFunc = XSDToPython.get(datatype, None)
+			if convFunc :
+				try :
+					pv = convFunc(val)
+					# If we got there the literal value and its datatype match
+				except :
+					self.state.options.add_warning("Incompatible value (%s) and datatype (%s) in Literal definition." % (val, datatype), warning_type=IncorrectLiteral, node=self.node.nodeName)
+			return Literal(val, datatype=datatype)