Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/pyMicrodata/microdata.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d30785e31577 | 1:56ad4e20f292 |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 """ | |
| 3 | |
| 4 The core of the Microdata->RDF conversion, a more or less verbatim implementation of the | |
| 5 U{W3C IG Note<http://www.w3.org/TR/microdata-rdf/>}. Because the implementation was also used to check | |
| 6 the note itself, it tries to be fairly close to the text. | |
| 7 | |
| 8 | |
| 9 @organization: U{World Wide Web Consortium<http://www.w3.org>} | |
| 10 @author: U{Ivan Herman<http://www.w3.org/People/Ivan/>} | |
| 11 @license: This software is available for use under the | |
| 12 U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>} | |
| 13 """ | |
| 14 | |
| 15 """ | |
| 16 $Id: microdata.py,v 1.4 2012/09/05 16:40:43 ivan Exp $ | |
| 17 $Date: 2012/09/05 16:40:43 $ | |
| 18 | |
| 19 Added a reaction on the RDFaStopParsing exception: if raised while setting up the local execution context, parsing | |
| 20 is stopped (on the whole subtree) | |
| 21 """ | |
| 22 | |
| 23 import sys | |
| 24 if sys.version_info[0] >= 3 : | |
| 25 from urllib.parse import urlsplit, urlunsplit | |
| 26 else : | |
| 27 from urllib.parse import urlsplit, urlunsplit | |
| 28 | |
| 29 from types import * | |
| 30 | |
| 31 import rdflib | |
| 32 from rdflib import URIRef | |
| 33 from rdflib import Literal | |
| 34 from rdflib import BNode | |
| 35 from rdflib import Namespace | |
| 36 if rdflib.__version__ >= "3.0.0" : | |
| 37 from rdflib import Graph | |
| 38 from rdflib import RDF as ns_rdf | |
| 39 from rdflib import RDFS as ns_rdfs | |
| 40 from rdflib import XSD as ns_xsd | |
| 41 else : | |
| 42 from rdflib.Graph import Graph | |
| 43 from rdflib.RDFS import RDFSNS as ns_rdfs | |
| 44 from rdflib.Literal import _XSD_NS as ns_xsd | |
| 45 from rdflib.RDF import RDFNS as ns_rdf | |
| 46 | |
| 47 ns_owl = Namespace("http://www.w3.org/2002/07/owl#") | |
| 48 | |
| 49 from .registry import registry, vocab_names | |
| 50 from .utils import generate_RDF_collection, get_Literal, get_time_type | |
| 51 from .utils import get_lang_from_hierarchy, is_absolute_URI, generate_URI, fragment_escape | |
| 52 | |
| 53 MD_VOCAB = "http://www.w3.org/ns/md#" | |
| 54 RDFA_VOCAB = URIRef("http://www.w3.org/ns/rdfa#usesVocabulary") | |
| 55 | |
| 56 from . import debug | |
| 57 | |
# Existing predicate schemes
class PropertySchemes:
    """
    Names of the schemes that can be used to turn an @itemprop name into a predicate URI.

    The values are plain strings; they are compared against the "propertyURI" entry
    of a vocabulary's registry record when a predicate URI is generated.

    @cvar vocabulary: the property name is appended to the vocabulary URI
    @cvar contextual: the property URI is built from the current type and/or the in-scope name
    """
    vocabulary = "vocabulary"
    contextual = "contextual"
| 62 | |
class ValueMethod:
    """
    Names of the ways multiple values of one property can be serialized.

    The values are plain strings; they are compared against the "multipleValues"
    entry of a vocabulary's registry record.

    @cvar unordered: each value yields its own triple
    @cvar list: the values are put into a single RDF collection
    """
    unordered = "unordered"
    # NOTE: this attribute shadows the builtin inside the class body only; the name is
    # part of the class' interface (ValueMethod.list) and is therefore kept as is.
    list = "list"
| 66 | |
| 67 # ---------------------------------------------------------------------------- | |
| 68 | |
class Evaluation_Context:
    """
    Evaluation context structure. See Section 4.1 of the U{W3C IG Note<http://www.w3.org/TR/microdata-rdf/>} for the details.

    @ivar current_type: an absolute URL for the current type, used when an item does not contain an item type
    @ivar memory: mapping from items to RDF subjects
    @type memory: dictionary
    @ivar current_name: an absolute URL for the in-scope name, used for generating URIs for properties of items without an item type
    @ivar current_vocabulary: an absolute URL for the current vocabulary, from the registry
    """
    def __init__(self):
        self.current_type = None
        self.memory = {}
        self.current_name = None
        self.current_vocabulary = None

    def get_memory(self, item):
        """
        Get the memory content (ie, RDF subject) for 'item', or None if not stored yet.

        @param item: an 'item', in microdata terminology
        @type item: DOM Element Node
        @return: None, or an RDF Subject (URIRef or BNode)
        """
        return self.memory.get(item)

    def set_memory(self, item, subject):
        """
        Set the memory content, ie, the subject, for 'item'.

        @param item: an 'item', in microdata terminology
        @type item: DOM Element Node
        @param subject: RDF Subject
        @type subject: URIRef or Blank Node
        """
        self.memory[item] = subject

    def new_copy(self, itype):
        """
        Create a copy of this context with a new current type.

        The memory is copied into a fresh dictionary (a shallow copy), so that entries
        added to the copy do not show up in this context. The current name and the
        current vocabulary are carried over unchanged.

        @param itype: an absolute URL for the current type
        @return: a new evaluation context instance
        """
        retval = Evaluation_Context()
        retval.memory = dict(self.memory)
        retval.current_type = itype
        retval.current_name = self.current_name
        retval.current_vocabulary = self.current_vocabulary
        return retval

    def __str__(self):
        """Return a multi-line, human readable dump of the context (used for debugging)."""
        lines = [
            "Evaluation context:\n",
            " current type: %s\n" % self.current_type,
            " current name: %s\n" % self.current_name,
            " current vocabulary: %s\n" % self.current_vocabulary,
            " memory: %s\n" % self.memory,
            "----\n",
        ]
        return "".join(lines)
| 133 | |
class Microdata:
    """
    This class encapsulates methods that are defined by the U{microdata spec<http://dev.w3.org/html5/md/Overview.html>},
    as opposed to the RDF conversion note.

    @ivar document: top of the DOM tree, as returned by the HTML5 parser
    @ivar base: the base URI of the Dom tree, either set from the outside or via a @base element
    """
    def __init__(self, document, base=None):
        """
        @param document: top of the DOM tree, as returned by the HTML5 parser
        @param base: the base URI of the Dom tree; only used if the tree has no <base> element with an @href
        """
        self.document = document

        # The input argument is the fallback; the first <base> element carrying an
        # @href, if any, takes precedence.
        self.base = base
        for set_base in document.getElementsByTagName("base"):
            if set_base.hasAttribute("href"):
                self.base = set_base.getAttribute("href")
                break

    @staticmethod
    def _element_children(node):
        """Return the children of C{node} that are element nodes, in document order."""
        return [child for child in node.childNodes if child.nodeType == child.ELEMENT_NODE]

    def _elements_in_tree_order(self):
        """
        Yield the element nodes of the tree rooted at C{self.document} in tree order
        (pre-order, depth first). The root itself is yielded only if it is an element.
        """
        stack = [self.document]
        while stack:
            node = stack.pop()
            if node.nodeType == node.ELEMENT_NODE:
                yield node
            # Children are pushed in reverse so that the first child is popped first
            stack.extend(reversed(self._element_children(node)))

    def get_top_level_items(self):
        """
        A top level item is an element that has the @itemscope set, but no @itemprop. They are
        collected in pre-order and depth-first fashion, ie, an item precedes the top level items
        nested in it.

        @return: list of items (ie, DOM Nodes)
        """
        return [
            node for node in self._elements_in_tree_order()
            if node.hasAttribute("itemscope") and not node.hasAttribute("itemprop")
        ]

    def get_item_properties(self, item):
        """
        Collect the item's properties, ie, all DOM descendent nodes with a non-empty @itemprop until
        the subtree hits another @itemscope. Elements referred to via @itemref are crawled, too,
        after the item's own children.

        @param item: current item
        @type item: DOM Node
        @return: array of property elements, ie, DOM Nodes
        """
        results = []
        # Nodes already visited; a node that shows up twice (eg, through an @itemref
        # pointing back into the item) is silently ignored the second time.
        visited = set([item])

        to_crawl = self._element_children(item)
        if item.hasAttribute("itemref"):
            for ref_id in item.getAttribute("itemref").split():
                referred = self.getElementById(ref_id)
                if referred is not None:
                    to_crawl.append(referred)

        # Use the list as a stack (next node to visit is at the end): O(1) pops instead of pop(0)
        to_crawl.reverse()
        while to_crawl:
            current = to_crawl.pop()
            if current in visited:
                continue
            visited.add(current)

            # @itemscope is the barrier: the content of a nested item belongs to that item
            if not current.hasAttribute("itemscope"):
                to_crawl.extend(reversed(self._element_children(current)))

            if current.hasAttribute("itemprop") and current.getAttribute("itemprop").strip():
                results.append(current)

        return results

    def getElementById(self, id):
        """This is a method defined for DOM 2 HTML, but the HTML5 parser does not seem to define it. Oh well...

        @param id: value of an @id attribute to look for
        @return: the first element, in tree order, whose @id attribute matches C{id}, or None if there is none
        """
        for node in self._elements_in_tree_order():
            if node.hasAttribute("id") and node.getAttribute("id") == id:
                return node
        return None
| 245 | |
class MicrodataConversion(Microdata):
    """
    Top level class encapsulating the conversion algorithms as described in the W3C note.

    @ivar graph: an RDF graph; an RDFLib Graph
    @type graph: RDFLib Graph
    @ivar document: top of the DOM tree, as returned by the HTML5 parser
    @ivar ns_md: the Namespace for the microdata vocabulary
    @ivar base: the base of the Dom tree, either set from the outside or via a @base element
    @ivar vocabularies_used: set to True as soon as an item type matches a vocabulary of the registry
    """

    # Elements whose property value is a URI, mapped to the attribute that carries that URI
    _URI_ATTRS = {
        "audio": "src",
        "embed": "src",
        "iframe": "src",
        "img": "src",
        "source": "src",
        "track": "src",
        "video": "src",
        "data": "src",
        "a": "href",
        "area": "href",
        "link": "href",
        "object": "data",
    }

    def __init__(self, document, graph, base=None, vocab_expansion=False, vocab_cache=True):
        """
        @param graph: an RDF graph; an RDFLib Graph
        @type graph: RDFLib Graph
        @param document: top of the DOM tree, as returned by the HTML5 parser
        @keyword base: the base of the Dom tree, either set from the outside or via a @base element
        @keyword vocab_expansion: whether vocab expansion should be performed or not
        @type vocab_expansion: Boolean
        @keyword vocab_cache: if vocabulary expansion is done, then perform caching of the vocabulary data
        @type vocab_cache: Boolean
        """
        Microdata.__init__(self, document, base)
        self.vocab_expansion = vocab_expansion
        self.vocab_cache = vocab_cache
        self.graph = graph
        self.ns_md = Namespace(MD_VOCAB)
        self.graph.bind("md", MD_VOCAB)
        self.vocabularies_used = False

        # Get the vocabularies defined in the registry bound to proper names, if any...
        for vocab in registry:
            if vocab in vocab_names:
                self.graph.bind(vocab_names[vocab], vocab)
            else:
                hvocab = vocab + '#'
                if hvocab in vocab_names:
                    self.graph.bind(vocab_names[hvocab], hvocab)

        # Add the prefixes defined in the RDFa initial context to improve the outlook of the output.
        # This is deliberately best-effort: the pyRdfa package may not be available, in
        # which case the prefixes are simply not bound.
        try:
            try:
                from ..pyRdfa.initialcontext import initial_context
            except (ImportError, ValueError, SystemError):
                # not installed as a sibling package (or no parent package at all)
                from pyRdfa.initialcontext import initial_context
            vocabs = initial_context["http://www.w3.org/2011/rdfa-context/rdfa-1.1"].ns
            for prefix, uri in list(vocabs.items()):
                if uri not in registry:
                    # if it is in the registry, then it may have needed some special microdata massage...
                    self.graph.bind(prefix, uri)
        except Exception:
            pass

    def convert(self):
        """
        Top level entry to convert and generate all the triples. It finds the top level items,
        and generates triples for each of them; additionally, it generates a top level entry point
        to the items from base in the form of an RDF list.
        """
        item_list = [
            self.generate_triples(top_level_item, Evaluation_Context())
            for top_level_item in self.get_top_level_items()
        ]
        collection = generate_RDF_collection(self.graph, item_list)
        self.graph.add((URIRef(self.base), self.ns_md["item"], collection))

        # Vocabulary processing is only relevant if a registered vocabulary was hit
        if not self.vocabularies_used:
            return

        # The basic expansion is always there; the follow-your-nose inclusion of
        # vocabulary is optional. The whole step is best-effort: if the pyRdfa package
        # cannot be imported (or the expansion fails), the graph is left as it is.
        try:
            try:
                from ..pyRdfa.rdfs.process import MiniOWL, process_rdfa_sem
                from ..pyRdfa.options import Options
            except (ImportError, ValueError, SystemError):
                from pyRdfa.rdfs.process import MiniOWL, process_rdfa_sem
                from pyRdfa.options import Options
            if self.vocab_expansion:
                # This is the full deal
                options = Options(vocab_expansion=self.vocab_expansion, vocab_cache=self.vocab_cache)
                process_rdfa_sem(self.graph, options)
            else:
                MiniOWL(self.graph).closure()
        except Exception:
            pass

    def _vocabulary_for_type(self, itype):
        """
        Find the vocabulary URI for an item type (steps 7 and 8 of the generation algorithm).

        If the type starts with a key of the registry, that key is the vocabulary; an
        rdfa:usesVocabulary triple is added to the graph and C{vocabularies_used} is set.
        Otherwise the vocabulary is derived from the type URI itself.

        @param itype: an absolute URL for the item type
        @return: the vocabulary URI, as a string
        """
        for key in registry:
            if itype.startswith(key):
                # There is a predefined vocabulary for this type...
                self.graph.add((URIRef(self.base), RDFA_VOCAB, URIRef(key)))
                self.vocabularies_used = True
                return key

        # The registry has not set the vocabulary; has to be extracted from the type
        parsed = urlsplit(itype)
        if parsed.fragment != "":
            return urlunsplit((parsed.scheme, parsed.netloc, parsed.path, parsed.query, "")) + '#'
        if parsed.path == "" and parsed.query == "":
            return itype if itype.endswith('/') else itype + '/'
        return itype.rsplit('/', 1)[0] + '/'

    def generate_triples(self, item, context):
        """
        Generate the triples for a specific item. See the W3C Note for the details.

        @param item: the DOM Node for the specific item
        @type item: DOM Node
        @param context: an instance of an evaluation context
        @type context: L{Evaluation_Context}
        @return: a URIRef or a BNode for the (RDF) subject
        """
        # Step 1,2: if the subject has to be set, store it in memory
        subject = context.get_memory(item)
        if subject is None:
            # nop, there is no subject set. If there is a valid @itemid, that carries it.
            # The value is stripped before the check, too: the attribute may be
            # surrounded by white space.
            itemid = item.getAttribute("itemid").strip() if item.hasAttribute("itemid") else None
            if itemid is not None and is_absolute_URI(itemid):
                subject = URIRef(itemid)
            else:
                subject = BNode()
            context.set_memory(item, subject)

        # Step 3: set the type triples if any
        types = []
        if item.hasAttribute("itemtype"):
            types = item.getAttribute("itemtype").split()
            for t in types:
                if is_absolute_URI(t):
                    self.graph.add((subject, ns_rdf["type"], URIRef(t)))

        # Step 4, 5 and 6 to set the typing variable
        itype = None
        if types:
            if is_absolute_URI(types[0]):
                itype = types[0]
                context.current_name = None
            elif context.current_type is not None:
                itype = context.current_type

        # Step 7, 8: Check the registry for possible keys and set the vocab
        vocab = self._vocabulary_for_type(itype) if itype is not None else None

        # Step 9: update vocab in the context
        if vocab is not None:
            context.current_vocabulary = vocab
        elif item.hasAttribute("itemtype"):
            context.current_vocabulary = None

        # Step 10: set up a property list; this will be used to generate triples later.
        # each entry in the dictionary is an array of RDF objects
        property_list = {}

        # Step 11: Get the item properties and run a cycle on those
        for prop in self.get_item_properties(item):
            for name in prop.getAttribute("itemprop").split():
                # 11.1.1. set a new context
                new_context = context.new_copy(itype)
                # 11.1.2, generate the URI for the property name, that will be the predicate.
                # The context is updated only after the URI has been generated.
                predicate = self.generate_predicate_URI(name, new_context)
                new_context.current_name = predicate
                # 11.1.3, generate the property value. A recursion may occur there if a new
                # item is hit (in which case the return value is a RDF resource chaining to a subject)
                value = self.get_property_value(prop, new_context)
                # 11.1.5, store all the values
                property_list.setdefault(predicate, []).append(value)

        # step 12: generate the triples
        for predicate, values in list(property_list.items()):
            self.generate_property_values(subject, URIRef(predicate), values, context)

        # Step 13: return the subject to the caller
        return subject

    def _add_property_mappings(self, vocabulary, name, expandedURI):
        """
        Add the rdfs:subPropertyOf and owl:equivalentProperty triples that the registry
        defines for a property, if any. A registry without such an entry is not an error.

        @param vocabulary: the current vocabulary, ie, the key into the registry
        @param name: the (fragment escaped) property name
        @param expandedURI: the full URI generated for the property
        """
        try:
            vocab_mapping = registry[vocabulary]["properties"][name]
        except (KeyError, TypeError):
            # no harm done, no extra vocabulary term
            return

        expandedURIRef = URIRef(expandedURI)
        relations = (
            ("subPropertyOf", ns_rdfs["subPropertyOf"]),
            ("equivalentProperty", ns_owl["equivalentProperty"]),
        )
        for key, relation in relations:
            try:
                targets = vocab_mapping[key]
            except (KeyError, TypeError):
                # Ok, no such relation for this property
                continue
            if targets is None:
                continue
            if not isinstance(targets, list):
                targets = [targets]
            for target in targets:
                self.graph.add((expandedURIRef, relation, URIRef(target)))

    def generate_predicate_URI(self, name, context):
        """
        Generate a full URI for a predicate, using the type, the vocabulary, etc.

        For details of this entry, see Section 4.4

        @param name: name of the property, ie, what appears in @itemprop
        @param context: an instance of an evaluation context
        @type context: L{Evaluation_Context}
        @return: the URI of the predicate, as a string
        """
        if debug:
            print(("name: %s, %s" % (name, context)))

        # Step 1: absolute URI-s are fine, take them as they are
        if is_absolute_URI(name):
            return name

        # Step 2: if there is neither a type nor a vocabulary, the name is just used
        # as a fragment on the base
        if context.current_type is None and context.current_vocabulary is None:
            b = self.base[:-1] if self.base.endswith('#') else self.base
            return b + '#' + fragment_escape(name)

        # Step 3: set the scheme; anything unexpected in the structure of the registry
        # falls back on the 'vocabulary' scheme
        try:
            scheme = registry[context.current_vocabulary]["propertyURI"]
        except (KeyError, TypeError):
            scheme = PropertySchemes.vocabulary

        name = fragment_escape(name)
        if scheme == PropertySchemes.contextual:
            # Step 5.1
            s = context.current_name
            if s is not None and s.startswith("http://www.w3.org/ns/md?type="):
                # Step 5.2
                expandedURI = s + '.' + name
            else:
                # Step 5.3
                expandedURI = "http://www.w3.org/ns/md?type=" + fragment_escape(context.current_type) + "&prop=" + name
        else:
            # Step 4
            if context.current_vocabulary.endswith(('#', '/')):
                expandedURI = context.current_vocabulary + name
            else:
                expandedURI = context.current_vocabulary + '#' + name

        # see if there are subproperty/equivalentproperty relations
        self._add_property_mappings(context.current_vocabulary, name, expandedURI)

        return expandedURI

    def get_property_value(self, node, context):
        """
        Generate an RDF object, ie, the value of a property. Note that if this element contains
        an @itemscope, then a recursive call to L{MicrodataConversion.generate_triples} is done and the
        return value of that method (ie, the subject for the corresponding item) is returned as an
        object.

        Otherwise, either URIRefs are created for <a>, <img>, etc, elements, or a Literal; the latter
        gets a time-related type for the <time> element, and a numeric type for <meter> and <data>
        elements if the @value can be parsed as a number.

        @param node: the DOM Node for which the property values should be generated
        @type node: DOM Node
        @param context: an instance of an evaluation context
        @type context: L{Evaluation_Context}
        @return: an RDF resource (URIRef, BNode, or Literal)
        """
        lang = get_lang_from_hierarchy(self.document, node)
        tag = node.tagName

        if node.hasAttribute("itemscope"):
            # THIS IS A RECURSION ENTRY POINT!
            return self.generate_triples(node, context)

        if tag in self._URI_ATTRS and node.hasAttribute(self._URI_ATTRS[tag]):
            return URIRef(generate_URI(self.base, node.getAttribute(self._URI_ATTRS[tag]).strip()))

        if tag == "meta" and node.hasAttribute("content"):
            if lang:
                return Literal(node.getAttribute("content"), lang=lang)
            return Literal(node.getAttribute("content"))

        if tag == "meter" or tag == "data":
            if not node.hasAttribute("value"):
                return Literal("")
            val = node.getAttribute("value")
            # check whether the attribute value can be defined as an integer or a float
            try:
                int(val)
                dt = ns_xsd["integer"]
            except ValueError:
                # Well, not an int, try then a float
                try:
                    float(val)
                    dt = ns_xsd["float"]
                except ValueError:
                    # Sigh, this is not a valid value, but let it go through as a plain literal nevertheless
                    dt = None
            if dt:
                return Literal(val, datatype=dt)
            return Literal(val)

        if tag == "time" and node.hasAttribute("datetime"):
            litval = node.getAttribute("datetime")
            dtype = get_time_type(litval)
            if dtype:
                return Literal(litval, datatype=dtype)
            return Literal(litval)

        if lang:
            return Literal(get_Literal(node), lang=lang)
        return Literal(get_Literal(node))

    def generate_property_values(self, subject, predicate, objects, context):
        """
        Generate the property values for a specific subject and predicate. The registry entry of the
        predicate's vocabulary specifies whether the objects should be added in an RDF list or each
        in a triple of its own; the latter is the default.

        @param subject: RDF subject
        @type subject: RDFLib Node (URIRef or blank node)
        @param predicate: RDF predicate
        @type predicate: RDFLib URIRef
        @param objects: RDF objects
        @type objects: list of RDFLib nodes (URIRefs, Blank Nodes, or literals)
        @param context: evaluation context (not used by this method)
        @type context: L{Evaluation_Context}
        """
        # The biggest complication is to find the method...
        method = ValueMethod.unordered

        # Plain string version of the predicate, used to cut off the vocabulary prefix
        pred_key = "%s" % predicate
        for key in registry:
            if not predicate.startswith(key):
                continue
            # This the part of the registry corresponding to the predicate's vocabulary
            registry_object = registry[key]
            try:
                if "multipleValues" in registry_object:
                    method = registry_object["multipleValues"]
                # The generic definition can be overwritten for a specific property; a
                # missing entry simply keeps the value found so far
                method = registry_object["properties"][pred_key[len(key):]]["multipleValues"]
            except (KeyError, TypeError):
                pass

        if method == ValueMethod.unordered:
            for obj in objects:
                self.graph.add((subject, predicate, obj))
        else:
            self.graph.add((subject, predicate, generate_RDF_collection(self.graph, objects)))
| 658 | |
| 659 | |
| 660 | |
| 661 | |
| 662 | |
| 663 |
