Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/rdflib/plugins/parsers/hturtle.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 """ | |
| 3 Extraction parser for RDF embedded verbatim into HTML or XML files. This is based | |
| 4 on: | |
| 5 | |
| 6 * The specification on embedding turtle into html: | |
| 7 http://www.w3.org/TR/turtle/#in-html | |
| 8 | |
| 9 For SVG (and currently SVG only) the method also extracts embedded RDF/XML | |
| 10 data, per the SVG specification | |
| 11 | |
| 12 License: W3C Software License, | |
| 13 http://www.w3.org/Consortium/Legal/copyright-software | |
| 14 Author: Ivan Herman | |
| 15 Copyright: W3C | |
| 16 """ | |
| 17 | |
| 18 from rdflib.parser import Parser | |
| 19 from .pyRdfa import pyRdfa, Options | |
| 20 from .pyRdfa.state import ExecutionContext | |
| 21 from .pyRdfa.embeddedRDF import handle_embeddedRDF | |
| 22 from .structureddata import _get_orig_source, _check_error | |
| 23 | |
# Probe for the optional html5lib dependency.  After this block the module
# level name ``html5lib`` is a plain boolean flag (True when the import
# succeeded, False otherwise), not the module object itself.
try:
    import html5lib
except ImportError:
    import warnings
    # Warn at import time; the parser raises later when actually used.
    warnings.warn(
        'html5lib not found! RDFa and Microdata parsers '
        'will not be available.')
    html5lib = False
else:
    # Reference the imported name once before rebinding it to the flag.
    assert html5lib
    html5lib = True
| 34 | |
| 35 | |
class HTurtle(pyRdfa):
    """
    The RDFa 1.1 processor, repurposed so that it only performs hturtle
    (embedded RDF) extraction.
    """
    def __init__(self, options=None, base="", media_type=""):
        """
        Initialise the underlying pyRdfa processor, pinning the RDFa
        version to "1.1".
        """
        pyRdfa.__init__(self, options=options, base=base,
                        media_type=media_type, rdfa_version="1.1")

    def graph_from_DOM(self, dom, graph, pgraph=None):
        """
        Replacement for the parent's DOM processing step: walk the DOM and
        hand each element to handle_embeddedRDF, so only embedded RDF
        content ends up in the graph.

        @param dom: DOM document; processing starts at its documentElement
        @param graph: graph that receives the extracted triples
        @keyword pgraph: if not None, receives a copy of the triples and
        namespace bindings of the options' processor graph
        """

        def _merge_into(target, origin):
            # Copy every triple, then every namespace binding.
            for triple in origin:
                target.add(triple)
            for prefix, namespace in origin.namespaces():
                target.bind(prefix, namespace)

        def _walk(element, out_graph, context):
            # Embedded RDF found and extracted: do not descend any further.
            if handle_embeddedRDF(element, out_graph, context):
                return
            # Otherwise visit the element children, in document order.
            for child in element.childNodes:
                if child.nodeType == element.ELEMENT_NODE:
                    _walk(child, out_graph, context)

        root = dom.documentElement
        context = ExecutionContext(root, graph, base=self.base,
                                   options=self.options, rdfa_version="1.1")
        _walk(root, graph, context)
        if pgraph is not None:
            _merge_into(pgraph, self.options.processor_graph.graph)
| 74 # This is the parser interface as it would look when called from the rest of | |
| 75 # RDFLib | |
| 76 | |
| 77 | |
class HTurtleParser(Parser):
    """
    Parser plugin that extracts RDF embedded verbatim in HTML or XML
    sources by delegating to the HTurtle processor.
    """

    def parse(self, source, graph, pgraph=None, media_type=""):
        """
        @param source: one of the input sources that the RDFLib package defined
        @type source: InputSource class instance
        @param graph: target graph for the triples; output graph, in RDFa spec.
        parlance
        @type graph: RDFLib Graph
        @keyword pgraph: accepted for interface compatibility; it is not
        forwarded, since _process always runs the processor with pgraph=None
        @keyword media_type: explicit setting of the preferred media type
        (a.k.a. content type) of the RDFa source. None means the content
        type of the HTTP result is used, or a guess is made based on the
        suffix of a file
        @type media_type: string
        @raise ImportError: if html5lib could not be imported
        """
        if html5lib is False:
            raise ImportError(
                'html5lib is not installed, cannot ' +
                'use RDFa and Microdata parsers.')

        (baseURI, orig_source) = _get_orig_source(source)
        # _process() takes (graph, baseURI, orig_source, media_type).
        # Passing pgraph positionally shifted every argument by one and
        # collided with the media_type keyword, raising TypeError on every
        # call.
        self._process(graph, baseURI, orig_source, media_type=media_type)

    def _process(self, graph, baseURI, orig_source, media_type=""):
        """
        Run the HTurtle processor over orig_source and fill graph.

        @param graph: target graph for the extracted triples
        @param baseURI: base URI handed to the processor
        @param orig_source: source handed to the processor's
        graph_from_source
        @keyword media_type: preferred media type; None is treated as ""
        """
        self.options = Options(output_processor_graph=None,
                               embedded_rdf=True,
                               vocab_expansion=False,
                               vocab_cache=False)

        if media_type is None:
            media_type = ""
        processor = HTurtle(
            self.options, base=baseURI, media_type=media_type)
        processor.graph_from_source(
            orig_source, graph=graph, pgraph=None, rdfOutput=False)
        # get possible error triples to raise exceptions
        _check_error(graph)
