Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/pyRdfa/utils.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d30785e31577 | 1:56ad4e20f292 |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 """ | |
| 3 Various utilities for pyRdfa. | |
| 4 | |
| 5 Most of the utilities are straightforward. | |
| 6 | |
| 7 @organization: U{World Wide Web Consortium<http://www.w3.org>} | |
| 8 @author: U{Ivan Herman<http://www.w3.org/People/Ivan/>} | |
| 9 @license: This software is available for use under the | |
| 10 U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>} | |
| 11 | |
| 12 | |
| 13 """ | |
| 14 | |
| 15 """ | |
| 16 $Id: utils.py,v 1.9 2012/11/16 17:51:53 ivan Exp $ | |
| 17 $Date: 2012/11/16 17:51:53 $ | |
| 18 """ | |
| 19 import os, os.path, sys, imp, datetime | |
| 20 | |
| 21 # Python 3 vs. 2 switch | |
| 22 if sys.version_info[0] >= 3 : | |
| 23 from urllib.request import Request, urlopen | |
| 24 from urllib.parse import urljoin, quote | |
| 25 from http.server import BaseHTTPRequestHandler | |
| 26 from urllib.error import HTTPError as urllib_HTTPError | |
| 27 else : | |
| 28 from urllib.request import Request, urlopen | |
| 29 from urllib.error import HTTPError as urllib_HTTPError | |
| 30 from urllib.parse import urljoin | |
| 31 from urllib.parse import quote | |
| 32 from http.server import BaseHTTPRequestHandler | |
| 33 | |
| 34 from .extras.httpheader import content_type, parse_http_datetime | |
| 35 | |
| 36 import rdflib | |
| 37 if rdflib.__version__ >= "3.0.0" : | |
| 38 from rdflib import RDF as ns_rdf | |
| 39 else : | |
| 40 from rdflib.RDF import RDFNS as ns_rdf | |
| 41 | |
| 42 from .host import HostLanguage, preferred_suffixes | |
| 43 | |
| 44 ######################################################################################################### | |
| 45 # Handling URIs | |
class URIOpener:
    """A wrapper around the urllib C{urlopen} call to open a resource. Beyond accessing the data itself, the class
    sets a number of instance variables that might be relevant for processing.
    The class also adds an accept header to the outgoing request, namely
    text/html and application/xhtml+xml (unless set explicitly by the caller).

    If the content type is set by the server, the relevant HTTP response field is used. Otherwise,
    common suffixes are used (see L{host.preferred_suffixes}) to set the content type (this is really of importance
    for C{file:///} URI-s). If none of these works, the content type is empty.

    Interpretation of the content type for the return is done by Deron Meranda's U{httpheader module<http://deron.meranda.us/>}.

    @ivar data: the real data, ie, a file-like object
    @ivar headers: the return headers as sent back by the server
    @ivar content_type: the content type of the resource or the empty string, if the content type cannot be determined
    @ivar charset: the C{charset} parameter of the Content-Type header, None if there is no such parameter (or no such header)
    @ivar location: the real location of the data (ie, after possible redirection and content negotiation)
    @ivar last_modified_date: sets the last modified date if set in the header, None otherwise
    @ivar expiration_date: sets the expiration date if set in the header, I{current UTC plus one day} otherwise (this is used for caching purposes, hence this artificial setting)
    """
    CONTENT_LOCATION = 'Content-Location'
    CONTENT_TYPE = 'Content-Type'
    LAST_MODIFIED = 'Last-Modified'
    EXPIRES = 'Expires'

    def __init__(self, name, additional_headers=None):
        """
        @param name: URL to be opened
        @keyword additional_headers: additional HTTP request headers to be added to the call (a dictionary; None means no extra headers)
        @raise HTTPError: (the pyRdfa one) if the server answered with an HTTP error status
        @raise RDFaError: for any other problem met while opening or interpreting the resource
        """
        # A None default avoids sharing one mutable dictionary among all calls
        if additional_headers is None:
            additional_headers = {}

        try:
            # Note the removal of the fragment ID. This is necessary, per the HTTP spec
            req = Request(url=name.split('#')[0])

            for key in additional_headers:
                req.add_header(key, additional_headers[key])
            if 'Accept' not in additional_headers:
                req.add_header('Accept', 'text/html, application/xhtml+xml')

            self.data = urlopen(req)
            self.headers = self.data.info()

            if URIOpener.CONTENT_TYPE in self.headers:
                # The call below will remove the possible media type parameters, like charset settings
                ct = content_type(self.headers[URIOpener.CONTENT_TYPE])
                self.content_type = ct.media_type
                self.charset = ct.parmdict['charset'] if 'charset' in ct.parmdict else None
            else:
                # check if the suffix can be used for the content type; this may be important
                # for file:// type URI or if the server is not properly set up to return the right
                # mime type
                self.charset = None
                self.content_type = ""
                for suffix in preferred_suffixes:
                    if name.endswith(suffix):
                        self.content_type = preferred_suffixes[suffix]
                        break

            if URIOpener.CONTENT_LOCATION in self.headers:
                self.location = urljoin(self.data.geturl(), self.headers[URIOpener.CONTENT_LOCATION])
            else:
                self.location = name

            # Artificial default, used for caching: "now plus one day"; overwritten if the
            # server sent a usable Expires header
            self.expiration_date = datetime.datetime.utcnow() + datetime.timedelta(days=1)
            if URIOpener.EXPIRES in self.headers:
                try:
                    # Thanks to Deron Meranda for the HTTP date conversion method...
                    self.expiration_date = parse_http_datetime(self.headers[URIOpener.EXPIRES])
                except Exception:
                    # The Expires date format was wrong, sorry, forget it...
                    pass

            self.last_modified_date = None
            if URIOpener.LAST_MODIFIED in self.headers:
                try:
                    # Thanks to Deron Meranda for the HTTP date conversion method...
                    self.last_modified_date = parse_http_datetime(self.headers[URIOpener.LAST_MODIFIED])
                except Exception:
                    # The last modified date format was wrong, sorry, forget it...
                    pass

        except urllib_HTTPError as e:
            from . import HTTPError
            # Servers may answer with status codes that are not in the standard table; a plain
            # indexing would then raise a KeyError from within this handler and hide the real
            # HTTP problem, so fall back on the text of the original exception.
            response = BaseHTTPRequestHandler.responses.get(e.code)
            msg = response[1] if response is not None else str(e)
            raise HTTPError('%s' % msg, e.code)
        except Exception as e:
            from . import RDFaError
            raise RDFaError('%s' % e)
| 138 | |
| 139 ######################################################################################################### | |
| 140 | |
# 'safe' characters for the URI quoting, ie, characters that can safely stay as they are. Other
# special characters are converted to their %.. equivalents for namespace prefixes.
# The value is written as a raw string: it contains a literal backslash followed by '?', and
# spelling that as '\?' in a normal string literal is an invalid escape sequence.
_unquotedChars = r':/\?=#~'
# Characters whose presence in a URI triggers an extra warning (see quote_URI)
_warnChars = [' ', '\n', '\r', '\t']
| 145 | |
def quote_URI(uri, options=None):
    """
    'quote' a URI, ie, exchange special characters for their '%..' equivalents. Some of the characters
    may stay as they are (listed in L{_unquotedChars}). If one of the characters listed in L{_warnChars}
    is also in the (stripped) uri, one extra warning is also generated, provided C{options} is given.

    @param uri: URI
    @param options: object whose C{add_warning} method receives the warning; None means that no warning is recorded
    @type options: L{Options<pyRdfa.Options>}
    @return: the URI, stripped of surrounding white space and quoted
    """
    from . import err_unusual_char_in_URI
    suri = uri.strip()
    # At most one warning per URI, however many unusual characters it contains
    if options is not None and any(c in suri for c in _warnChars):
        options.add_warning(err_unusual_char_in_URI % suri)
    return quote(suri, _unquotedChars)
| 163 | |
| 164 ######################################################################################################### | |
| 165 | |
def create_file_name(uri):
    """
    Derive a file name from an (absolute) URI; used, eg, to name the cached copy of a vocabulary file.

    The URI is stripped and quoted (leaving the characters of L{_unquotedChars} alone), then every
    character of a fixed list of potentially dangerous ones is turned into an underscore.

    @param uri: (absolute) URI
    @return: the generated file name
    """
    file_name = quote(uri.strip(), _unquotedChars)
    # Neutralize the potentially dangerous characters, one after the other
    for unsafe in (' ', '%', '-', '+', '/', '?', ':', '=', '#'):
        file_name = file_name.replace(unsafe, '_')
    return file_name
| 174 | |
| 175 ######################################################################################################### | |
def has_one_of_attributes(node, *args):
    """
    Check whether one of the listed attributes is present on a (DOM) node.

    The attribute names can be given either as separate arguments or as one single tuple or list
    in the first argument.

    @param node: DOM element node
    @param args: possible attribute names
    @return: True if the node has at least one of the attributes, False otherwise (including the case when no name is given)
    @rtype: Boolean
    """
    if not args:
        # Nothing to look for; answer with a real Boolean, as documented
        return False
    names = args[0] if isinstance(args[0], (tuple, list)) else args
    # any() stops at the first attribute that is found
    return any(node.hasAttribute(attr) for attr in names)
| 192 | |
| 193 ######################################################################################################### | |
def traverse_tree(node, func):
    """Walk through the element tree rooted at C{node}, calling C{func} on every element met on the way.

    @param node: DOM element node
    @param func: function to be called on the node. Input parameter is a DOM Element Node. If the function returns a true value, the children of that node are not visited.
    """
    if not func(node):
        for child in node.childNodes:
            # Only elements are of interest; text, comments, etc, are skipped
            if child.nodeType != node.ELEMENT_NODE:
                continue
            traverse_tree(child, func)
| 205 | |
| 206 ######################################################################################################### | |
def return_XML(state, inode, base=True, xmlns=True):
    """
    Get (recursively) the XML Literal content of a DOM Element Node. (Most of the processing is done
    via a C{node.toxml} call of the xml minidom implementation.)

    The work is done on a deep copy of the node, ie, the incoming node is not modified.

    @param inode: DOM Node
    @param state: L{pyRdfa.state.ExecutionContext}
    @param base: whether the base element should be added to the output
    @type base: Boolean
    @param xmlns: whether the namespace declarations should be repeated in the generated node
    @type xmlns: Boolean
    @return: string
    """
    node = inode.cloneNode(True)

    # Decorate the copy with the namespace and lang values and, optionally, base
    if base:
        node.setAttribute("xml:base", state.base)

    if xmlns:
        for prefix in state.term_or_curie.xmlns:
            attr = "xmlns:%s" % prefix
            if not node.hasAttribute(attr):
                node.setAttribute(attr, "%s" % state.term_or_curie.xmlns[prefix])
        # Set the default namespace, if not done (and is available)
        if not node.getAttribute("xmlns") and state.defaultNS is not None:
            node.setAttribute("xmlns", state.defaultNS)

    # Get the lang, if necessary; the name of the attribute depends on the host language
    if state.lang:
        if state.options.host_language in (HostLanguage.xhtml, HostLanguage.xhtml5, HostLanguage.html5):
            lang_attr = "lang"
        else:
            lang_attr = "xml:lang"
        if not node.getAttribute(lang_attr):
            node.setAttribute(lang_attr, state.lang)

    if sys.version_info[0] >= 3:
        return node.toxml()
    # Python 2: toxml() with an encoding yields a byte string; decode() turns it into unicode
    # (str() does not accept an encoding argument on Python 2)
    return node.toxml(encoding='utf-8').decode('utf-8')
| 245 | |
| 246 ######################################################################################################### | |
| 247 | |
def dump(node):
    """
    Debugging aid: print the serialization of the tree starting at C{node}, as produced by the
    C{toprettyxml} method called with an empty indentation and an empty line separator.

    @param node: DOM node
    """
    rendered = node.toprettyxml(indent="", newl="")
    print(rendered)
| 255 | |
| 256 | |
| 257 |
