Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/parser.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler | 
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 0:d30785e31577 | 1:56ad4e20f292 | 
|---|---|
| 1 """ | |
| 2 Parser plugin interface. | |
| 3 | |
| 4 This module defines the parser plugin interface and contains other | |
| 5 related parser support code. | |
| 6 | |
| 7 The module is mainly useful for those wanting to write a parser that | |
| 8 can plug in to rdflib. If you want to invoke a parser, you likely | |
| 9 want to do so through the Graph class parse method. | |
| 10 | |
| 11 """ | |
| 12 | |
| 13 import os | |
| 14 import sys | |
| 15 from urllib.request import pathname2url, url2pathname | |
| 16 from urllib.request import urlopen, Request | |
| 17 from urllib.parse import urljoin | |
| 18 from rdflib.py3compat import PY3 | |
| 19 if PY3: | |
| 20 from io import BytesIO | |
| 21 assert BytesIO | |
| 22 else: | |
| 23 from io import StringIO as BytesIO | |
| 24 from xml.sax import xmlreader | |
| 25 | |
| 26 from rdflib import __version__ | |
| 27 from rdflib.term import URIRef | |
| 28 from rdflib.namespace import Namespace | |
| 29 | |
# Public names exported by ``from rdflib.parser import *``.
__all__ = [
    'Parser',
    'InputSource',
    'StringInputSource',
    'URLInputSource',
    'FileInputSource',
]
| 33 | |
| 34 | |
class Parser:
    """
    Base class of the parser plugin interface.

    Both methods are no-ops here; a concrete parser overrides ``parse``.
    """

    def __init__(self):
        """Create the parser. No state is initialised."""

    def parse(self, source, sink):
        """
        Do nothing and return ``None``.

        ``source`` and ``sink`` are accepted but not used by this base
        implementation.
        """
| 42 | |
| 43 | |
class InputSource(xmlreader.InputSource, object):
    """
    A SAX ``InputSource`` extended with two extra attributes.

    ``content_type`` starts as ``None`` and ``auto_close`` starts as
    ``False``; code elsewhere in this module sets them.
    """

    def __init__(self, system_id=None):
        super(InputSource, self).__init__(system_id=system_id)
        self.content_type = None
        # see Graph.parse(), true if opened by us
        self.auto_close = False

    def close(self):
        """Close the byte stream, if one is set and it has a ``close``."""
        stream = self.getByteStream()
        if not stream or not hasattr(stream, 'close'):
            return
        stream.close()
| 58 | |
| 59 | |
class StringInputSource(InputSource):
    """
    Input source that serves an in-memory value as its byte stream.

    :param value: the content to expose. ``bytes`` are used as-is; a
        ``str`` is encoded as UTF-8 first.
    :param system_id: optional system identifier passed to ``InputSource``.
    """

    def __init__(self, value, system_id=None):
        super(StringInputSource, self).__init__(system_id)
        if isinstance(value, str):
            # BytesIO rejects text. Encode it as UTF-8, the same encoding
            # create_input_source() applies to its ``data`` argument.
            value = value.encode('utf-8')
        stream = BytesIO(value)
        self.setByteStream(stream)
        # TODO: record the encoding via self.setEncoding(...)
| 72 | |
| 73 | |
# Base HTTP request headers. URLInputSource copies this dict and adds an
# ``Accept`` entry before issuing its request.
headers = {
    'User-agent': (
        'rdflib-%s (http://rdflib.net/; eikeon@eikeon.com)' % __version__
    ),
}
| 78 | |
| 79 | |
class URLInputSource(InputSource):
    """
    Input source whose byte stream is the response from opening a URL.

    The request is issued in the constructor. Afterwards the instance has:

    * ``url`` -- the URL reported by the response (after redirects),
      also set as the public id;
    * ``content_type`` -- the ``content-type`` response header with any
      parameters (the part after ``;``) stripped, or ``None`` if absent;
    * ``response_info`` -- the response's header object.

    :param system_id: the URL to open.
    :param format: selects the ``Accept`` header sent with the request.
        Recognised values are ``'application/rdf+xml'``, ``'n3'``,
        ``'nt'`` and ``'json-ld'``; anything else gets a generic RDF
        ``Accept`` header.
    """

    def __init__(self, system_id=None, format=None):
        super(URLInputSource, self).__init__(system_id)
        self.url = system_id

        # Work on a copy so the module-level defaults stay untouched.
        myheaders = dict(headers)
        if format == 'application/rdf+xml':
            myheaders['Accept'] = 'application/rdf+xml, */*;q=0.1'
        elif format == 'n3':
            myheaders['Accept'] = 'text/n3, */*;q=0.1'
        elif format == 'nt':
            myheaders['Accept'] = 'text/plain, */*;q=0.1'
        elif format == 'json-ld':
            # The quality parameter is spelled ``q``; the previous ``p=0.9``
            # was a typo that left application/json at the default weight.
            myheaders['Accept'] = (
                'application/ld+json, application/json;q=0.9, */*;q=0.1')
        else:
            myheaders['Accept'] = (
                'application/rdf+xml,text/rdf+n3;q=0.9,' +
                'application/xhtml+xml;q=0.5, */*;q=0.1')

        req = Request(system_id, None, myheaders)
        response = urlopen(req)
        # Fix for issue 130 https://github.com/RDFLib/rdflib/issues/130
        self.url = response.geturl()  # in case redirections took place
        self.setPublicId(self.url)
        self.content_type = response.info().get('content-type')
        if self.content_type is not None:
            # Keep only the media type, dropping e.g. "; charset=...".
            self.content_type = self.content_type.split(";", 1)[0]
        self.setByteStream(response)
        # TODO: self.setEncoding(encoding)
        self.response_info = response.info()  # the response headers

    def __repr__(self):
        return self.url
| 119 | |
| 120 | |
class FileInputSource(InputSource):
    """
    Input source wrapping an already-open file object.

    The system id is a ``file:`` URI built from ``file.name`` and resolved
    against a ``file:`` URI for the current working directory. The file
    object itself is kept as ``self.file`` and used as the byte stream.
    """

    def __init__(self, file):
        cwd_uri = urljoin("file:", pathname2url(os.getcwd()))
        file_uri = urljoin("file:", pathname2url(file.name))
        super(FileInputSource, self).__init__(URIRef(file_uri, base=cwd_uri))
        self.file = file
        self.setByteStream(file)
        # TODO: self.setEncoding(encoding)

    def __repr__(self):
        return repr(self.file)
| 133 | |
| 134 | |
def create_input_source(source=None, publicID=None,
                        location=None, file=None, data=None, format=None):
    """
    Build the InputSource matching whichever input argument was supplied.

    Exactly one of ``source``, ``location``, ``file`` and ``data`` must be
    given:

    * ``source`` -- an ``InputSource`` (returned as-is), a ``str`` (treated
      as ``location``), or an object with a ``read`` attribute (wrapped in
      a plain ``InputSource``);
    * ``location`` -- a path or URL; ``file:///`` locations are opened
      locally, anything else goes through ``URLInputSource``;
    * ``file`` -- an open file object, wrapped in ``FileInputSource``;
    * ``data`` -- ``bytes`` or ``str`` (encoded as UTF-8), wrapped in
      ``StringInputSource``.

    ``publicID``, when given, becomes the public id of the result;
    otherwise an unset public id falls back to the absolute location, or
    ``""`` when there is none. ``format`` is only forwarded to
    ``URLInputSource``.

    Raises ``ValueError`` unless exactly one input argument is given, and
    ``Exception`` for an unsupported ``source`` type or when no input
    source could be built.
    """
    # Exactly one of source, location, file and data may be supplied.
    supplied = [arg is not None for arg in (source, location, file, data)]
    if supplied.count(True) != 1:
        raise ValueError(
            'exactly one of source, location, file or data must be given'
        )

    input_source = None

    if source is not None:
        if isinstance(source, InputSource):
            input_source = source
        elif isinstance(source, str):
            # A plain string is handled by the location branch below.
            location = source
        elif hasattr(source, "read") and not isinstance(source, Namespace):
            stream = source
            input_source = InputSource()
            input_source.setByteStream(stream)
            if stream is sys.stdin:
                input_source.setSystemId("file:///dev/stdin")
            elif hasattr(stream, "name"):
                input_source.setSystemId(stream.name)
        else:
            raise Exception("Unexpected type '%s' for source '%s'" %
                            (type(source), source))

    absolute_location = None  # Further to fix for issue 130

    auto_close = False  # make sure we close all file handles we open
    if location is not None:
        # Fix for Windows problem https://github.com/RDFLib/rdflib/issues/145
        if os.path.exists(location):
            location = pathname2url(location)
        cwd_url = pathname2url(os.getcwd())
        base = urljoin("file:", "%s/" % cwd_url)
        absolute_location = URIRef(location, base=base).defrag()
        if absolute_location.startswith("file:///"):
            local_path = url2pathname(
                absolute_location.replace("file:///", "/"))
            # Picked up by the ``file`` branch below.
            file = open(local_path, "rb")
        else:
            input_source = URLInputSource(absolute_location, format)
        auto_close = True
        # publicID = publicID or absolute_location  # Further to fix
        # for issue 130

    if file is not None:
        input_source = FileInputSource(file)

    if data is not None:
        if isinstance(data, str):
            data = data.encode('utf-8')
        input_source = StringInputSource(data)
        auto_close = True

    if input_source is None:
        raise Exception("could not create InputSource")

    input_source.auto_close |= auto_close
    if publicID is not None:  # Further to fix for issue 130
        input_source.setPublicId(publicID)
    # Further to fix for issue 130
    elif input_source.getPublicId() is None:
        input_source.setPublicId(absolute_location or "")
    return input_source
