comparison env/lib/python3.9/site-packages/rdflib/parser.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """
2 Parser plugin interface.
3
4 This module defines the parser plugin interface and contains other
5 related parser support code.
6
7 The module is mainly useful for those who want to write a parser that
8 can plug in to rdflib. If you want to invoke a parser, you likely
9 want to do so through the Graph class's parse method.
10
11 """
12 from __future__ import absolute_import
13 from __future__ import division
14 from __future__ import print_function
15
16 import os
17 import sys
18
19 from six import BytesIO
20 from six import string_types
21 from six import text_type
22
23 from six.moves.urllib.request import pathname2url
24 from six.moves.urllib.request import Request
25 from six.moves.urllib.request import url2pathname
26 from six.moves.urllib.parse import urljoin
27 from six.moves.urllib.request import urlopen
28
29 from xml.sax import xmlreader
30
31 from rdflib import __version__
32 from rdflib.term import URIRef
33 from rdflib.namespace import Namespace
34
35 __all__ = [
36 'Parser', 'InputSource', 'StringInputSource',
37 'URLInputSource', 'FileInputSource']
38
39
class Parser(object):
    """
    Base class of the parser plugin interface.

    The base implementation does nothing; a concrete parser is expected to
    subclass it and override :meth:`parse`.
    """

    def __init__(self):
        """Create a parser; the base class keeps no state."""

    def parse(self, source, sink):
        """
        Parse ``source`` into ``sink``.

        The base implementation is a no-op that returns ``None``.

        NOTE(review): presumably ``source`` is an InputSource and ``sink``
        the graph/store receiving the parsed data -- confirm against the
        concrete parser plugins.
        """
        return None
47
48
class InputSource(xmlreader.InputSource, object):
    """
    An ``xml.sax.xmlreader.InputSource`` extended with two attributes.

    Attributes set by ``__init__``:

    * ``content_type`` -- initialised to ``None``; subclasses may fill it in.
    * ``auto_close`` -- initialised to ``False``; see Graph.parse(), true if
      the underlying stream was opened by us.
    """

    def __init__(self, system_id=None):
        """Initialise the SAX input source with the optional ``system_id``."""
        xmlreader.InputSource.__init__(self, system_id=system_id)
        self.content_type = None
        self.auto_close = False  # see Graph.parse(), true if opened by us

    def close(self):
        """
        Close the character stream and the byte stream, when present.

        A stream is skipped when it is ``None`` or has no ``close``
        attribute. The byte stream is closed even if closing the character
        stream raises, so one failing stream cannot leak the other.
        """
        char_stream = self.getCharacterStream()
        byte_stream = self.getByteStream()
        try:
            if char_stream is not None and hasattr(char_stream, 'close'):
                char_stream.close()
        finally:
            if byte_stream is not None and hasattr(byte_stream, 'close'):
                byte_stream.close()
63
64
class StringInputSource(InputSource):
    """
    Input source that serves an in-memory value as a byte stream.

    ``value`` may be bytes or text. Text is encoded as UTF-8 before being
    wrapped, and the input source's encoding is set to ``'utf-8'`` to match.
    For bytes no encoding is declared.
    """

    def __init__(self, value, system_id=None):
        """
        :param value: the content to expose, as bytes or text.
        :param system_id: optional system identifier passed to InputSource.
        """
        super(StringInputSource, self).__init__(system_id)
        if isinstance(value, text_type):
            # BytesIO only accepts bytes; encode text ourselves and record
            # the encoding we chose so consumers decode it consistently.
            value = value.encode('utf-8')
            self.setEncoding('utf-8')
        stream = BytesIO(value)
        self.setByteStream(stream)
77
78
# Default HTTP request headers; the User-agent value embeds the rdflib
# version. URLInputSource copies this dict before adding an Accept header.
headers = {}
headers['User-agent'] = (
    'rdflib-%s (http://rdflib.net/; eikeon@eikeon.com)' % __version__)
83
84
class URLInputSource(InputSource):
    """
    Input source whose byte stream is the response of opening a URL.

    The constructor performs the request immediately. Afterwards ``url``
    holds the final URL reported by the response, ``content_type`` holds the
    response's content type without parameters (or ``None``), and
    ``response_info`` holds the response's header object.
    """

    def __init__(self, system_id=None, format=None):
        """
        :param system_id: the URL to open.
        :param format: optional format name used to pick the Accept header;
            unrecognised values (including ``None``) get a generic header.
        """
        super(URLInputSource, self).__init__(system_id)
        self.url = system_id

        # Ordered (format, Accept header) pairs; the first equal entry wins.
        accept_by_format = (
            ('application/rdf+xml', 'application/rdf+xml, */*;q=0.1'),
            ('n3', 'text/n3, */*;q=0.1'),
            ('turtle', 'text/turtle,application/x-turtle, */*;q=0.1'),
            ('nt', 'text/plain, */*;q=0.1'),
            ('json-ld',
             'application/ld+json, application/json;q=0.9, */*;q=0.1'),
        )
        fallback_accept = (
            'application/rdf+xml,text/rdf+n3;q=0.9,' +
            'application/xhtml+xml;q=0.5, */*;q=0.1')

        # Work on a copy so the module-level defaults stay untouched.
        request_headers = dict(headers)
        request_headers['Accept'] = next(
            (accept for name, accept in accept_by_format if format == name),
            fallback_accept)

        response = urlopen(Request(system_id, None, request_headers))
        # Fix for issue 130 https://github.com/RDFLib/rdflib/issues/130
        self.url = response.geturl()  # in case redirections took place
        self.setPublicId(self.url)

        content_type = response.info().get('content-type')
        if content_type is not None:
            # Keep only the media type, dropping any ";parameter" suffix.
            content_type = content_type.split(";", 1)[0]
        self.content_type = content_type

        self.setByteStream(response)
        # TODO: self.setEncoding(encoding)
        self.response_info = response.info()  # the response's header object

    def __repr__(self):
        """Return the (possibly redirected) URL of this source."""
        return self.url
126
127
class FileInputSource(InputSource):
    """
    Input source wrapping an already opened file object.

    The system id is a ``file:`` URI derived from ``file.name``, resolved
    against a ``file:`` URI for the current working directory.
    """

    def __init__(self, file):
        """
        :param file: an open file object; it must have a ``name`` attribute.
        """
        cwd_uri = urljoin("file:", pathname2url(os.getcwd()))
        file_uri = urljoin("file:", pathname2url(file.name))
        super(FileInputSource, self).__init__(URIRef(file_uri, base=cwd_uri))
        self.file = file
        self.setByteStream(file)
        # TODO: self.setEncoding(encoding)

    def __repr__(self):
        """Return the repr of the wrapped file object."""
        return repr(self.file)
140
141
def create_input_source(source=None, publicID=None,
                        location=None, file=None, data=None, format=None):
    """
    Build the InputSource matching whichever input argument was supplied.

    Exactly one of ``source``, ``location``, ``file`` and ``data`` must be
    given:

    * ``source`` -- an InputSource (used as is), a string (treated as a
      location), or an object with a ``read`` attribute (wrapped as a byte
      stream).
    * ``location`` -- a local path or URL; ``file:///`` locations are opened
      from disk, anything else goes through URLInputSource with ``format``.
    * ``file`` -- an open file object, wrapped in a FileInputSource.
    * ``data`` -- bytes or text; text is encoded as UTF-8.

    ``publicID``, when given, is set as the public id of the result.
    Otherwise a missing public id is filled with the resolved location, or
    ``""`` when there is none.

    Raises ValueError unless exactly one input argument is given, and
    Exception for an unsupported ``source`` type or when no input source
    could be created.
    """
    supplied = [arg for arg in (source, location, file, data)
                if arg is not None]
    if len(supplied) != 1:
        raise ValueError(
            'exactly one of source, location, file or data must be given'
        )

    input_source = None

    if isinstance(source, InputSource):
        input_source = source
    elif isinstance(source, string_types):
        location = source
    elif hasattr(source, "read") and not isinstance(source, Namespace):
        # A readable object: wrap it and derive a system id when possible.
        input_source = InputSource()
        input_source.setByteStream(source)
        if source is sys.stdin:
            input_source.setSystemId("file:///dev/stdin")
        elif hasattr(source, "name"):
            input_source.setSystemId(source.name)
    elif source is not None:
        raise Exception("Unexpected type '%s' for source '%s'" %
                        (type(source), source))

    resolved = None  # Further to fix for issue 130

    opened_by_us = False  # make sure we close all file handles we open
    if location is not None:
        # Fix for Windows problem https://github.com/RDFLib/rdflib/issues/145
        if os.path.exists(location):
            location = pathname2url(location)
        cwd_uri = urljoin("file:", "%s/" % pathname2url(os.getcwd()))
        resolved = URIRef(location, base=cwd_uri).defrag()
        if not resolved.startswith("file:///"):
            input_source = URLInputSource(resolved, format)
        else:
            local_path = url2pathname(resolved.replace("file:///", "/"))
            # Handed to FileInputSource below.
            file = open(local_path, "rb")
        opened_by_us = True

    if file is not None:
        input_source = FileInputSource(file)

    if data is not None:
        if isinstance(data, text_type):
            data = data.encode('utf-8')
        input_source = StringInputSource(data)
        opened_by_us = True

    if input_source is None:
        raise Exception("could not create InputSource")

    input_source.auto_close |= opened_by_us
    if publicID is not None:  # Further to fix for issue 130
        input_source.setPublicId(publicID)
    elif input_source.getPublicId() is None:  # Further to fix for issue 130
        input_source.setPublicId(resolved or "")
    return input_source