comparison env/lib/python3.9/site-packages/rdflib/tools/rdfpipe.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 #!/usr/bin/env python
2 # -*- coding: UTF-8 -*-
3 """
4 A commandline tool for parsing RDF in different formats and serializing the
5 resulting graph to a chosen format.
6 """
7
8 from __future__ import absolute_import
9
10 import sys
11 from optparse import OptionParser
12 import logging
13
14 import rdflib
15 from rdflib import plugin
16 from rdflib.store import Store
17 from rdflib.graph import ConjunctiveGraph
18 from rdflib.namespace import RDF, RDFS, OWL, XSD
19 from rdflib.parser import Parser
20 from rdflib.serializer import Serializer
21
22 from rdflib.util import guess_format
23 from six import PY3
24
25
26 DEFAULT_INPUT_FORMAT = 'xml'  # parser format used when guess_format() returns nothing
27 DEFAULT_OUTPUT_FORMAT = 'n3'  # default value of the -o/--output-format option
28
29
def parse_and_serialize(input_files, input_format, guess,
                        outfile, output_format, ns_bindings,
                        store_conn="", store_type=None):
    """
    Parse every input into a single graph and optionally serialize it.

    :param input_files: iterable of inputs handed to ``graph.parse``; the
        value ``'-'`` is replaced by ``sys.stdin``.
    :param input_format: parser format spec in the syntax understood by
        ``_format_and_kws`` (``FORMAT[:KW,...]``), or a falsy value.
    :param guess: when true and no ``input_format`` is given, the format of
        each input is taken from ``guess_format``, falling back to
        ``DEFAULT_INPUT_FORMAT``.
    :param outfile: destination passed to ``graph.serialize``; nothing is
        serialized when it is falsy.
    :param output_format: serializer format spec (same syntax as
        ``input_format``).
    :param ns_bindings: mapping of prefix to namespace URI; each pair is
        bound on the graph with ``override=False``.
    :param store_conn: value passed to ``store.open``.
    :param store_type: name of a ``Store`` plugin to back the graph; when
        falsy the graph is created without an explicit store.
    """
    if store_type:
        store = plugin.get(store_type, Store)()
        store.open(store_conn)
        graph = ConjunctiveGraph(store)
    else:
        store = None
        graph = ConjunctiveGraph()

    try:
        for prefix, uri in ns_bindings.items():
            graph.namespace_manager.bind(prefix, uri, override=False)

        # The requested format does not depend on the input, so the spec is
        # parsed once instead of once per file.
        requested_format, parse_kws = _format_and_kws(input_format)
        for fpath in input_files:
            use_format = requested_format
            if fpath == '-':
                fpath = sys.stdin
            elif not input_format and guess:
                use_format = guess_format(fpath) or DEFAULT_INPUT_FORMAT
            graph.parse(fpath, format=use_format, **parse_kws)

        if outfile:
            output_format, serialize_kws = _format_and_kws(output_format)
            serialize_kws.setdefault('base', None)
            graph.serialize(
                destination=outfile, format=output_format, **serialize_kws)
    finally:
        # Roll back even when parsing or serializing raised, so a failed run
        # does not skip the rollback of an explicitly opened store.
        if store:
            store.rollback()
60
61
62 def _format_and_kws(fmt):
63 """
64 >>> _format_and_kws("fmt")
65 ('fmt', {})
66 >>> _format_and_kws("fmt:+a")
67 ('fmt', {'a': True})
68 >>> _format_and_kws("fmt:a")
69 ('fmt', {'a': True})
70 >>> _format_and_kws("fmt:+a,-b") #doctest: +SKIP
71 ('fmt', {'a': True, 'b': False})
72 >>> _format_and_kws("fmt:c=d")
73 ('fmt', {'c': 'd'})
74 >>> _format_and_kws("fmt:a=b:c")
75 ('fmt', {'a': 'b:c'})
76 """
77 fmt, kws = fmt, {}
78 if fmt and ':' in fmt:
79 fmt, kwrepr = fmt.split(':', 1)
80 for kw in kwrepr.split(','):
81 if '=' in kw:
82 k, v = kw.split('=')
83 kws[k] = v
84 elif kw.startswith('-'):
85 kws[kw[1:]] = False
86 elif kw.startswith('+'):
87 kws[kw[1:]] = True
88 else: # same as "+"
89 kws[kw] = True
90 return fmt, kws
91
92
def make_option_parser():
    """
    Build the ``OptionParser`` that describes this tool's command line.

    The help texts list the names of the registered parser and serializer
    plugins. The returned parser defines ``-i/--input-format``,
    ``-o/--output-format``, ``--ns``, ``--no-guess``, ``--no-out`` and
    ``-w/--warn``.
    """
    input_formats = _get_plugin_names(Parser)
    output_formats = _get_plugin_names(Serializer)
    kw_example = "FORMAT:(+)KW1,-KW2,KW3=VALUE"

    usage = (
        "%prog [-h] [-i INPUT_FORMAT] [-o OUTPUT_FORMAT] "
        "[--ns=PFX=NS ...] [-] [FILE ...]"
    )
    description = __doc__.strip() + (
        " Reads file system paths, URLs or from stdin if '-' is given."
        " The result is serialized to stdout."
    )
    # "%prog" is expanded by optparse, so it must stay outside the
    # %-formatting that inserts the rdflib version.
    rdflib_note = "(using rdflib %s)" % rdflib.__version__
    version = "%prog " + rdflib_note

    input_help = "".join([
        "Format of the input document(s)."
        " Available input formats are: %s." % input_formats,
        " If no format is given, it will be ",
        "guessed from the file name extension.",
        " Keywords to parser can be given after format like: %s." % kw_example,
    ])
    output_help = "".join([
        "Format of the graph serialization."
        " Available output formats are: %s." % output_formats,
        # "%default" is a placeholder expanded by optparse, not by us.
        " Default format is: '%default'.",
        " Keywords to serializer can be given after format like: %s."
        % kw_example,
    ])

    oparser = OptionParser(usage, description=description, version=version)

    oparser.add_option(
        '-i', '--input-format',
        type=str,
        help=input_help,
        metavar="INPUT_FORMAT")

    oparser.add_option(
        '-o', '--output-format',
        type=str,
        default=DEFAULT_OUTPUT_FORMAT,
        help=output_help,
        metavar="OUTPUT_FORMAT")

    oparser.add_option(
        '--ns',
        action="append",
        type=str,
        help=("Register a namespace binding (QName prefix to a base URI). "
              "This can be used more than once."),
        metavar="PREFIX=NAMESPACE")

    oparser.add_option(
        '--no-guess',
        dest='guess',
        action='store_false',
        default=True,
        help="Don't guess format based on file suffix.")

    oparser.add_option(
        '--no-out',
        action='store_true',
        default=False,
        help=("Don't output the resulting graph "
              "(useful for checking validity of input)."))

    oparser.add_option(
        '-w', '--warn',
        action='store_true',
        default=False,
        help="Output warnings to stderr (by default only critical errors).")

    return oparser
151
152
def _get_plugin_names(kind):
    """Return the names of the plugins of ``kind`` as one ", "-joined string."""
    names = [entry.name for entry in plugin.plugins(kind=kind)]
    return ", ".join(names)
155
156
def main():
    """
    Command line entry point.

    Parses the command line, prints the usage and exits when no input is
    given, configures logging (warnings with ``-w``, otherwise critical
    errors only), collects the ``--ns`` bindings and calls
    ``parse_and_serialize`` writing to standard output unless ``--no-out``
    was given.
    """
    oparser = make_option_parser()
    opts, args = oparser.parse_args()
    if not args:
        oparser.print_usage()
        oparser.exit()

    loglevel = logging.WARNING if opts.warn else logging.CRITICAL
    logging.basicConfig(level=loglevel)

    ns_bindings = {}
    for ns_kw in opts.ns or ():
        # Split on the first '=' only: a namespace URI may contain '='.
        pfx, sep, uri = ns_kw.partition('=')
        if not sep:
            # Report a malformed binding as a usage error, not a traceback.
            oparser.error(
                "invalid --ns value %r, expected PREFIX=NAMESPACE" % ns_kw)
        ns_bindings[pfx] = uri

    if opts.no_out:
        outfile = None
    elif PY3:
        # On Python 3 the underlying binary buffer of stdout is used.
        outfile = sys.stdout.buffer
    else:
        outfile = sys.stdout

    parse_and_serialize(args, opts.input_format, opts.guess,
                        outfile, opts.output_format, ns_bindings)
185
186
187 if __name__ == "__main__":  # run the tool only when executed as a script
188 main()