Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/rdflib/tools/rdfpipe.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: UTF-8 -*- | |
3 """ | |
4 A commandline tool for parsing RDF in different formats and serializing the | |
5 resulting graph to a chosen format. | |
6 """ | |
7 | |
8 from __future__ import absolute_import | |
9 | |
10 import sys | |
11 from optparse import OptionParser | |
12 import logging | |
13 | |
14 import rdflib | |
15 from rdflib import plugin | |
16 from rdflib.store import Store | |
17 from rdflib.graph import ConjunctiveGraph | |
18 from rdflib.namespace import RDF, RDFS, OWL, XSD | |
19 from rdflib.parser import Parser | |
20 from rdflib.serializer import Serializer | |
21 | |
22 from rdflib.util import guess_format | |
23 from six import PY3 | |
24 | |
25 | |
26 DEFAULT_INPUT_FORMAT = 'xml' | |
27 DEFAULT_OUTPUT_FORMAT = 'n3' | |
28 | |
29 | |
def parse_and_serialize(input_files, input_format, guess,
                        outfile, output_format, ns_bindings,
                        store_conn="", store_type=None):
    """
    Parse every input into one graph and optionally serialize the result.

    :param input_files: iterable of sources handed to ``graph.parse``; the
        string ``'-'`` is replaced by ``sys.stdin``.
    :param input_format: ``FORMAT[:KEYWORDS]`` spec for the parser, or a
        falsy value to allow guessing.
    :param guess: when true and no ``input_format`` is given, the format of
        each non-stdin input is taken from ``guess_format(fpath)``, falling
        back to ``DEFAULT_INPUT_FORMAT``.
    :param outfile: destination passed to ``graph.serialize``; a falsy value
        suppresses serialization entirely.
    :param output_format: ``FORMAT[:KEYWORDS]`` spec for the serializer.
    :param ns_bindings: mapping of prefix to namespace URI, bound on the
        graph with ``override=False``.
    :param store_conn: value passed to ``store.open`` when a store is used.
    :param store_type: name of a ``Store`` plugin; when falsy the graph is
        created without an explicit store.
    """
    if store_type:
        store = plugin.get(store_type, Store)()
        store.open(store_conn)
        graph = ConjunctiveGraph(store)
    else:
        store = None
        graph = ConjunctiveGraph()

    for prefix, uri in ns_bindings.items():
        graph.namespace_manager.bind(prefix, uri, override=False)

    for fpath in input_files:
        # Re-derive the format for each input: guessing below may override
        # it for one file without affecting the next.
        use_format, kws = _format_and_kws(input_format)
        if fpath == '-':
            fpath = sys.stdin
        elif not input_format and guess:
            use_format = guess_format(fpath) or DEFAULT_INPUT_FORMAT
        graph.parse(fpath, format=use_format, **kws)

    if outfile:
        output_format, kws = _format_and_kws(output_format)
        kws.setdefault('base', None)
        graph.serialize(destination=outfile, format=output_format, **kws)

    # Compare against None rather than testing truthiness: bool(store) goes
    # through the store's __len__, so an opened store with no statements
    # would be treated as "no store" and never rolled back.
    if store is not None:
        store.rollback()
60 | |
61 | |
62 def _format_and_kws(fmt): | |
63 """ | |
64 >>> _format_and_kws("fmt") | |
65 ('fmt', {}) | |
66 >>> _format_and_kws("fmt:+a") | |
67 ('fmt', {'a': True}) | |
68 >>> _format_and_kws("fmt:a") | |
69 ('fmt', {'a': True}) | |
70 >>> _format_and_kws("fmt:+a,-b") #doctest: +SKIP | |
71 ('fmt', {'a': True, 'b': False}) | |
72 >>> _format_and_kws("fmt:c=d") | |
73 ('fmt', {'c': 'd'}) | |
74 >>> _format_and_kws("fmt:a=b:c") | |
75 ('fmt', {'a': 'b:c'}) | |
76 """ | |
77 fmt, kws = fmt, {} | |
78 if fmt and ':' in fmt: | |
79 fmt, kwrepr = fmt.split(':', 1) | |
80 for kw in kwrepr.split(','): | |
81 if '=' in kw: | |
82 k, v = kw.split('=') | |
83 kws[k] = v | |
84 elif kw.startswith('-'): | |
85 kws[kw[1:]] = False | |
86 elif kw.startswith('+'): | |
87 kws[kw[1:]] = True | |
88 else: # same as "+" | |
89 kws[kw] = True | |
90 return fmt, kws | |
91 | |
92 | |
def make_option_parser():
    """
    Build the ``OptionParser`` for the command line interface.

    The help texts list the parser and serializer plugin names reported by
    ``_get_plugin_names`` and show an example of the keyword syntax accepted
    after a format name.
    """
    available_parsers = _get_plugin_names(Parser)
    available_serializers = _get_plugin_names(Serializer)
    kw_example = "FORMAT:(+)KW1,-KW2,KW3=VALUE"

    usage = ("%prog [-h] [-i INPUT_FORMAT] [-o OUTPUT_FORMAT] "
             "[--ns=PFX=NS ...] [-] [FILE ...]")
    description = "".join([
        __doc__.strip(),
        " Reads file system paths, URLs or from stdin if '-' is given.",
        " The result is serialized to stdout.",
    ])
    version = "%prog " + "(using rdflib %s)" % rdflib.__version__

    input_help = (
        "Format of the input document(s)."
        " Available input formats are: %s."
        " If no format is given, it will be "
        "guessed from the file name extension."
        " Keywords to parser can be given after format like: %s."
    ) % (available_parsers, kw_example)

    # '%default' is an optparse placeholder, so it is kept out of any
    # %-formatted template and joined in as a plain piece instead.
    output_help = "".join([
        "Format of the graph serialization."
        " Available output formats are: %s." % available_serializers,
        " Default format is: '%default'.",
        " Keywords to serializer can be given after format like: %s."
        % kw_example,
    ])

    oparser = OptionParser(usage, description=description, version=version)

    oparser.add_option(
        '-i', '--input-format',
        type=str,
        metavar="INPUT_FORMAT",
        help=input_help)

    oparser.add_option(
        '-o', '--output-format',
        type=str,
        default=DEFAULT_OUTPUT_FORMAT,
        metavar="OUTPUT_FORMAT",
        help=output_help)

    oparser.add_option(
        '--ns',
        action="append",
        type=str,
        metavar="PREFIX=NAMESPACE",
        help=("Register a namespace binding (QName prefix to a base URI). "
              "This can be used more than once."))

    oparser.add_option(
        '--no-guess',
        dest='guess',
        action='store_false',
        default=True,
        help="Don't guess format based on file suffix.")

    oparser.add_option(
        '--no-out',
        action='store_true',
        default=False,
        help=("Don't output the resulting graph "
              "(useful for checking validity of input)."))

    oparser.add_option(
        '-w', '--warn',
        action='store_true',
        default=False,
        help="Output warnings to stderr (by default only critical errors).")

    return oparser
151 | |
152 | |
def _get_plugin_names(kind):
    """Return the names of the plugins registered for *kind*, joined by ', '."""
    names = [registered.name for registered in plugin.plugins(kind=kind)]
    return ", ".join(names)
155 | |
156 | |
def main():
    """
    Command line entry point.

    Parses the options, prints the usage and exits when no input is named,
    configures logging (warnings only with ``-w``, otherwise critical
    errors), collects the ``--ns PREFIX=NAMESPACE`` bindings and hands
    everything to ``parse_and_serialize``. Output goes to stdout unless
    ``--no-out`` is given.
    """
    oparser = make_option_parser()
    opts, args = oparser.parse_args()
    if not args:
        oparser.print_usage()
        oparser.exit()

    loglevel = logging.WARNING if opts.warn else logging.CRITICAL
    logging.basicConfig(level=loglevel)

    ns_bindings = {}
    for ns_kw in opts.ns or ():
        # Split on the first '=' only: namespace URIs may contain '='
        # themselves (e.g. in a query string).
        pfx, sep, uri = ns_kw.partition('=')
        if not sep:
            # Report a malformed binding as a usage error instead of
            # failing with an unpacking ValueError traceback.
            oparser.error(
                "--ns expects PREFIX=NAMESPACE, got %r" % ns_kw)
        ns_bindings[pfx] = uri

    if opts.no_out:
        outfile = None
    elif PY3:
        # Write to the underlying binary stream of stdout on Python 3.
        outfile = sys.stdout.buffer
    else:
        outfile = sys.stdout

    parse_and_serialize(args, opts.input_format, opts.guess,
                        outfile, opts.output_format, ns_bindings)
185 | |
186 | |
187 if __name__ == "__main__": | |
188 main() |