Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/rdflib/tools/rdfpipe.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4f3585e2f14b |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # -*- coding: UTF-8 -*- | |
| 3 """ | |
| 4 A commandline tool for parsing RDF in different formats and serializing the | |
| 5 resulting graph to a chosen format. | |
| 6 """ | |
| 7 | |
| 8 from __future__ import absolute_import | |
| 9 | |
| 10 import sys | |
| 11 from optparse import OptionParser | |
| 12 import logging | |
| 13 | |
| 14 import rdflib | |
| 15 from rdflib import plugin | |
| 16 from rdflib.store import Store | |
| 17 from rdflib.graph import ConjunctiveGraph | |
| 18 from rdflib.namespace import RDF, RDFS, OWL, XSD | |
| 19 from rdflib.parser import Parser | |
| 20 from rdflib.serializer import Serializer | |
| 21 | |
| 22 from rdflib.util import guess_format | |
| 23 from six import PY3 | |
| 24 | |
| 25 | |
| 26 DEFAULT_INPUT_FORMAT = 'xml' | |
| 27 DEFAULT_OUTPUT_FORMAT = 'n3' | |
| 28 | |
| 29 | |
def parse_and_serialize(input_files, input_format, guess,
                        outfile, output_format, ns_bindings,
                        store_conn="", store_type=None):
    """
    Parse every input into a single graph and optionally serialize it.

    :param input_files: iterable of sources handed to ``graph.parse``; the
        value ``'-'`` is replaced by ``sys.stdin``.
    :param input_format: parser spec of the form ``FORMAT[:KEYWORDS]`` (split
        by ``_format_and_kws``), or a false value when none was given.
    :param guess: when true and ``input_format`` is false, the parser for
        each named input is picked with ``guess_format``, falling back to
        ``DEFAULT_INPUT_FORMAT``.
    :param outfile: destination passed to ``graph.serialize``; a false value
        skips serialization entirely.
    :param output_format: serializer spec of the form ``FORMAT[:KEYWORDS]``.
    :param ns_bindings: mapping of prefix to namespace URI; each pair is
        bound on the graph with ``override=False``.
    :param store_conn: value passed to ``store.open``.
    :param store_type: name of a ``Store`` plugin to back the graph; when
        false a plain ``ConjunctiveGraph()`` is used.
    """
    if store_type:
        store = plugin.get(store_type, Store)()
        store.open(store_conn)
        graph = ConjunctiveGraph(store)
    else:
        store = None
        graph = ConjunctiveGraph()

    try:
        for prefix, uri in ns_bindings.items():
            graph.namespace_manager.bind(prefix, uri, override=False)

        # The format spec is the same for every input, so split it once
        # instead of once per file.
        base_format, parse_kws = _format_and_kws(input_format)
        for fpath in input_files:
            use_format = base_format
            if fpath == '-':
                fpath = sys.stdin
            elif not input_format and guess:
                use_format = guess_format(fpath) or DEFAULT_INPUT_FORMAT
            graph.parse(fpath, format=use_format, **parse_kws)

        if outfile:
            output_format, kws = _format_and_kws(output_format)
            kws.setdefault('base', None)
            graph.serialize(destination=outfile, format=output_format, **kws)
    finally:
        # Roll back even when parsing or serializing fails. Compare against
        # None rather than relying on truthiness, which for a store object
        # may depend on how many statements it holds.
        # NOTE(review): the store is opened above but never closed here;
        # confirm whether callers expect that.
        if store is not None:
            store.rollback()
| 60 | |
| 61 | |
| 62 def _format_and_kws(fmt): | |
| 63 """ | |
| 64 >>> _format_and_kws("fmt") | |
| 65 ('fmt', {}) | |
| 66 >>> _format_and_kws("fmt:+a") | |
| 67 ('fmt', {'a': True}) | |
| 68 >>> _format_and_kws("fmt:a") | |
| 69 ('fmt', {'a': True}) | |
| 70 >>> _format_and_kws("fmt:+a,-b") #doctest: +SKIP | |
| 71 ('fmt', {'a': True, 'b': False}) | |
| 72 >>> _format_and_kws("fmt:c=d") | |
| 73 ('fmt', {'c': 'd'}) | |
| 74 >>> _format_and_kws("fmt:a=b:c") | |
| 75 ('fmt', {'a': 'b:c'}) | |
| 76 """ | |
| 77 fmt, kws = fmt, {} | |
| 78 if fmt and ':' in fmt: | |
| 79 fmt, kwrepr = fmt.split(':', 1) | |
| 80 for kw in kwrepr.split(','): | |
| 81 if '=' in kw: | |
| 82 k, v = kw.split('=') | |
| 83 kws[k] = v | |
| 84 elif kw.startswith('-'): | |
| 85 kws[kw[1:]] = False | |
| 86 elif kw.startswith('+'): | |
| 87 kws[kw[1:]] = True | |
| 88 else: # same as "+" | |
| 89 kws[kw] = True | |
| 90 return fmt, kws | |
| 91 | |
| 92 | |
def make_option_parser():
    """
    Build the ``OptionParser`` for the command line tool.

    The parser defines ``-i/--input-format``, ``-o/--output-format``
    (default ``DEFAULT_OUTPUT_FORMAT``), a repeatable ``--ns``,
    ``--no-guess`` (stored as ``guess``), ``--no-out`` and ``-w/--warn``.
    The help texts list the parser and serializer plugin names reported by
    ``_get_plugin_names``.

    :return: the configured ``OptionParser`` instance.
    """
    parser_names = _get_plugin_names(Parser)
    serializer_names = _get_plugin_names(Serializer)
    kw_example = "FORMAT:(+)KW1,-KW2,KW3=VALUE"

    # The module docstring is None when Python runs with -OO, so fall back
    # to an empty string instead of crashing on None.strip().
    module_doc = (__doc__ or "").strip()

    oparser = OptionParser(
        "%prog [-h] [-i INPUT_FORMAT] [-o OUTPUT_FORMAT] " +
        "[--ns=PFX=NS ...] [-] [FILE ...]",
        description=module_doc + (
            " Reads file system paths, URLs or from stdin if '-' is given."
            " The result is serialized to stdout."),
        version="%prog " + "(using rdflib %s)" % rdflib.__version__)

    oparser.add_option(
        '-i', '--input-format',
        type=str,  # default=DEFAULT_INPUT_FORMAT,
        help="Format of the input document(s)."
        " Available input formats are: %s." % parser_names +
        " If no format is given, it will be " +
        "guessed from the file name extension." +
        " Keywords to parser can be given after format like: %s." % kw_example,
        metavar="INPUT_FORMAT")

    # '%default' is expanded by optparse itself, so it must stay outside
    # the %-formatted pieces of the help text.
    oparser.add_option(
        '-o', '--output-format',
        type=str, default=DEFAULT_OUTPUT_FORMAT,
        help="Format of the graph serialization."
        " Available output formats are: %s."
        % serializer_names +
        " Default format is: '%default'." +
        " Keywords to serializer can be given after format like: %s." %
        kw_example,
        metavar="OUTPUT_FORMAT")

    oparser.add_option(
        '--ns',
        action="append", type=str,
        help="Register a namespace binding (QName prefix to a base URI). "
        "This can be used more than once.",
        metavar="PREFIX=NAMESPACE")

    oparser.add_option(
        '--no-guess', dest='guess',
        action='store_false', default=True,
        help="Don't guess format based on file suffix.")

    oparser.add_option(
        '--no-out',
        action='store_true', default=False,
        help="Don't output the resulting graph " +
        "(useful for checking validity of input).")

    oparser.add_option(
        '-w', '--warn',
        action='store_true', default=False,
        help="Output warnings to stderr (by default only critical errors).")

    return oparser
| 151 | |
| 152 | |
def _get_plugin_names(kind):
    """Return the names of all plugins of ``kind`` as one ", "-joined string."""
    names = [entry.name for entry in plugin.plugins(kind=kind)]
    return ", ".join(names)
| 155 | |
| 156 | |
def main():
    """
    Command line entry point.

    Parses the command line, prints the usage and exits when no input is
    named, configures logging (``WARNING`` with ``-w``, otherwise
    ``CRITICAL``), collects the ``--ns`` bindings and hands everything to
    ``parse_and_serialize``.  Output goes to standard output unless
    ``--no-out`` was given.
    """
    oparser = make_option_parser()
    opts, args = oparser.parse_args()
    if not args:
        oparser.print_usage()
        oparser.exit()

    loglevel = logging.WARNING if opts.warn else logging.CRITICAL
    logging.basicConfig(level=loglevel)

    ns_bindings = {}
    for ns_kw in opts.ns or ():
        # Split on the first '=' only: namespace URIs may contain '='.
        pfx, sep, uri = ns_kw.partition('=')
        if not sep:
            # Report a usage error instead of an unpacking traceback.
            oparser.error(
                "--ns expects PREFIX=NAMESPACE, got %r" % ns_kw)
        ns_bindings[pfx] = uri

    if opts.no_out:
        outfile = None
    elif PY3:
        # Under PY3 the binary buffer of stdout is used as destination.
        outfile = sys.stdout.buffer
    else:
        outfile = sys.stdout

    parse_and_serialize(args, opts.input_format, opts.guess,
                        outfile, opts.output_format, ns_bindings)
| 185 | |
| 186 | |
| 187 if __name__ == "__main__": | |
| 188 main() |
