Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/rdflib/tools/rdfpipe.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
| author | shellac |
|---|---|
| date | Mon, 01 Jun 2020 08:59:25 -0400 |
| parents | 79f47841a781 |
| children |
comparison
equal
deleted
inserted
replaced
| 4:79f47841a781 | 5:9b1c78e6ba9c |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # -*- coding: UTF-8 -*- | |
| 3 """ | |
| 4 A commandline tool for parsing RDF in different formats and serializing the | |
| 5 resulting graph to a chosen format. | |
| 6 """ | |
| 7 | |
| 8 import sys | |
| 9 from optparse import OptionParser | |
| 10 import logging | |
| 11 | |
| 12 import rdflib | |
| 13 from rdflib import plugin | |
| 14 from rdflib.store import Store | |
| 15 from rdflib.graph import ConjunctiveGraph | |
| 16 from rdflib.namespace import RDF, RDFS, OWL, XSD | |
| 17 from rdflib.parser import Parser | |
| 18 from rdflib.serializer import Serializer | |
| 19 | |
| 20 from rdflib.util import guess_format | |
| 21 from rdflib.py3compat import PY3 | |
| 22 | |
| 23 | |
# Parser format used when the format cannot be guessed from the file name.
DEFAULT_INPUT_FORMAT = "xml"
# Serializer format used when no -o/--output-format option is given.
DEFAULT_OUTPUT_FORMAT = "n3"
| 26 | |
| 27 | |
def parse_and_serialize(input_files, input_format, guess,
                        outfile, output_format, ns_bindings,
                        store_conn="", store_type=None):
    """Parse all inputs into a single graph and optionally serialize it.

    :param input_files: sources to parse; the string ``'-'`` means stdin.
    :param input_format: ``FORMAT[:KEYWORDS]`` spec for the parser, or a
        falsy value to leave the format unset (or guessed, see *guess*).
    :param guess: when true and no *input_format* is given, the format of
        each named source is taken from ``guess_format``, falling back to
        ``DEFAULT_INPUT_FORMAT``.
    :param outfile: destination handed to ``graph.serialize``; a falsy
        value skips serialization entirely.
    :param output_format: ``FORMAT[:KEYWORDS]`` spec for the serializer.
    :param ns_bindings: mapping of prefix to namespace URI, bound on the
        graph without overriding existing bindings.
    :param store_conn: configuration string passed to ``store.open``.
    :param store_type: name of a ``Store`` plugin; when falsy the graph is
        created with its default store.
    """
    store = None
    if store_type:
        store_factory = plugin.get(store_type, Store)
        store = store_factory()
        store.open(store_conn)
        graph = ConjunctiveGraph(store)
    else:
        graph = ConjunctiveGraph()

    namespaces = graph.namespace_manager
    for prefix, uri in ns_bindings.items():
        namespaces.bind(prefix, uri, override=False)

    for source in input_files:
        # Re-derive format and keywords for every source: the guessed
        # format of one file must not leak into the next one.
        source_format, parser_kws = _format_and_kws(input_format)
        if source == '-':
            source = sys.stdin
        elif guess and not input_format:
            source_format = guess_format(source) or DEFAULT_INPUT_FORMAT
        graph.parse(source, format=source_format, **parser_kws)

    if outfile:
        output_format, serializer_kws = _format_and_kws(output_format)
        serializer_kws.setdefault('base', None)
        graph.serialize(
            destination=outfile, format=output_format, **serializer_kws)

    # NOTE: this relies on the truthiness of the store object itself.
    if store:
        store.rollback()
| 58 | |
| 59 | |
| 60 def _format_and_kws(fmt): | |
| 61 """ | |
| 62 >>> _format_and_kws("fmt") | |
| 63 ('fmt', {}) | |
| 64 >>> _format_and_kws("fmt:+a") | |
| 65 ('fmt', {'a': True}) | |
| 66 >>> _format_and_kws("fmt:a") | |
| 67 ('fmt', {'a': True}) | |
| 68 >>> _format_and_kws("fmt:+a,-b") #doctest: +SKIP | |
| 69 ('fmt', {'a': True, 'b': False}) | |
| 70 >>> _format_and_kws("fmt:c=d") | |
| 71 ('fmt', {'c': 'd'}) | |
| 72 >>> _format_and_kws("fmt:a=b:c") | |
| 73 ('fmt', {'a': 'b:c'}) | |
| 74 """ | |
| 75 fmt, kws = fmt, {} | |
| 76 if fmt and ':' in fmt: | |
| 77 fmt, kwrepr = fmt.split(':', 1) | |
| 78 for kw in kwrepr.split(','): | |
| 79 if '=' in kw: | |
| 80 k, v = kw.split('=') | |
| 81 kws[k] = v | |
| 82 elif kw.startswith('-'): | |
| 83 kws[kw[1:]] = False | |
| 84 elif kw.startswith('+'): | |
| 85 kws[kw[1:]] = True | |
| 86 else: # same as "+" | |
| 87 kws[kw] = True | |
| 88 return fmt, kws | |
| 89 | |
| 90 | |
def make_option_parser():
    """Build the ``OptionParser`` describing the command line interface.

    The help texts list the names of the registered parser and serializer
    plugins, as returned by ``_get_plugin_names``.

    :returns: the configured ``OptionParser`` instance.
    """
    parser_names = _get_plugin_names(Parser)
    serializer_names = _get_plugin_names(Serializer)
    kw_example = "FORMAT:(+)KW1,-KW2,KW3=VALUE"

    usage = (
        "%prog [-h] [-i INPUT_FORMAT] [-o OUTPUT_FORMAT] "
        "[--ns=PFX=NS ...] [-] [FILE ...]")
    description = __doc__.strip() + (
        " Reads file system paths, URLs or from stdin if '-' is given."
        " The result is serialized to stdout.")
    # Format only the second piece: "%prog" is expanded later by optparse
    # and must not go through the % operator here.
    version = "%prog " + "(using rdflib %s)" % rdflib.__version__

    input_help = "".join([
        "Format of the input document(s).",
        " Available input formats are: %s." % parser_names,
        " If no format is given, it will be ",
        "guessed from the file name extension.",
        " Keywords to parser can be given after format like: %s." % kw_example,
    ])
    # "%default" is left literal for optparse to substitute.
    output_help = "".join([
        "Format of the graph serialization.",
        " Available output formats are: %s." % serializer_names,
        " Default format is: '%default'.",
        " Keywords to serializer can be given after format like: %s."
        % kw_example,
    ])

    # (flags, settings) pairs, registered in this order.
    option_table = [
        (('-i', '--input-format'), dict(
            type=str,  # default=DEFAULT_INPUT_FORMAT,
            help=input_help,
            metavar="INPUT_FORMAT")),
        (('-o', '--output-format'), dict(
            type=str, default=DEFAULT_OUTPUT_FORMAT,
            help=output_help,
            metavar="OUTPUT_FORMAT")),
        (('--ns',), dict(
            action="append", type=str,
            help="Register a namespace binding (QName prefix to a base URI). "
                 "This can be used more than once.",
            metavar="PREFIX=NAMESPACE")),
        (('--no-guess',), dict(
            dest='guess',
            action='store_false', default=True,
            help="Don't guess format based on file suffix.")),
        (('--no-out',), dict(
            action='store_true', default=False,
            help="Don't output the resulting graph "
                 "(useful for checking validity of input).")),
        (('-w', '--warn'), dict(
            action='store_true', default=False,
            help="Output warnings to stderr (by default only critical errors).")),
    ]

    oparser = OptionParser(usage, description=description, version=version)
    for flags, settings in option_table:
        oparser.add_option(*flags, **settings)
    return oparser
| 149 | |
def _get_plugin_names(kind):
    """Return the names of all plugins of *kind*, joined with ``", "``."""
    names = [entry.name for entry in plugin.plugins(kind=kind)]
    return ", ".join(names)
| 152 | |
| 153 | |
def main():
    """Run the command line tool: parse arguments, then convert the inputs.

    Prints the usage text and exits when no input is given.  Exits through
    ``oparser.error`` when a ``--ns`` value is not of the form
    ``PREFIX=NAMESPACE``.
    """
    oparser = make_option_parser()
    opts, args = oparser.parse_args()
    if not args:
        oparser.print_usage()
        oparser.exit()

    loglevel = logging.WARNING if opts.warn else logging.CRITICAL
    logging.basicConfig(level=loglevel)

    ns_bindings = {}
    for ns_kw in opts.ns or ():
        # Split on the first '=' only: namespace URIs may contain '='
        # themselves (e.g. in a query string).
        pfx, sep, uri = ns_kw.partition('=')
        if not sep:
            # Report a malformed binding as a usage error, not a traceback.
            oparser.error(
                "--ns expects PREFIX=NAMESPACE, got %r" % ns_kw)
        ns_bindings[pfx] = uri

    if opts.no_out:
        outfile = None
    elif PY3:
        # On Python 3 the binary buffer of stdout is used as destination.
        outfile = sys.stdout.buffer
    else:
        outfile = sys.stdout

    parse_and_serialize(args, opts.input_format, opts.guess,
                        outfile, opts.output_format, ns_bindings)


if __name__ == "__main__":
    main()
