Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/chardet/cli/chardetect.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
| author | shellac |
|---|---|
| date | Mon, 01 Jun 2020 08:59:25 -0400 |
| parents | 79f47841a781 |
| children |
comparison
equal
deleted
inserted
replaced
| 4:79f47841a781 | 5:9b1c78e6ba9c |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 Script which takes one or more file paths and reports on their detected | |
| 4 encodings | |
| 5 | |
| 6 Example:: | |
| 7 | |
| 8 % chardetect somefile someotherfile | |
| 9 somefile: windows-1252 with confidence 0.5 | |
| 10 someotherfile: ascii with confidence 1.0 | |
| 11 | |
| 12 If no paths are provided, it takes its input from stdin. | |
| 13 | |
| 14 """ | |
| 15 | |
| 16 from __future__ import absolute_import, print_function, unicode_literals | |
| 17 | |
| 18 import argparse | |
| 19 import sys | |
| 20 | |
| 21 from chardet import __version__ | |
| 22 from chardet.compat import PY2 | |
| 23 from chardet.universaldetector import UniversalDetector | |
| 24 | |
| 25 | |
def description_of(lines, name='stdin'):
    """
    Build a one-line report of the most likely encoding of some byte lines.

    Every line is fed to a ``UniversalDetector``; feeding stops early as soon
    as the detector reports that it is done.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines, used as the prefix of
                 the returned report.
    :type name: str
    :returns: ``'<name>: <encoding> with confidence <confidence>'`` when an
              encoding was detected, otherwise ``'<name>: no result'``.
    """
    detector = UniversalDetector()
    for chunk in lines:
        detector.feed(bytearray(chunk))
        # No need to read any further once the detector has decided
        # (this typically happens right away when a BOM is present).
        if detector.done:
            break
    detector.close()
    outcome = detector.result

    if PY2:
        name = name.decode(sys.getfilesystemencoding(), 'ignore')

    encoding = outcome['encoding']
    if not encoding:
        return '{0}: no result'.format(name)
    return '{0}: {1} with confidence {2}'.format(name, encoding,
                                                 outcome['confidence'])
| 52 | |
| 53 | |
def main(argv=None):
    """
    Handles command line arguments and gets things started.

    Parses the arguments, then prints one encoding report per input (as
    produced by ``description_of``) to stdout. When an input is a terminal,
    a short usage hint is written to stderr first.

    Files opened by ``argparse`` for the given paths are closed before this
    function returns, even if detection raises. The process-wide stdin
    stream is never closed.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Binary stdin stream used when no paths are given.
    stdin_stream = sys.stdin if PY2 else sys.stdin.buffer

    # Get command line arguments
    # Implicit string concatenation (rather than a backslash continuation
    # inside the literal) keeps source indentation out of the help text.
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected "
                    "encodings")
    parser.add_argument('input',
                        help='File whose encoding we would like to determine. '
                             '(default: stdin)',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[stdin_stream])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    try:
        for f in args.input:
            if f.isatty():
                print("You are running chardetect interactively. Press " +
                      "CTRL-D twice at the start of a blank line to signal the " +
                      "end of your input. If you want help, run chardetect " +
                      "--help\n", file=sys.stderr)
            print(description_of(f, f.name))
    finally:
        # argparse.FileType opens every path eagerly and never closes it, so
        # release those handles here. Standard input belongs to the process
        # (argparse may hand back sys.stdin itself for '-') and is left open.
        for f in args.input:
            if f is not sys.stdin and f is not stdin_stream:
                f.close()
| 82 | |
| 83 | |
# Run the command-line interface only when this module is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()
