comparison env/lib/python3.9/site-packages/chardet/cli/chardetect.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """
2 Script which takes one or more file paths and reports on their detected
3 encodings
4
5 Example::
6
7 % chardetect somefile someotherfile
8 somefile: windows-1252 with confidence 0.5
9 someotherfile: ascii with confidence 1.0
10
11 If no paths are provided, it takes its input from stdin.
12
13 """
14
15 from __future__ import absolute_import, print_function, unicode_literals
16
17 import argparse
18 import sys
19
20 from chardet import __version__
21 from chardet.compat import PY2
22 from chardet.universaldetector import UniversalDetector
23
24
def description_of(lines, name='stdin'):
    """
    Build a one-line, human-readable report of the encoding detected for
    a sequence of byte chunks.

    :param lines: The chunks (typically the lines of a file) to analyse.
    :type lines: Iterable of bytes
    :param name: Label placed at the start of the report, e.g. a file name.
    :type name: str
    :returns: ``'<name>: <encoding> with confidence <confidence>'``, or
              ``'<name>: no result'`` when the detector result carries no
              encoding.
    """
    detector = UniversalDetector()
    for chunk in lines:
        detector.feed(bytearray(chunk))
        # Stop reading as soon as the detector reports it is done; this
        # avoids consuming the rest of the input (useful after a BOM).
        if detector.done:
            break
    detector.close()
    detection = detector.result

    if PY2:
        # On Python 2 the name arrives as bytes; turn it into text using
        # the filesystem encoding, dropping undecodable bytes.
        name = name.decode(sys.getfilesystemencoding(), 'ignore')

    encoding = detection['encoding']
    if not encoding:
        return '{}: no result'.format(name)
    return '{}: {} with confidence {}'.format(name, encoding,
                                              detection['confidence'])
51
52
def main(argv=None):
    """
    Handles command line arguments and gets things started.

    Each input is passed to ``description_of`` and the resulting report is
    printed to stdout.  When an input is a terminal, a short usage hint is
    printed to stderr first.  Files opened while parsing the arguments are
    closed once they have been reported on; the process-wide stdin streams
    are left open.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected "
                    "encodings")
    parser.add_argument('input',
                        help='File whose encoding we would like to determine. '
                             '(default: stdin)',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[sys.stdin if PY2 else sys.stdin.buffer])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {}'.format(__version__))
    args = parser.parse_args(argv)

    # Streams that belong to the interpreter rather than to this function;
    # they must stay open for any caller that invokes main() directly.
    stdin_streams = (sys.stdin, getattr(sys.stdin, 'buffer', None))

    def _close(stream):
        # close() is safe to call more than once on an already-closed file.
        if not any(stream is shared for shared in stdin_streams):
            stream.close()

    try:
        for f in args.input:
            try:
                if f.isatty():
                    print("You are running chardetect interactively. Press " +
                          "CTRL-D twice at the start of a blank line to signal the " +
                          "end of your input. If you want help, run chardetect " +
                          "--help\n", file=sys.stderr)
                print(description_of(f, f.name))
            finally:
                _close(f)
    finally:
        # argparse opened every file up front; if an error interrupted the
        # loop, make sure the files not yet processed are released as well.
        for f in args.input:
            _close(f)
81
82
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()