comparison sort-header @ 0:5314e5d6f040 draft

Imported from capsule None
author bgruening
date Thu, 29 Jan 2015 07:53:17 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5314e5d6f040
1 #!/usr/bin/env perl
2 ##
3 ## Sort-header - wrapper for GNU sort with header-line support
4 ##
5 ## Copyright(C) A. Gordon
6 ## license AGPLv3+
7 ##
8 use strict;
9 use warnings;
10 use Data::Dumper;
11 use IO::Handle;
12 use Getopt::Long qw(:config bundling no_ignore_case_always);
13
## Forward declarations.
## Each prototype here must stay identical to the one on the matching
## definition further down, otherwise Perl reports a prototype mismatch.

# Command-line parsing and the informational screens
sub parse_commandline_options();
sub show_help();
sub show_version();
sub show_examples();

# Getopt::Long option callbacks
sub add_standard_sort_param(@);
sub add_standard_sort_param_value(@);
sub forbidden_sort_param(@);

# Processing stages, in the order the main program runs them
sub reassign_input_output();
sub process_header_lines();
sub read_line_non_buffered();
sub run_sort();
26
27
##
## Runtime options
##
my $PROGRAM = "sort-header";    # name used as a prefix in every message
my $VERSION = 0.4;

# Not referenced anywhere else in the code visible in this file.
my $check_only;
my $field_separator;

# Set by parse_commandline_options(); undef means "use STDIN / STDOUT".
my $input_file;
my $output_file;

my $header_lines = 1;           # --header N
my $debug;                      # --debugheader

# Exit status of this program. Starts as "failure" and is replaced by
# run_sort() with the exit status of the 'sort' child process.
my $sort_exit_code = 1;

# Options collected by the Getopt::Long callbacks, handed to 'sort' as-is.
my @sort_options;

##
## Program Start
##
parse_commandline_options();
reassign_input_output();
process_header_lines();         # copies the header lines to STDOUT
run_sort();                     # 'sort' handles whatever is left on STDIN
exit $sort_exit_code;
##
## Program End
##
55
# Print a worked usage example to STDOUT, then terminate the program with
# exit status 0. Registered as the handler of the --examples option, so it
# never returns to its caller.
sub show_examples()
{
    # Non-interpolating here-document: the text is emitted verbatim, so the
    # shell-prompt '$' characters need no escaping.
    my $examples = <<'END_OF_EXAMPLES';
Sorting a file with a header line:

$ cat input.txt
Fruit Color Price
Banana Yellow 4.1
Avocado Green 8.0
Apple Red 3.0
Melon Green 6.1

# By default, 'sort-header' assumes 1 header line
# (no need to use --header in this case).

$ sort-header -k3,3nr input.txt
Fruit Color Price
Avocado Green 8.0
Melon Green 6.1
Banana Yellow 4.1
Apple Red 3.0

END_OF_EXAMPLES

    print $examples;
    exit(0);
}
81
# Print the help screen to STDOUT, then terminate the program with exit
# status 0. Registered as the handler of the --help option, so it never
# returns to its caller.
#
# The list of unsupported GNU sort options printed here is meant to mirror
# the options that parse_commandline_options() routes to
# forbidden_sort_param(); keep the two in sync.
sub show_help()
{
    print <<EOF;
${PROGRAM}: Wrapper for GNU sort, allowing sorting files with header lines.

Usage: ${PROGRAM} [HEADER-OPTIONS] [GNU sort Options] [INPUT-FILE]

HEADER-OPTIONS: the following options are supported by '${PROGRAM}':

   --header N     = Treat the first N lines as header lines.
                    These lines will NOT be sorted. They will be passed
                    directly to the output file. (default: 1)

   --version      = Print ${PROGRAM}'s version.

   --debugheader  = Print debug messages (relating to ${PROGRAM}'s operation).

   --help         = Show this help screen.

   --examples     = Show usage examples.

GNU sort options:
   Most of the standard GNU sort options are supported and passed to GNU sort.
   The following options can not be used with '${PROGRAM}':

   -m --merge            => ${PROGRAM} can only sort one file, not merge multiple files.
   -c -C --check         => Currently not supported
   --files0-from         => Currently not supported
   --batch-size          => Currently not supported
   -z --zero-terminated  => Currently not supported

INPUT-FILE:
   If INPUT-FILE is not specified, ${PROGRAM} will use STDIN (just like GNU sort).

EOF
    exit(0);
}
118
# Print the program name, version and license notice to STDOUT, then
# terminate the program with exit status 0. Registered as the handler of
# the --version option, so it never returns to its caller.
sub show_version()
{
    my @banner = (
        "$PROGRAM $VERSION",
        'Copyright (C) 2010 A. Gordon (gordon@cshl.edu)',
        'License AGPLv3+: Affero GPL version 3 or later (http://www.gnu.org/licenses/agpl.html)',
        '',
        q{To see the GNU's sort version, run:},
        'sort --version',
    );

    # One output line per list element.
    print map { $_ . "\n" } @banner;
    exit(0);
}
131
# Parse @ARGV.
#
# Options fall into four groups:
#   * GNU sort options that are passed through: their callbacks append the
#     option (and its value, if any) to @sort_options;
#   * GNU sort options this wrapper refuses: forbidden_sort_param() dies;
#   * -o/--output, which is stored in $output_file rather than passed on;
#   * the wrapper's own options (--header, --debugheader, --help, --version,
#     --examples).
#
# Whatever remains in @ARGV afterwards is taken as the input file; at most
# one is accepted and it is stored in $input_file.
#
# Exits with status 1 if GetOptions reports an error; dies on a negative
# header count or on more than one input file.
sub parse_commandline_options()
{
    my $rc = GetOptions(
        ## Ordering options: passed through to GNU sort
        "ignore-leading-blanks|b" => \&add_standard_sort_param,
        "dictionary-order|d"      => \&add_standard_sort_param,
        "ignore-case|f"           => \&add_standard_sort_param,
        "general-numeric-sort|g"  => \&add_standard_sort_param,
        "ignore-nonprinting|i"    => \&add_standard_sort_param,
        "month-sort|M"            => \&add_standard_sort_param,
        "human-numeric-sort|h"    => \&add_standard_sort_param,
        "numeric-sort|n"          => \&add_standard_sort_param,
        "random-source=s"         => \&add_standard_sort_param_value,
        "random-sort|R"           => \&add_standard_sort_param,
        "reverse|r"               => \&add_standard_sort_param,
        "sort=s"                  => \&add_standard_sort_param_value,
        "version-sort|V"          => \&add_standard_sort_param,

        ## Other GNU sort options
        "check|c"                 => \&forbidden_sort_param,
        "C"                       => \&forbidden_sort_param,
        "compress-program=s"      => \&add_standard_sort_param_value,
        "debug"                   => \&add_standard_sort_param,

        "files0-from=s"           => \&forbidden_sort_param,

        "key|k=s"                 => \&add_standard_sort_param_value,
        "merge|m"                 => \&forbidden_sort_param,
        "batch-size=i"            => \&forbidden_sort_param,

        "parallel=i"              => \&add_standard_sort_param_value,

        # Handled by this program (see reassign_input_output), not by sort.
        "output|o=s"              => \$output_file,

        "stable|s"                => \&add_standard_sort_param,
        "buffer-size|S=s"         => \&add_standard_sort_param_value,

        "field-separator|t=s"     => \&add_standard_sort_param_value,
        "temporary-directory|T=s" => \&add_standard_sort_param_value,
        "unique|u"                => \&add_standard_sort_param,

        "zero-terminated|z"       => \&forbidden_sort_param,

        ## Informational screens: each of these handlers exits the program
        "help"                    => \&show_help,
        "version"                 => \&show_version,
        "examples"                => \&show_examples,

        ## Options specific to this wrapper
        "header=i"                => \$header_lines,
        "debugheader"             => \$debug,
    );

    exit 1 unless $rc;

    # Anything GetOptions left behind is a non-option argument.
    my @input_files = @ARGV;

    die "$PROGRAM: error: invalid number of header lines ($header_lines)\n"
        unless $header_lines >= 0;
    die "$PROGRAM: error: Multiple input files specified. This program can sort only a single file.\n"
        if @input_files > 1;
    $input_file = $input_files[0] if @input_files == 1;

    if ($debug) {
        warn "$PROGRAM: number of header lines = $header_lines\n";
        warn "$PROGRAM: PASS-to-Sort options:\n", Dumper(\@sort_options), "\n";
    }
}
194
# Point STDOUT and/or STDIN at the files named on the command line.
#
# If $output_file is set, it is created (truncated) and STDOUT is re-opened
# onto it. If $input_file is set, it is opened for reading and STDIN is
# re-opened onto it. The header lines and the 'sort' child process then use
# the standard streams only.
#
# The names are tested with defined() rather than for truth, so that a file
# literally named "0" is honoured. An input file of "-" means standard
# input, as it does for GNU sort, so STDIN is left untouched in that case.
#
# Dies with a descriptive message if a file cannot be opened or a standard
# stream cannot be re-opened.
sub reassign_input_output()
{
    if (defined $output_file) {
        warn "$PROGRAM: Re-assigning STDOUT to '$output_file'\n" if $debug;
        open my $out_fh, '>', $output_file
            or die "$PROGRAM: Error: failed to create output file '$output_file': $!\n";
        STDOUT->fdopen($out_fh, 'w')
            or die "$PROGRAM: Error: failed to reassign STDOUT to '$output_file': $!\n";
    }

    if (defined $input_file && $input_file ne '-') {
        warn "$PROGRAM: Re-assigning STDIN to '$input_file'\n" if $debug;
        open my $in_fh, '<', $input_file
            or die "$PROGRAM: Error: failed to open input file '$input_file': $!\n";
        STDIN->fdopen($in_fh, 'r')
            or die "$PROGRAM: Error: failed to reassign STDIN to '$input_file': $!\n";
    }
}
210
# Copy the first $header_lines lines of STDIN to STDOUT unchanged.
#
# Lines are fetched with read_line_non_buffered(), which pulls one byte at
# a time with sysread, so nothing beyond the last header line is consumed
# from STDIN. If the input ends before all header lines were read, the
# program exits right here.
sub process_header_lines()
{
    warn "$PROGRAM: Reading $header_lines header lines...\n" if $debug;

    my $remaining = $header_lines;
    while ($remaining-- > 0) {
        my $header = read_line_non_buffered();
        exit unless defined $header;    # input exhausted: nothing to sort
        print $header;
    }
}
220
# Run 'sort' with the collected pass-through options and wait for it.
#
# The list form of system() is used, so the options are not interpreted by
# a shell. No file names are passed: the child works on the standard
# streams it inherits from this process.
#
# On a normal termination the child's exit status is stored in
# $sort_exit_code. Dies if 'sort' could not be started or was killed by a
# signal.
sub run_sort()
{
    warn "$PROGRAM: Running GNU sort...\n" if $debug;

    system('sort', @sort_options);
    my $wait_status = $?;

    die "$PROGRAM: Error: failed to execute 'sort': $!\n"
        if $wait_status == -1;

    # The low 7 bits of the wait status hold the terminating signal, if any.
    my $signal = $wait_status & 127;
    if ($signal) {
        ## if sort was interrupted (CTRL-C) - just pass it on and commit suicide
        kill 2, $$ if $signal == 2;
        die "$PROGRAM: Error: 'sort' child-process died with signal $signal\n";
    }

    # Normal termination: the exit status is in the high byte.
    $sort_exit_code = $wait_status >> 8;
}
237
238
# Getopt::Long callback for GNU sort options that take no value.
# Only the first argument (the option that was seen) is used; the option
# is recorded in @sort_options without any value.
sub add_standard_sort_param(@)
{
    my $option_name = shift;
    add_standard_sort_param_value($option_name, undef);
}
244
# Getopt::Long callback for GNU sort options that take a value.
#
# Arguments: the option that was seen (stringified to obtain its name) and
# its value. The option is appended to @sort_options with a single dash if
# its name is one character long and with a double dash otherwise, followed
# by the value as a separate element.
#
# The value is appended whenever it is defined, not merely when it is true:
# values such as "0" (e.g. '-t 0' or '--parallel 0') are legitimate and
# must not be dropped. add_standard_sort_param() passes undef to request
# that no value be appended.
sub add_standard_sort_param_value(@)
{
    my ($obj, $value) = @_;

    my $name   = "" . $obj;    # stringify the option object, get the option name.
    my $dashes = length($name) == 1 ? "-" : "--";

    push @sort_options, $dashes . $name;
    push @sort_options, $value if defined $value;
}
259
# Getopt::Long callback for GNU sort options that this wrapper refuses.
# Always dies, naming the offending option; any value given with the option
# is ignored.
sub forbidden_sort_param(@)
{
    my ($obj) = @_;
    my $option = "$obj";    # stringify the option object, get the option name.

    die "$PROGRAM: Error: option '$option' can not be used with this program. If you must use it, run GNU sort directly. see --help for more details.\n";
}
267
# Read one line from STDIN without using Perl's buffered I/O.
#
# The line is assembled one byte at a time with sysread, so no input past
# the end of the line is consumed from STDIN.
#
# Returns the line including its trailing newline; or, if the input ends
# first, the partial line read so far; or undef if the input ended before
# any byte was read. Dies on a read error.
#
# End-of-input is decided on the LENGTH of the partial line, not its truth
# value, so that a final unterminated line consisting of "0" is returned
# rather than mistaken for "nothing read".
sub read_line_non_buffered()
{
    my $line = '';
    while (1) {
        my $c;
        my $rc = sysread STDIN, $c, 1;
        die "$PROGRAM: STDIN Read error: $!" unless defined $rc;

        if ($rc == 0) {
            # End of input.
            return length($line) ? $line : undef;
        }

        $line .= $c;
        return $line if $c eq "\n";
    }
}
281