0
|
1 #!/usr/bin/env perl
|
|
2 ##
|
|
3 ## Sort-header - wrapper for GNU sort with header-line support
|
|
4 ##
|
|
5 ## Copyright(C) A. Gordon
|
|
6 ## license AGPLv3+
|
|
7 ##
|
|
8 use strict;
|
|
9 use warnings;
|
|
10 use Data::Dumper;
|
|
11 use IO::Handle;
|
|
12 use Getopt::Long qw(:config bundling no_ignore_case_always);
|
|
13
|
|
## Forward declarations.
## The subs are defined further down but use prototypes, so they are
## pre-declared here for the calls in the "Program Start" section.
sub add_standard_sort_param(@);
sub add_standard_sort_param_value(@);
sub forbidden_sort_param(@);
sub show_help();
sub show_version();
sub show_examples();
sub parse_commandline_options();
sub reassign_input_output();
sub process_header_lines();
sub run_sort();
sub read_line_non_buffered();


##
## Runtime options
##
my $PROGRAM = "sort-header";
my $VERSION = 0.4;

# Everything in this group starts out undefined.
my ($check_only, $input_file, $output_file, $field_separator, $debug);

my $header_lines   = 1;    # number of leading lines copied through unsorted
my $sort_exit_code = 1;    # by default, assume some error

# Command-line arguments that are handed to the 'sort' child process.
my @sort_options;

##
## Program Start
##
parse_commandline_options();
reassign_input_output();
process_header_lines();
run_sort();
exit($sort_exit_code);
##
## Program End
##
|
|
55
|
|
# Print a short usage example to STDOUT and terminate with exit status 0.
# The text is static, so a non-interpolating heredoc is used.
sub show_examples()
{
    my $examples = <<'END_EXAMPLES';
Sorting a file with a header line:

$ cat input.txt
Fruit Color Price
Banana Yellow 4.1
Avocado Green 8.0
Apple Red 3.0
Melon Green 6.1

# By default, 'sort-header' assumes 1 header line
# (no need to use --header in this case).

$ sort-header -k3,3nr input.txt
Fruit Color Price
Avocado Green 8.0
Melon Green 6.1
Banana Yellow 4.1
Apple Red 3.0

END_EXAMPLES

    print $examples;
    exit 0;
}
|
|
81
|
|
# Print the help screen (wrapper-specific options, unsupported GNU sort
# options, input-file handling) to STDOUT and terminate with exit status 0.
sub show_help()
{
    my $help_text = <<"END_HELP";
${PROGRAM}: Wrapper for GNU sort, allowing sorting files with header lines.

Usage: $PROGRAM [HEADER-OPTIONS] [GNU sort Options] [INPUT-FILE]

HEADER-OPTIONS: the following options are supported by '${PROGRAM}':

--header N = Treat the first N lines as header lines.
These line will NOT be sorted. They will be passed
directly to the output file. (default: 1)

--version = Print ${PROGRAM}'s version.

--debugheader = Print debug messages (relating to ${PROGRAM}'s operation).

--help = Show this help screen.

--examples = Show usage examples.

GNU sort options:
Most of the standard GNU sort options are supported and passed to GNU sort.
The following options can not be used with '${PROGRAM}':

-m --merge => ${PROGRAM} can only sort one file, not merge multiple files.
-c -C --check => Currently not supported
--files0-from => Currently not supported
-z --zero-terminated => Currently not supported

INPUT-FILE:
If INPUT-FILE is not specified, $PROGRAM will use STDIN (just like GNU sort).

END_HELP

    print $help_text;
    exit 0;
}
|
|
118
|
|
# Print the wrapper's name, version and license notice to STDOUT, point the
# user at 'sort --version' for the GNU sort version, then exit with status 0.
sub show_version()
{
    my $version_text = <<"END_VERSION";
$PROGRAM $VERSION
Copyright (C) 2010 A. Gordon (gordon\@cshl.edu)
License AGPLv3+: Affero GPL version 3 or later (http://www.gnu.org/licenses/agpl.html)

To see the GNU's sort version, run:
sort --version
END_VERSION

    print $version_text;
    exit 0;
}
|
|
131
|
|
# Parse @ARGV.
#
# GNU sort options are either collected into @sort_options (to be handed to
# the 'sort' child later) or rejected via forbidden_sort_param().  Options
# that belong to this wrapper set $output_file, $header_lines and $debug, or
# print a help/version/examples screen.  At most one remaining argument is
# accepted and stored in $input_file.
#
# Exits with status 1 when GetOptions reports a parsing failure; dies on a
# negative header count or on more than one input file.
sub parse_commandline_options()
{
    # GNU sort switches without an argument: passed through as-is.
    my @passthrough_flags = (
        "ignore-leading-blanks|b",
        "dictionary-order|d",
        "ignore-case|f",
        "general-numeric-sort|g",
        "ignore-nonprinting|i",
        "month-sort|M",
        "human-numeric-sort|h",
        "numeric-sort|n",
        "random-sort|R",
        "reverse|r",
        "version-sort|V",
        "debug",
        "stable|s",
        "unique|u",
    );

    # GNU sort options with an argument: passed through with their value.
    my @passthrough_valued = (
        "random-source=s",
        "sort=s",
        "compress-program=s",
        "key|k=s",
        "parallel=i",
        "buffer-size|S=s",
        "field-separator|t=s",
        "temporary-directory|T=s",
    );

    # GNU sort options this wrapper refuses to handle.
    my @rejected = (
        "check|c",
        "C",
        "files0-from=s",
        "merge|m",
        "batch-size=i",
        "zero-terminated|z",
    );

    my @option_table = (
        ( map { ( $_ => \&add_standard_sort_param ) }       @passthrough_flags ),
        ( map { ( $_ => \&add_standard_sort_param_value ) } @passthrough_valued ),
        ( map { ( $_ => \&forbidden_sort_param ) }          @rejected ),

        # Options handled by the wrapper itself.
        "output|o=s"  => \$output_file,
        "help"        => \&show_help,
        "version"     => \&show_version,
        "examples"    => \&show_examples,
        "header=i"    => \$header_lines,
        "debugheader" => \$debug,
    );

    my $parsed_ok = GetOptions(@option_table);
    exit 1 unless $parsed_ok;

    # Whatever GetOptions left in @ARGV is treated as input file names.
    my @remaining_args = @ARGV;

    unless ($header_lines >= 0) {
        die "$PROGRAM: error: invalid number of header lines ($header_lines)\n";
    }
    if (@remaining_args > 1) {
        die "$PROGRAM: error: Multiple input files specified. This program can sort only a signle file.\n";
    }
    ($input_file) = @remaining_args if @remaining_args == 1;

    if ($debug) {
        warn "$PROGRAM: number of header lines = $header_lines\n";
        warn "$PROGRAM: PASS-to-Sort options:\n", Dumper(\@sort_options), "\n";
    }
}
|
|
194
|
|
# Redirect this process's STDOUT / STDIN to the files named on the command
# line, so that both the header lines printed by this script and the 'sort'
# child process (which inherits the standard handles) use them.
#
# Reads $output_file, $input_file and $debug.  Dies if a file cannot be
# opened or a standard handle cannot be re-assigned.
#
# The tests use defined() rather than truthiness so that a file literally
# named "0" is honoured instead of being silently ignored.
#
# NOTE(review): the output file is truncated here, before any input has been
# read, so using the same file as input and as -o target loses its contents.
sub reassign_input_output()
{
    if (defined $output_file) {
        warn "$PROGRAM: Re-assigning STDOUT to '$output_file'\n" if $debug;
        open my $out_fh, '>', $output_file
            or die "$PROGRAM: Error: failed to create output file '$output_file': $!\n";
        # fdopen() with a glob reference duplicates the descriptor, so the
        # lexical handle may safely be closed when it goes out of scope.
        STDOUT->fdopen($out_fh, 'w')
            or die "$PROGRAM: Error: failed to reassign STDOUT to '$output_file': $!\n";
    }

    if (defined $input_file) {
        warn "$PROGRAM: Re-assigning STDIN to '$input_file'\n" if $debug;
        open my $in_fh, '<', $input_file
            or die "$PROGRAM: Error: failed to open input file '$input_file': $!\n";
        STDIN->fdopen($in_fh, 'r')
            or die "$PROGRAM: Error: failed to reassign STDIN to '$input_file': $!\n";
    }

    return;
}
|
|
210
|
|
# Copy the first $header_lines lines from STDIN to STDOUT untouched.
# Lines are fetched with read_line_non_buffered().  If the input ends before
# all header lines were seen, the program exits right away (plain 'exit').
sub process_header_lines()
{
    if ($debug) {
        warn "$PROGRAM: Reading $header_lines header lines...\n";
    }

    my $remaining = $header_lines;
    while ($remaining-- > 0) {
        my $header = read_line_non_buffered();
        exit unless defined $header;
        print $header;
    }
}
|
|
220
|
|
# Run 'sort' with the collected @sort_options as a child process (list-form
# system(), so no shell is involved) and record its exit status in
# $sort_exit_code.
#
# Dies if buffered output cannot be flushed, if 'sort' cannot be started, or
# if it was killed by a signal.  When the child died from signal 2 (SIGINT),
# the same signal is first sent to this process.
sub run_sort()
{
    warn "$PROGRAM: Running GNU sort...\n" if $debug;

    # Anything printed so far may still sit in Perl's STDOUT buffer.  Push it
    # out explicitly so it precedes the child's output; Perl's automatic
    # flush before fork is documented as not available on every platform.
    STDOUT->flush()
        or die "$PROGRAM: Error: failed to flush output before running 'sort': $!\n";

    system('sort', @sort_options);
    my $status = $?;

    if ($status == -1) {
        die "$PROGRAM: Error: failed to execute 'sort': $!\n";
    }

    my $signal = $status & 127;    # low 7 bits: terminating signal, if any
    if ($signal) {
        ## if sort was interrupted (CTRL-C) - just pass it on and commit suicide
        kill 2, $$ if $signal == 2;
        die "$PROGRAM: Error: 'sort' child-process died with signal $signal\n";
    }

    $sort_exit_code = $status >> 8;    # high byte: the child's exit code
    return;
}
|
|
237
|
|
238
|
|
# Option callback for GNU sort switches that take no argument.
# Only the first argument (the option object/name) is used; the switch is
# recorded through add_standard_sort_param_value() with no value.
sub add_standard_sort_param(@)
{
    my $option_obj = shift;
    return add_standard_sort_param_value($option_obj, undef);
}
|
|
244
|
|
# Option callback: record one GNU sort option (and its value, if any) in
# @sort_options so it can be passed on to the 'sort' child process.
#
#   $obj   - the option object supplied by the option parser; its string
#            form is used as the option name.
#   $value - the option's argument, or undef for options without one.
#
# One-character names are emitted as "-x", longer names as "--name".
# The value is appended whenever it is defined, so legitimate false values
# such as "0" (e.g. '-t 0') are not dropped.
sub add_standard_sort_param_value(@)
{
    my ($obj, $value) = @_;

    my $name   = "$obj";    # stringify the option object, get the option name
    my $dashes = length($name) == 1 ? "-" : "--";

    push @sort_options, $dashes . $name;
    push @sort_options, $value if defined $value;

    return;
}
|
|
259
|
|
# Option callback for GNU sort options this wrapper does not support.
# Always dies, naming the offending option (the string form of the first
# argument); any further arguments are ignored.
sub forbidden_sort_param(@)
{
    my ($option_obj) = @_;
    my $option = "$option_obj";    # string form of the option object = option name

    die "$PROGRAM: Error: option '$option' can not be used with this program. If you must use it, run GNU sort directly. see --help for more details.\n";
}
|
|
267
|
|
# Read one line from STDIN using single-byte sysread() calls, bypassing
# Perl's buffered I/O so that nothing beyond the returned line is consumed
# from the underlying descriptor.
#
# Returns the line including its trailing "\n"; at end of input returns the
# final unterminated line if any bytes were read, otherwise undef.
# Dies on a read error.
sub read_line_non_buffered()
{
    my $line = '';
    while (1) {
        my $char;
        my $bytes_read = sysread STDIN, $char, 1;
        die "$PROGRAM: STDIN Read error: $!" unless defined $bytes_read;

        if ($bytes_read == 0) {
            # End of input.  Test the length, not the truth value, so that a
            # last line consisting of just "0" is still returned.
            return length($line) ? $line : undef;
        }

        $line .= $char;
        return $line if $char eq "\n";
    }
}
|
|
281
|