comparison find_and_replace @ 0:5314e5d6f040 draft

Imported from capsule None
author bgruening
date Thu, 29 Jan 2015 07:53:17 -0500
parents
children fb4ff3c42cd3
comparison
equal deleted inserted replaced
-1:000000000000 0:5314e5d6f040
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use Getopt::Std;
5
6 sub parse_command_line();
7 sub build_regex_string();
8 sub usage();
9
10 my $input_file ;
11 my $output_file;
12 my $find_pattern ;
13 my $replace_pattern ;
14 my $find_complete_words ;
15 my $find_pattern_is_regex ;
16 my $find_in_specific_column ;
17 my $find_case_insensitive ;
18 my $replace_global ;
19 my $skip_first_line ;
20
21
22 ##
23 ## Program Start
24 ##
25 usage() if @ARGV<2;
26 parse_command_line();
27 my $regex_string = build_regex_string() ;
28
29 # Allow first line to pass without filtering?
30 if ( $skip_first_line ) {
31 my $line = <$input_file>;
32 print $output_file $line ;
33 }
34
35
36 ##
37 ## Main loop
38 ##
39
40 ## I LOVE PERL (and hate it, at the same time...)
41 ##
42 ## So what's going on with the self-compiling perl code?
43 ##
44 ## 1. The program gets the find-pattern and the replace-pattern from the user (as strings).
45 ## 2. If both the find-pattern and replace-pattern are simple strings (not regex),
46 ## it would be possible to pre-compile a regex (with qr//) and use it in a 's///'
47 ## 3. If the find-pattern is a regex but the replace-pattern is a simple text string (with out back-references)
48 ## it is still possible to pre-compile the regex and use it in a 's///'
49 ## However,
50 ## 4. If the replace-pattern contains back-references, pre-compiling is not possible.
51 ## (in perl, you can't precompile a substitute regex).
52 ## See these examples:
53 ## http://www.perlmonks.org/?node_id=84420
54 ## http://stackoverflow.com/questions/125171/passing-a-regex-substitution-as-a-variable-in-perl
55 ##
56 ## The solution:
57 ## we build the regex string as valid perl code (in 'build_regex()', stored in $regex_string ),
58 ## Then eval() a new perl code that contains the substitution regex as inlined code.
59 ## Gotta love perl!
60
61 my $perl_program ;
62 if ( $find_in_specific_column ) {
63 # Find & replace in specific column
64
65 $perl_program = <<EOF;
66 while ( <STDIN> ) {
67 chomp ;
68 my \@columns = split ;
69
70 #not enough columns in this line - skip it
71 next if ( \@columns < $find_in_specific_column ) ;
72
73 \$columns [ $find_in_specific_column - 1 ] =~ $regex_string ;
74
75 print STDOUT join("\t", \@columns), "\n" ;
76 }
77 EOF
78
79 } else {
80 # Find & replace the entire line
81 $perl_program = <<EOF;
82 while ( <STDIN> ) {
83 $regex_string ;
84 print STDOUT;
85 }
86 EOF
87 }
88
89
90 # The dynamic perl code reads from STDIN and writes to STDOUT,
91 # so connect these handles (if the user didn't specifiy input / output
92 # file names, these might be already be STDIN/OUT, so the whole could be a no-op).
93 *STDIN = $input_file ;
94 *STDOUT = $output_file ;
95 eval $perl_program ;
96
97
98 ##
99 ## Program end
100 ##
101
102
103 sub parse_command_line()
104 {
105 my %opts ;
106 getopts('grsiwc:o:', \%opts) or die "$0: Invalid option specified\n";
107
108 die "$0: missing Find-Pattern argument\n" if (@ARGV==0);
109 $find_pattern = $ARGV[0];
110 die "$0: missing Replace-Pattern argument\n" if (@ARGV==1);
111 $replace_pattern = $ARGV[1];
112
113 $find_complete_words = ( exists $opts{w} ) ;
114 $find_case_insensitive = ( exists $opts{i} ) ;
115 $skip_first_line = ( exists $opts{s} ) ;
116 $find_pattern_is_regex = ( exists $opts{r} ) ;
117 $replace_global = ( exists $opts{g} ) ;
118
119 # Search in specific column ?
120 if ( defined $opts{c} ) {
121 $find_in_specific_column = $opts{c};
122
123 die "$0: invalid column number ($find_in_specific_column).\n"
124 unless $find_in_specific_column =~ /^\d+$/ ;
125
126 die "$0: invalid column number ($find_in_specific_column).\n"
127 if $find_in_specific_column <= 0;
128 }
129 else {
130 $find_in_specific_column = 0 ;
131 }
132
133 # Output File specified (instead of STDOUT) ?
134 if ( defined $opts{o} ) {
135 my $filename = $opts{o};
136 open $output_file, ">$filename" or die "$0: Failed to create output file '$filename': $!\n" ;
137 } else {
138 $output_file = *STDOUT ;
139 }
140
141
142 # Input file Specified (instead of STDIN) ?
143 if ( @ARGV>2 ) {
144 my $filename = $ARGV[2];
145 open $input_file, "<$filename" or die "$0: Failed to open input file '$filename': $!\n" ;
146 } else {
147 $input_file = *STDIN;
148 }
149 }
150
151 sub build_regex_string()
152 {
153 my $find_string ;
154 my $replace_string ;
155
156 if ( $find_pattern_is_regex ) {
157 $find_string = $find_pattern ;
158 $replace_string = $replace_pattern ;
159 } else {
160 $find_string = quotemeta $find_pattern ;
161 $replace_string = quotemeta $replace_pattern;
162 }
163
164 if ( $find_complete_words ) {
165 $find_string = "\\b($find_string)\\b";
166 }
167
168 my $regex_string = "s/$find_string/$replace_string/";
169
170 $regex_string .= "i" if ( $find_case_insensitive );
171 $regex_string .= "g" if ( $replace_global ) ;
172
173
174 return $regex_string;
175 }
176
177 sub usage()
178 {
179 print <<EOF;
180
181 Find and Replace
182 Copyright (C) 2009 - by A. Gordon ( gordon at cshl dot edu )
183
184 Usage: $0 [-o OUTPUT] [-g] [-r] [-w] [-i] [-c N] [-l] FIND-PATTERN REPLACE-PATTERN [INPUT-FILE]
185
186 -g - Global replace - replace all occurences in line/column.
187 Default - replace just the first instance.
188 -w - search for complete words (not partial sub-strings).
189 -i - case insensitive search.
190 -c N - check only column N, instead of entire line (line split by whitespace).
191 -l - skip first line (don't replace anything in it)
192 -r - FIND-PATTERN and REPLACE-PATTERN are perl regular expression,
193 usable inside a 's///' statement.
194 By default, they are used as verbatim text strings.
195 -o OUT - specify output file (default = STDOUT).
196 INPUT-FILE - (optional) read from file (default = from STDIN).
197
198
199 EOF
200
201 exit;
202 }