annotate tools/unix_tools/find_and_replace.pl @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/perl
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 use strict;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 use warnings;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 use Getopt::Std;
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Forward declarations: the subs are defined after the main program.
sub parse_command_line();
sub build_regex_string();
sub usage();

# Program-wide settings, all filled in by parse_command_line().
my $input_file ;                # input file handle (a file, or STDIN)
my $output_file;                # output file handle (a file, or STDOUT)
my $find_pattern ;              # FIND-PATTERN argument
my $replace_pattern ;           # REPLACE-PATTERN argument
my $find_complete_words ;       # -w
my $find_pattern_is_regex ;     # -r
my $find_in_specific_column ;   # -c N (0 = search the entire line)
my $find_case_insensitive ;     # -i
my $replace_global ;            # -g
my $skip_first_line ;           # -s


##
## Program Start
##
usage() if @ARGV<2;
parse_command_line();
my $regex_string = build_regex_string() ;

# Allow first line to pass without filtering?
if ( $skip_first_line ) {
    my $line = <$input_file>;
    # Empty input has no first line; printing undef would only raise a warning.
    print $output_file $line if defined $line ;
}


##
## Main loop
##

## I LOVE PERL (and hate it, at the same time...)
##
## So what's going on with the self-compiling perl code?
##
## 1. The program gets the find-pattern and the replace-pattern from the user (as strings).
## 2. If both the find-pattern and replace-pattern are simple strings (not regex),
##    it would be possible to pre-compile a regex (with qr//) and use it in a 's///'
## 3. If the find-pattern is a regex but the replace-pattern is a simple text string (without back-references)
##    it is still possible to pre-compile the regex and use it in a 's///'
## However,
## 4. If the replace-pattern contains back-references, pre-compiling is not possible.
##    (in perl, you can't precompile a substitute regex).
##    See these examples:
##    http://www.perlmonks.org/?node_id=84420
##    http://stackoverflow.com/questions/125171/passing-a-regex-substitution-as-a-variable-in-perl
##
## The solution:
##    we build the regex string as valid perl code (in 'build_regex_string()', stored in $regex_string ),
##    Then eval() a new perl code that contains the substitution regex as inlined code.
##    Gotta love perl!

my $perl_program ;
if ( $find_in_specific_column ) {
    # Find & replace in specific column.
    # The generated loop splits each line on whitespace, drops lines that have
    # fewer than the requested number of columns, and re-joins with tabs.

    $perl_program = <<EOF;
    while ( <STDIN> ) {
        chomp ;
        my \@columns = split ;

        #not enough columns in this line - skip it
        next if ( \@columns < $find_in_specific_column ) ;

        \$columns [ $find_in_specific_column - 1 ] =~ $regex_string ;

        print STDOUT join("\t", \@columns), "\n" ;
    }
EOF

} else {
    # Find & replace the entire line
    $perl_program = <<EOF;
    while ( <STDIN> ) {
        $regex_string ;
        print STDOUT;
    }
EOF
}


# The dynamic perl code reads from STDIN and writes to STDOUT,
# so connect these handles (if the user didn't specify input / output
# file names, these might already be STDIN/OUT, so the whole thing could be a no-op).
*STDIN = $input_file ;
*STDOUT = $output_file ;
eval $perl_program ;

# A string eval traps compile-time and run-time errors in $@ (for example an
# invalid user-supplied regex). Report them instead of exiting silently with
# empty output.
die "$0: find-and-replace failed: $@" if $@ ;

# Buffered write errors (e.g. a full disk) only surface when the handle is closed.
close $output_file
    or die "$0: Failed to write output: $!\n" ;


##
## Program end
##
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Parse @ARGV and populate the file-scoped settings.
#
# Options handled (see getopts spec below): -g -r -s -i -w, -c N, -o FILE.
# Positional arguments left after option parsing:
#   FIND-PATTERN REPLACE-PATTERN [INPUT-FILE]
#
# Sets $find_pattern, $replace_pattern, the boolean option flags,
# $find_in_specific_column (0 when -c is absent), and opens $input_file /
# $output_file (defaulting to STDIN / STDOUT).
#
# Dies with a message on an invalid option, a missing pattern argument,
# an invalid column number, or a file that cannot be opened.
sub parse_command_line()
{
    my %opts ;
    getopts('grsiwc:o:', \%opts) or die "$0: Invalid option specified\n";

    die "$0: missing Find-Pattern argument\n" if (@ARGV==0);
    $find_pattern = $ARGV[0];
    die "$0: missing Replace-Pattern argument\n" if (@ARGV==1);
    $replace_pattern = $ARGV[1];

    $find_complete_words   = ( exists $opts{w} ) ;
    $find_case_insensitive = ( exists $opts{i} ) ;
    $skip_first_line       = ( exists $opts{s} ) ;
    $find_pattern_is_regex = ( exists $opts{r} ) ;
    $replace_global        = ( exists $opts{g} ) ;

    # Search in specific column ?
    if ( defined $opts{c} ) {
        $find_in_specific_column = $opts{c};

        # Must be all digits before it is compared numerically below.
        die "$0: invalid column number ($find_in_specific_column).\n"
            unless $find_in_specific_column =~ /^\d+$/ ;

        # Columns are 1-based; zero is not a valid column.
        die "$0: invalid column number ($find_in_specific_column).\n"
            if $find_in_specific_column <= 0;
    }
    else {
        $find_in_specific_column = 0 ;
    }

    # Output File specified (instead of STDOUT) ?
    # Three-argument open keeps characters in the file name (leading '>', '|',
    # surrounding whitespace) from being interpreted as part of the open mode.
    # '-' keeps the meaning the two-argument form gave it: standard output.
    if ( defined $opts{o} and $opts{o} ne '-' ) {
        my $filename = $opts{o};
        open $output_file, '>', $filename
            or die "$0: Failed to create output file '$filename': $!\n" ;
    } else {
        $output_file = *STDOUT ;
    }


    # Input file Specified (instead of STDIN) ?
    # As above, '-' still means standard input.
    if ( @ARGV>2 and $ARGV[2] ne '-' ) {
        my $filename = $ARGV[2];
        open $input_file, '<', $filename
            or die "$0: Failed to open input file '$filename': $!\n" ;
    } else {
        $input_file = *STDIN;
    }
}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Build the text of a perl substitution statement ("s/FIND/REPLACE/flags")
# from the file-scoped settings $find_pattern, $replace_pattern,
# $find_pattern_is_regex, $find_complete_words, $find_case_insensitive and
# $replace_global.
#
# Returns the statement as a string; the caller inlines it into generated
# perl code, so the string must stay syntactically valid.
sub build_regex_string()
{
    my $find_string ;
    my $replace_string ;

    if ( $find_pattern_is_regex ) {
        # The patterns are pasted between '/' delimiters, so a bare '/' would
        # terminate the s/// early. Backslash-escape every '/' that is not
        # already escaped; existing backslash pairs are passed through as-is.
        my $escape_delimiter = sub {
            my ($text) = @_ ;
            $text =~ s{(\\.)|/}{ defined $1 ? $1 : '\\/' }gse ;
            return $text ;
        };
        $find_string    = $escape_delimiter->( $find_pattern ) ;
        $replace_string = $escape_delimiter->( $replace_pattern ) ;
    } else {
        # Verbatim text: quotemeta neutralises regex meta-characters,
        # the '/' delimiter, and '$' / '@' interpolation in the replacement.
        $find_string    = quotemeta $find_pattern ;
        $replace_string = quotemeta $replace_pattern ;
    }

    if ( $find_complete_words ) {
        # Non-capturing group: a capturing one would renumber the user's own
        # groups, making $1 in REPLACE-PATTERN refer to the whole word.
        $find_string = "\\b(?:$find_string)\\b";
    }

    my $regex_string = "s/$find_string/$replace_string/";

    $regex_string .= "i" if ( $find_case_insensitive );
    $regex_string .= "g" if ( $replace_global ) ;

    return $regex_string;
}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
176
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Print the command-line help text to standard output and exit the program.
# Never returns.
#
# The option letters listed here must match the getopts() specification used
# when parsing the command line ('grsiwc:o:'); "skip first line" is -s.
sub usage()
{
print <<EOF;

Find and Replace
Copyright (C) 2009 - by A. Gordon ( gordon at cshl dot edu )

Usage: $0 [-o OUTPUT] [-g] [-r] [-w] [-i] [-c N] [-s] FIND-PATTERN REPLACE-PATTERN [INPUT-FILE]

   -g   - Global replace - replace all occurrences in line/column.
          Default - replace just the first instance.
   -w   - search for complete words (not partial sub-strings).
   -i   - case insensitive search.
   -c N - check only column N, instead of entire line (line split by whitespace).
   -s   - skip first line (don't replace anything in it)
   -r   - FIND-PATTERN and REPLACE-PATTERN are perl regular expressions,
          usable inside a 's///' statement.
          By default, they are used as verbatim text strings.
   -o OUT - specify output file (default = STDOUT).
   INPUT-FILE - (optional) read from file (default = from STDIN).


EOF

    exit;
}