annotate escape_excel.pl @ 1:7726adcf91c6 draft

Uploaded Escape Excel Perl script
author pstew
date Fri, 17 Feb 2017 16:38:12 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
1 #!/usr/bin/perl -w
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
2
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
3 use Scalar::Util qw(looks_like_number);
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
4
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
# Lookup table from a lower-case three-letter month abbreviation to the
# full lower-case month name.
#
# has_text_month() looks up the first three letters of a candidate word
# here and then requires the whole candidate to be a prefix of the stored
# value, so every value must be the complete month name (an abbreviated
# value such as 'jun' would make "June" fail that prefix test).
%date_abbrev_hash = (
    'jan' => 'january',
    'feb' => 'february',
    'mar' => 'march',
    'apr' => 'april',
    'may' => 'may',
    'jun' => 'june',
    'jul' => 'july',
    'aug' => 'august',
    'sep' => 'september',
    'oct' => 'october',
    'nov' => 'november',
    'dec' => 'december',
);
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
17
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
18
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
# Decide whether a field is something Excel would import as a number.
#
# Argument: $_[0] -- the field text to test.
# Returns:  1 if the field is considered numeric, 0 otherwise
#           (an undefined argument is reported as non-numeric).
#
sub is_number
{
    my ($value) = @_;

    return 0 unless defined $value;

    # use what Perl thinks is a number first
    if (looks_like_number($value))
    {
        # Perl treats infinities and NaNs as numbers, Excel does not.
        # looks_like_number() accepts them regardless of letter case
        # ("Inf", "INFINITY", "NaN", ...) and after leading whitespace,
        # so the rejection test has to be case-insensitive as well.
        return 0 if $value =~ /^\s*[+-]?(?:inf|nan)/i;

        return 1;
    }

    # Perl cannot handle American comma separators within long numbers.
    # Excel does, so we have to check for it.
    # Excel doesn't handle European dot separators, at least not when it is
    # set to the US locale (the original author's test environment), so
    # those are left unsupported.
    #
    # Accepted form: optional sign, digits, zero or more ",ddd" groups,
    # optional exponent. No decimal part is accepted here.
    return ($value =~ /^[+-]?[0-9]+(?:,\d\d\d)*(?:[Ee][+-]?[0-9]+)?$/) ? 1 : 0;
}
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
41
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
42
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
# Report whether a string starts with a textual month name.
#
# The string may begin with an optional 1-4 digit number followed by any
# run of '-', ' ' or '/' separators; the 3-9 ASCII letters that follow
# are the candidate word. The candidate counts as a month when its first
# three letters are a key of %date_abbrev_hash and the whole candidate
# (lower-cased) is a prefix of the full name stored there, e.g. "Jan",
# "Janu" and "January" all match 'january', while "Janx" does not.
#
# Only the first nine letters are captured, so a longer word that begins
# with a nine-letter month name is still reported as a month.
#
# Argument: $_[0] -- the text to inspect.
# Returns:  1 if a text month was found, 0 otherwise.
#
sub has_text_month
{
    my $date_str = $_[0];

    return 0 unless defined $date_str;

    # pull out the leading word, skipping an optional numeric prefix
    my $candidate = '';
    if ($date_str =~ /^(?:[0-9]{1,4}[- \/]*)?([A-Za-z]{3,9})/)
    {
        $candidate = lc $1;
    }

    return 0 if $candidate eq '';

    # first three letters are not the start of a month
    my $full = $date_abbrev_hash{substr($candidate, 0, 3)};
    return 0 unless defined $full;

    # the candidate is real only if all of it is a prefix of the full name
    return (index($full, $candidate) == 0) ? 1 : 0;
}
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
84
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
# Escaping behaviour switches and their defaults. --paranoid toggles the
# first one; the --no-* options clear the others.
$escape_excel_paranoid_flag = 0;
$escape_sci_flag            = 1;
$escape_zeroes_flag         = 1;
$escape_dates_flag          = 1;

# set when an unrecognised option is seen; triggers the usage message
$syntax_error_flag = 0;

# read in command line arguments
#
# Anything starting with '-' is treated as an option. The first other
# argument is the input file name, the second is the output file name,
# and any further ones are ignored.
$num_files = 0;
foreach $field (@ARGV)
{
    if ($field =~ /^-/)
    {
        if ($field eq '--paranoid')
        {
            # each occurrence flips the flag, so giving it twice cancels
            $escape_excel_paranoid_flag = $escape_excel_paranoid_flag ? 0 : 1;
        }
        elsif ($field eq '--no-sci')
        {
            $escape_sci_flag = 0;
        }
        elsif ($field eq '--no-zeroes')
        {
            $escape_zeroes_flag = 0;
        }
        elsif ($field eq '--no-dates')
        {
            $escape_dates_flag = 0;
        }
        else
        {
            # diagnostics go to STDERR, like the usage text below, so they
            # can never be mixed into data written to STDOUT
            printf STDERR "ABORT -- unknown option %s\n", $field;
            $syntax_error_flag = 1;
        }
    }
    elsif ($num_files == 0)
    {
        $filename = $field;
        $num_files++;
    }
    elsif ($num_files == 1)
    {
        $outname = $field;
        $num_files++;
    }
}

# default to stdin if no filename given
if ($num_files == 0)
{
    $filename = '-';
    $num_files = 1;
}


# print syntax error message
if ($num_files == 0 || $syntax_error_flag)
{
    printf STDERR "Syntax: escape_excel.pl [options] tab_delimited_input.txt [output.txt]\n";
    printf STDERR " Options:\n";
    printf STDERR " --no-dates Do not escape text that looks like dates\n";
    printf STDERR " --no-sci Do not escape > #E (ex: 12E4) or >11 digit integer parts\n";
    printf STDERR " --no-zeroes Do not escape leading zeroes (ie. 012345)\n";
    printf STDERR " --paranoid Escape *ALL* non-numeric text\n";
    printf STDERR " WARNING -- Excel can take a LONG time to import\n";
    printf STDERR " text files where most fields are escaped.\n";
    printf STDERR " Copy / Paste Values can become near unusuable....\n";
    printf STDERR "\n";
    printf STDERR " Input file must be tab-delimited.\n";
    printf STDERR " Fields will be stripped of existing =\"\" escapes, enclosing \"\", leading \",\n";
    printf STDERR " and leading/trailing spaces, as they may all cause problems.\n";
    printf STDERR "\n";
    printf STDERR " Defaults to escaping most Excel mis-imported fields.\n";
    printf STDERR " Escapes a few extra date-like formats that Excel does not consider dates.\n";
    printf STDERR " Please send unhandled mis-imported field examples (other than gene symbols\n";
    printf STDERR " with 1-digit scientific notation, such as 2e4) to Eric.Welsh\@moffitt.org.\n";
    printf STDERR "\n";
    printf STDERR " Copy / Paste Values in Excel, after importing, to de-escape back into text.\n";
    exit(1);
}
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
175
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
176
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
# Select the output handle: STDOUT when no output file was named,
# otherwise the named file. Three-argument open is used so that
# characters in the file name are never interpreted as an open mode.
if ($num_files == 1)
{
    # output to STDOUT
    *OUTFILE = *STDOUT;
}
elsif ($num_files == 2)
{
    # output to specified file name
    open(OUTFILE, '>', $outname) or die "can't open output $outname: $!\n";
}


# read in, escape, and print escaped lines
#
# A file name of '-' (the default when no input file is given) means
# standard input; three-argument open would treat it as a literal file
# name, so it is handled explicitly.
if ($filename eq '-')
{
    *INFILE = *STDIN;
}
else
{
    open(INFILE, '<', $filename) or die "can't open $filename: $!\n";
}
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
191 while(defined($line=<INFILE>))
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
192 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
193 # strip newline characters
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
194 $line =~ s/[\r\n]+//g;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
195
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
196 @array = split /\t/, $line;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
197
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
198 # Strip any leading UTF-8 byte order mark so it won't corrupt the
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
199 # first field, since regular Perl I/O is not byte order mark aware.
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
200 #
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
201 # https://en.wikipedia.org/wiki/Byte_order_mark
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
202 #
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
203 # Various Microsoft products can emit these and screw things up....
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
204 #
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
205 for ($i = 0; $i < @array; $i++)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
206 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
207 $line =~ s/^//;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
208 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
209
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
210 for ($i = 0; $i < @array; $i++)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
211 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
212 # continue stripping problematic stuff until all has been stripped
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
213 do
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
214 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
215 $changed_flag = 0;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
216
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
217 # remove pre-existing escapes or start/end double quotes,
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
218 # since either messes up ="" escapes
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
219 while ($array[$i] =~ s/^\=*\"(.*?)\"$/$1/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
220 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
221 $changed_flag = 1;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
222 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
223
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
224 # remove leading ", since they mess up Excel in general
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
225 #
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
226 # this must be done after "", but before leading/trailing spaces,
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
227 # since removing leading/trailing spaces could result in more
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
228 # full "" enclosures, which would then be messed up by removing
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
229 # only the leading "
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
230 #
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
231 while ($array[$i] =~ s/^\"//)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
232 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
233 $changed_flag = 1;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
234 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
235
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
236 # remove leading spaces, since they won't protect long numbers
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
237 if ($array[$i] =~ s/^\s+//)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
238 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
239 $changed_flag = 1;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
240 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
241
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
242 # remove trailing spaces, since they won't protect dates
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
243 if ($array[$i] =~ s/\s+$//)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
244 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
245 $changed_flag = 1;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
246 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
247 } while ($changed_flag)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
248 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
249
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
250 # escape fields
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
251 for ($i = 0; $i < @array; $i++)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
252 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
253 # Strange but true -- 'text doesn't escape text properly in Excel
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
254 # when you try to use it in a text file to import. It will not
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
255 # auto-strip the leading ' like it does when you type it in a live
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
256 # spreadsheet. "text" doesn't, either. Oddly, ="text" DOES work,
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
257 # but an equation containing just a text string and no actual
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
258 # equation doesn't make much sense. However, it works, so that's
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
259 # what I use here to escape fields into mangle-protected text.
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
260
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
261 # escape numeric problems
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
262 if (is_number($array[$i]))
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
263 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
264 # keep leading zeroes for >1 digit before the decimal point
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
265 if ($escape_zeroes_flag && $array[$i] =~ /^([+-]?)0[0-9]/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
266 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
267 $array[$i] = sprintf "=\"%s\"", $array[$i];
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
268 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
269
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
270 # Escape scientific notation with >= 2 digits before the E,
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
271 # since they are likely accessions or plate/well identifiers.
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
272 #
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
273 # Also escape numbers with >11 digits before the decimal point.
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
274 # >11 is when it displays scientific notation in General format,
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
275 # which can result in corruption when saved to text.
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
276 # >15 would be the limit at which it loses precision internally.
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
277 #
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
278 # NOTE -- if there is a + or - at the beginning, this rule
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
279 # will not trigger. Undecided if this is desired or not.
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
280 # Probably desired behavior, since +/- would indicate that
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
281 # it is probably a true number, and not an accession or
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
282 # plate/well identifier.
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
283 #
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
284 elsif ($escape_sci_flag)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
285 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
286 # strip commas before counting digits
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
287 $temp = $array[$i];
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
288 $temp =~ s/\,//g;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
289
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
290 if ($temp =~ /^([1-9][0-9]{11,}|[0-9]{2,}[eE])/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
291 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
292 $array[$i] = sprintf "=\"%s\"", $array[$i];
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
293 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
294 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
295 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
296 # escape all text if paranoid
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
297 elsif ($escape_excel_paranoid_flag)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
298 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
299 $array[$i] = sprintf "=\"%s\"", $array[$i];
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
300 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
301 # escape dates
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
302 elsif ($escape_dates_flag)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
303 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
304 # escape single quote at beginning of line
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
305 if ($array[$i] =~ /^'/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
306 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
307 $array[$i] = sprintf "=\"%s\"", $array[$i];
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
308 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
309
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
310 # prevent conversion into formulas
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
311 elsif ($array[$i] =~ /^\=/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
312 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
313 $array[$i] = sprintf "=\"%s\"", $array[$i];
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
314 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
315 # Excel is smart enough to treat all +/- as not an equation
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
316 # but, otherwise, it will convert anything starting with +/-
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
317 # into "#NAME?" as a failed invalid equation
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
318 elsif ($array[$i] =~ /^[+-]/ && !($array[$i] =~ /^[+-]+$/))
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
319 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
320 $array[$i] = sprintf "=\"%s\"", $array[$i];
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
321 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
322
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
323 # check for time and/or date stuff
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
324 else
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
325 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
326 $time = '';
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
327 $date = '';
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
328
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
329 # attempt to guess at how excel might autoconvert into time
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
330 # allow letter/punctuation at end if it could be part of a date
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
331 # it would get too complicated to handle date-ness correctly,
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
332 # since I'm already resorting to negative look-ahead
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
333 if ($array[$i] =~ /\b(([0-9]+\s+(AM|PM|A|P)|[0-9]+:[0-9]+(:[0-9.]+)?)(\s+(AM|PM|A|P))?)(?!([^-\/, 0-9ADFJMNOSadfjmnos]))/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
334 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
335 $time = $1;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
336 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
337
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
338 $strip_time = $array[$i];
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
339 if ($time =~ /\S/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
340 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
341 $strip_time =~ s/\Q$time\E//;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
342 $strip_time =~ s/^\s+//;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
343 $strip_time =~ s/\s+$//
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
344 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
345
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
346 # text date, month in the middle
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
347 if ($strip_time =~ /\b([0-9]{1,4}[- \/]*Jan[A-Za-z]{0,6}([- \/]*[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
348 $strip_time =~ /\b([0-9]{1,4}[- \/]*Feb[A-Za-z]{0,6}([- \/]*[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
349 $strip_time =~ /\b([0-9]{1,4}[- \/]*Mar[A-Za-z]{0,6}([- \/]*[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
350 $strip_time =~ /\b([0-9]{1,4}[- \/]*Apr[A-Za-z]{0,6}([- \/]*[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
351 $strip_time =~ /\b([0-9]{1,4}[- \/]*May[A-Za-z]{0,6}([- \/]*[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
352 $strip_time =~ /\b([0-9]{1,4}[- \/]*Jun[A-Za-z]{0,6}([- \/]*[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
353 $strip_time =~ /\b([0-9]{1,4}[- \/]*Jul[A-Za-z]{0,6}([- \/]*[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
354 $strip_time =~ /\b([0-9]{1,4}[- \/]*Aug[A-Za-z]{0,6}([- \/]*[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
355 $strip_time =~ /\b([0-9]{1,4}[- \/]*Sep[A-Za-z]{0,6}([- \/]*[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
356 $strip_time =~ /\b([0-9]{1,4}[- \/]*Oct[A-Za-z]{0,6}([- \/]*[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
357 $strip_time =~ /\b([0-9]{1,4}[- \/]*Nov[A-Za-z]{0,6}([- \/]*[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
358 $strip_time =~ /\b([0-9]{1,4}[- \/]*Dec[A-Za-z]{0,6}([- \/]*[0-9]{1,4})?)\b/i)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
359 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
360 $temp = $1;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
361
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
362 if (has_text_month($temp))
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
363 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
364 $date = $temp;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
365 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
366 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
367
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
368 # text date, month first
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
369 elsif ($strip_time =~ /\b(Jan[A-Za-z]{0,6}[- \/]*[0-9]{1,4}([- \/]+[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
370 $strip_time =~ /\b(Feb[A-Za-z]{0,6}[- \/]*[0-9]{1,4}([- \/]+[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
371 $strip_time =~ /\b(Mar[A-Za-z]{0,6}[- \/]*[0-9]{1,4}([- \/]+[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
372 $strip_time =~ /\b(Apr[A-Za-z]{0,6}[- \/]*[0-9]{1,4}([- \/]+[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
373 $strip_time =~ /\b(May[A-Za-z]{0,6}[- \/]*[0-9]{1,4}([- \/]+[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
374 $strip_time =~ /\b(Jun[A-Za-z]{0,6}[- \/]*[0-9]{1,4}([- \/]+[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
375 $strip_time =~ /\b(Jul[A-Za-z]{0,6}[- \/]*[0-9]{1,4}([- \/]+[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
376 $strip_time =~ /\b(Aug[A-Za-z]{0,6}[- \/]*[0-9]{1,4}([- \/]+[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
377 $strip_time =~ /\b(Sep[A-Za-z]{0,6}[- \/]*[0-9]{1,4}([- \/]+[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
378 $strip_time =~ /\b(Oct[A-Za-z]{0,6}[- \/]*[0-9]{1,4}([- \/]+[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
379 $strip_time =~ /\b(Nov[A-Za-z]{0,6}[- \/]*[0-9]{1,4}([- \/]+[0-9]{1,4})?)\b/i ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
380 $strip_time =~ /\b(Dec[A-Za-z]{0,6}[- \/]*[0-9]{1,4}([- \/]+[0-9]{1,4})?)\b/i)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
381 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
382 $temp = $1;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
383
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
384 if (has_text_month($temp))
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
385 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
386 $date = $temp;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
387 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
388 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
389
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
390 # possibly a numeric date
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
391 elsif ($strip_time =~ /\b([0-9]{1,4}[- \/]+[0-9]{1,2}[- \/]+[0-9]{1,2})\b/ ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
392 $strip_time =~ /\b([0-9]{1,2}[- \/]+[0-9]{1,4}[- \/]+[0-9]{1,2})\b/ ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
393 $strip_time =~ /\b([0-9]{1,2}[- \/]+[0-9]{1,2}[- \/]+[0-9]{1,4})\b/ ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
394 $strip_time =~ /\b([0-9]{1,2}[- \/]+[0-9]{1,4})\b/ ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
395 $strip_time =~ /\b([0-9]{1,4}[- \/]+[0-9]{1,2})\b/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
396 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
397 $date = $1;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
398 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
399
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
400 # be sure that date and time anchor the ends
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
401 # mix of time and date
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
402 if ($time =~ /\S/ && $date =~ /\S/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
403 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
404 if ($array[$i] =~ /^\Q$time\E(.*)\Q$date\E$/ ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
405 $array[$i] =~ /^\Q$date\E(.*)\Q$time\E$/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
406 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
407 $middle = $1;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
408
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
409 # allow blank
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
410 # allow for purely whitespace
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
411 # allow for a single hyphen, slash, comma
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
412 # allow for multiple spaces before and/or after
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
413 if ($middle eq '' ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
414 $middle =~ /^\s+$/ ||
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
415 $middle =~ /^\s*[-\/,]\s*$/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
416 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
417 $array[$i] = sprintf "=\"%s\"", $array[$i];
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
418 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
419 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
420 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
421 # only time
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
422 elsif ($time =~ /\S/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
423 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
424 if ($array[$i] =~ /^\Q$time\E$/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
425 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
426 $array[$i] = sprintf "=\"%s\"", $array[$i];
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
427 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
428 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
429 # only date
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
430 elsif ($date =~ /\S/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
431 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
432 if ($array[$i] =~ /^\Q$date\E$/)
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
433 {
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
434 $array[$i] = sprintf "=\"%s\"", $array[$i];
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
435 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
436 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
437 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
438 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
439 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
440
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
441 # make the new escaped line
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
442 $line_escaped = join "\t", @array;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
443
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
444 # print it
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
445 print OUTFILE "$line_escaped\n";
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
446 }
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
447 close INFILE;
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
448
7726adcf91c6 Uploaded Escape Excel Perl script
pstew
parents:
diff changeset
449 close OUTFILE or die "Error closing output: $!\n";  # buffered write errors only surface at close; fail loudly instead of leaving truncated output