#!/usr/bin/perl
#parse_primersearch.pl
#reformat EMBOSS primersearch output into columnar Galaxy interval format
#usage perl parse_primersearch.pl <input primersearch file> <tabular output file>

#Copyright 2012 John McCallum
#New Zealand Institute for Plant and Food Research
#This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

use strict;
use warnings;

# Convert an EMBOSS primersearch report into tab-separated rows.
#
# Arguments (from @ARGV):
#   $ARGV[0]  path of the primersearch report to read
#   $ARGV[1]  path of the tabular file to write (truncated if it exists)
#
# One row is written per amplimer, assembled from consecutive report lines:
#   primer name, sequence name, forward-strand mismatches,
#   forward-strand hit position, hit position + amplimer length,
#   amplimer length
#
# Dies with a message if the arguments are missing or a file cannot be
# opened or closed.

die "usage: perl parse_primersearch.pl <input primersearch file> <tabular output file>\n"
    if @ARGV < 2;

my ( $in_path, $out_path ) = @ARGV;

# Three-argument open with lexical handles: the mode cannot be influenced by
# the file name, and failures are reported instead of silently ignored.
open( my $in_fh, '<', $in_path )
    or die "Cannot open input file '$in_path': $!\n";
open( my $out_fh, '>', $out_path )
    or die "Cannot open output file '$out_path': $!\n";

my $name  = '';    # primer set name from the most recent "Primer name" line
my $start = 0;     # forward-strand hit position of the current amplimer

while ( my $line = <$in_fh> ) {

    # get primer set name
    if ( $line =~ /^Primer name (\S+)/ ) {
        $name = $1;
    }

    # A "Sequence:" line opens a new output row.
    # Modified to cope with unnamed sequence input 28/7/05
    if ( $line =~ /Sequence: (\S+)/ ) {
        print {$out_fh} $name, "\t", $1;
    }
    if ( $line =~ /Sequence:(\s{4,})/ ) {
        print {$out_fh} $name, "\t", "unnamed_seq";
    }

    # \d+ (not \d) so that hits with 10 or more mismatches are still parsed.
    if ( $line =~ /hits forward strand at (\d+) with (\d+) mismatches/ ) {
        $start = $1;
        print {$out_fh} "\t", $2, "\t", $start;
    }

    # The amplimer length line closes the row.
    if ( $line =~ /Amplimer length: (\d+)/ ) {
        my $amp_length = $1;
        print {$out_fh} "\t", $start + $amp_length, "\t", $amp_length, "\n";
    }
}

close($in_fh);

# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close($out_fh)
    or die "Cannot close output file '$out_path': $!\n";