annotate PGAP-1.2.1/Converter_finished.pl @ 3:bbb2c473664c draft

Uploaded
author dereeper
date Thu, 24 Jun 2021 15:08:13 +0000
parents 83e62a1aeeeb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/perl
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
2 use strict;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
3 use warnings;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
4 use Getopt::Std;
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
5
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
# Parse the command line. All three options take a value:
#   -S  strain nicknames, joined with '+'
#   -I  input directory
#   -O  output directory
my %opt;
getopts('S:I:O:',\%opt);

# Usage text, held as a one-element list and printed through join() below.
my @usage=qq(
Version: 2016042201
Usage: perl Converter_finished.pl [options]

Options:

-S String Input the strains nickname,
If 2 or more, join them with '+',
For example: CT18+NC_011834+SPA
-I String Input file directory
-O String Output file directory
);

# Called without any recognised option: show the usage text and stop.
if (!scalar(keys %opt))
{
    print join("\n",@usage)."\n";
    exit;
}

# Every option is mandatory and must carry a non-empty value. Testing only
# exists() is not enough: the key can be present with an empty (or undefined)
# value, e.g. after -S ''. A missing option is reported on STDERR and ends
# the script with a non-zero status so that callers can detect the failure.
# The options are checked in the order S, O, I.
foreach my $name ("S", "O", "I")
{
    next if (defined($opt{$name}) and length($opt{$name}));
    print STDERR "-$name could not be empty!";
    print STDERR join("\n",@usage)."\n";
    exit(1);
}

# Strain nicknames (split on '+'), output directory and input directory.
my @sp=split(/\+/,$opt{"S"});
my $output=$opt{"O"};
my $input=$opt{"I"};
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
60
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
# File-scoped working variables, declared up front so that code further down
# which refers to them compiles under "use strict".
my $sp;
my $line;
my @row;
my @tmp;
my %hash;
my $flag;
my $file;

# Make sure the output directory exists. Stop right away when it cannot be
# created (for instance when $output names an existing regular file or its
# parent directory is missing) instead of carrying on without a place to
# write the results.
if (!(-d $output))
{
    mkdir($output) or die "could not create directory $output: $!";
}

# Both directory names must end with a '/' because file names are appended
# to them by plain string concatenation.
$input.="/" if ($input!~/\/$/);
$output.="/" if ($output!~/\/$/);
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
87
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
88
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
# Convert the files of every strain named with -S:
#   <input><strain>.faa -> <output><strain>.pep       FASTA with shortened record names
#   <input><strain>.ptt -> <output><strain>.function  three columns of the table
#   <input><strain>.ffn -> <output><strain>.nuc       FASTA restricted to records found in the table
# Every file is opened with the three-argument form of open() on a lexical
# handle, and every open and every close of an output file is checked, so an
# unreadable input or unwritable output stops the script with a message.
foreach my $strain (@sp)
{
    # Location key => value of the 4th column of the .ptt table. Keys are
    # "<first>-<last>" for rows whose 2nd column is "+" and "c<last>-<first>"
    # for all other rows; headers of the .ffn file are mapped onto the same
    # key shape by getKey().
    my %id_of;

    # ---- <strain>.faa -> <strain>.pep ----
    my $faa=$input.$strain.".faa";
    my $pep=$output.$strain.".pep";
    open(my $faa_fh,'<',$faa) or die "could not open $faa: $!";
    open(my $pep_fh,'>',$pep) or die "could not open $pep for writing: $!";
    while (my $line=<$faa_fh>)
    {
        if ($line=~/^>/)
        {
            # The record name becomes the 2nd '|'-separated field of the header.
            # NOTE(review): a header without any '|' leaves that field undefined
            # and produces an empty record name - confirm that the inputs always
            # use '|'-delimited headers.
            my @field=split(/\|/,$line);
            print {$pep_fh} ">$field[1]\n";
        }
        else
        {
            print {$pep_fh} $line;
        }
    }
    close($faa_fh);
    close($pep_fh) or die "could not write $pep: $!";

    # ---- <strain>.ptt -> <strain>.function ----
    my $ptt=$input.$strain.".ptt";
    my $function=$output.$strain.".function";
    open(my $ptt_fh,'<',$ptt) or die "could not open $ptt: $!";
    open(my $function_fh,'>',$function) or die "could not open $function for writing: $!";

    # The first three lines of the table are skipped (read into a throw-away
    # lexical so that the global $_ is left alone).
    for my $skip (1..3)
    {
        my $ignored=<$ptt_fh>;
    }

    while (my $line=<$ptt_fh>)
    {
        chomp($line);
        my @col=split(/\t/,$line);

        # Columns 4, 8 and 9 are written out; presumably PID, COG and Product
        # of the NCBI .ptt layout - TODO confirm against the input files.
        print {$function_fh} $col[3]."\t".$col[7]."\t".$col[8]."\n";

        # Column 1 holds the location as "<first>..<last>".
        my @pos=split(/\.\./,$col[0]);
        if ($col[1] eq "+")
        {
            $id_of{$pos[0]."-".$pos[-1]}=$col[3];
        }
        else
        {
            $id_of{"c".$pos[-1]."-".$pos[0]}=$col[3];
        }
    }
    close($function_fh) or die "could not write $function: $!";
    close($ptt_fh);

    # ---- <strain>.ffn -> <strain>.nuc ----
    my $ffn=$input.$strain.".ffn";
    my $nuc=$output.$strain.".nuc";
    open(my $ffn_fh,'<',$ffn) or die "could not open $ffn: $!";
    open(my $nuc_fh,'>',$nuc) or die "could not open $nuc for writing: $!";

    # True while the current record is known to the .ptt table. Starts out
    # false for every file, so sequence lines ahead of the first header are
    # dropped and nothing carries over from the previous strain.
    my $keep=0;
    while (my $line=<$ffn_fh>)
    {
        if ($line=~/^>/)
        {
            my $key=&getKey($line);
            $keep=exists($id_of{$key}) ? 1 : 0;
            print {$nuc_fh} ">$id_of{$key}\n" if ($keep);
        }
        elsif ($keep)
        {
            print {$nuc_fh} $line;
        }
    }
    close($nuc_fh) or die "could not write $nuc: $!";
    close($ffn_fh);
}
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
159
83e62a1aeeeb Uploaded
dereeper
parents:
diff changeset
# getKey($header)
#
# Builds the location key of one FASTA header line.
#
# The location is the text after the last ':' in the first space-delimited
# token of the header. It is a list of "<number>-<number>" ranges separated
# by ','; a 'c' anywhere in it marks the reverse strand.
#
# Parameter:
#   $header - the header line; a trailing newline is removed.
#
# Returns:
#   "<smallest>-<largest>"   when the location contains no 'c'
#   "c<largest>-<smallest>"  when it does
# where smallest/largest are taken over all numbers of the location.
#
# The sub is declared without a prototype because it takes one argument,
# and it works on lexical variables only, so the caller's $_ is not changed.
sub getKey
{
    my ($header)=@_;
    chomp($header);

    # First space-delimited token, then its last ':'-separated piece.
    my ($token)=split(/ /,$header);
    my @piece=split(/\:/,$token);
    my $location=$piece[-1];

    my $strand=($location=~/c/) ? "-" : "+";

    # Drop the strand marker and any blanks, leaving numbers, ',' and '-'.
    (my $coords=$location)=~s/[c ]//g;

    # Numeric sort so that the first and last elements are the extremes.
    my @position=sort {$a<=>$b} split(/\,|-/,$coords);

    if ($strand eq "-")
    {
        return "c".$position[-1]."-".$position[0];
    }
    return $position[0]."-".$position[-1];
}