annotate pfam_annot/annota.pl @ 0:68a3648c7d91 draft default tip

Uploaded
author matteoc
date Thu, 22 Dec 2016 04:45:31 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
#!/usr/bin/perl -w
#
# Builds an HTML report of PFAM domain and clan annotations for a protein set.
#
# Usage: annota.pl <protein_fasta> <pfam_hits> <output_html>

use strict;
use warnings;

# Tab-separated PFAM family table used to turn hit accessions into text.
my $d_file="/home/inmare/galaxy/tools/pfam_annot/pfamA.txt";

my %decode=();        # pfamA.txt column 2 => HTML description snippet
my %clan_decode;      # clans.txt column 2 => description text
my $id="";            # id of the FASTA record currently being read
my %c=();             # FASTA id => concatenated sequence

# Fail early with a usage line instead of opening undefined file names later.
die "Usage: $0 <protein_fasta> <pfam_hits> <output_html>\n" unless @ARGV >= 3;
my $prot_file=shift;
my $pfam_file=shift;
my $prefix=shift;

# Three-argument open on a lexical handle; a missing table is a fatal
# configuration error rather than a silently empty lookup.
open(my $pfam_db_fh, '<', $d_file)
    or die "Cannot open PFAM table '$d_file': $!\n";

while (my $line = <$pfam_db_fh>) {
    # Only rows that start with a digit are data rows.
    next unless $line =~ /^\d/;

    # Runs of tabs are treated as a single separator, as in the original.
    my @vl = split(/\t+/, $line);

    # Column 2 is the lookup key and column 4 the headline text
    # (presumably accession and short description -- confirm against the dump).
    next unless @vl > 3;
    my $acc = $vl[1];

    $decode{$acc} = "$vl[3]<br>";

    my $cc = 0;           # number of free-text fields appended so far
    my %repeated = ();    # fields already appended for this row
    foreach my $v (@vl) {
        # $v aliases the element of @vl, so the cleanup is done in place.
        $v =~ s/\[\d+\]/ /g;

        # Everything from the build command line onwards is not description.
        last if $v =~ /hmmbuild/;

        # At most ten fields are appended; nothing further can be added.
        last if $cc >= 10;

        # Skip fields mentioning these names (they look like author/curator
        # credits rather than description text).
        next if $v =~ /anon|Bates|Cogis|Coggis|Bateman|Sonnhammer|Finn|Studholme|Kerrison/;

        next if $repeated{$v};
        next if $v eq $vl[3];          # headline is already in place
        next unless length($v) >= 20;  # short fields are ids, dates, numbers

        $decode{$acc} .= "$v ";
        $repeated{$v}++;
        $cc++;
    }
}
close($pfam_db_fh);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
49
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
# Load the clan table: every sufficiently long field among the first ten
# columns of a row is appended to the description stored under column 2.
my $clan_file="/home/inmare/galaxy/tools/pfam_annot/clans.txt";

# Three-argument open on a lexical handle, checked, and closed when done.
open(my $clan_fh, '<', $clan_file)
    or die "Cannot open clan table '$clan_file': $!\n";

while (my $line = <$clan_fh>) {
    my @vl = split(/\t/, $line);

    # Rows without a second column have no key to store anything under.
    next unless @vl > 1;

    # Capture the key before the in-place cleanup below can touch it.
    my $clan_key = $vl[1];

    my $cc = 0;    # 1-based column counter
    foreach my $v (@vl) {
        $cc++;
        last if $cc > 10;    # only the first ten columns are considered

        $v =~ s/\[\d+\]/ /g;

        # Fields shorter than 30 characters are treated as ids/metadata.
        $clan_decode{$clan_key} .= "$v " if length($v) >= 30;
    }
}
close($clan_fh);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
# Read the protein FASTA file.
#   %c     : record id (first whitespace-delimited word of the header) => sequence
#   %plasm : text before the first '#' of an id => number of records carrying it
my %plasm=();

# $prot_file comes from the command line: the three-argument form keeps a
# name such as ">x" or "cmd |" from being interpreted as an open mode.
open(my $prot_fh, '<', $prot_file)
    or die "Cannot open protein FASTA '$prot_file': $!\n";

while (my $line = <$prot_fh>) {
    # Strip LF and CRLF line endings so no "\r" ends up inside a sequence.
    $line =~ s/[\r\n]+\z//;

    if ($line =~ /^>(.*)/) {
        my $first_word = (split(/\s+/, $1))[0];
        $id = defined $first_word ? $first_word : "";    # bare ">" header

        # Ids of the form "<prefix>#<rest>" are grouped by their prefix.
        if ($id =~ /^([^#]*)#/) {
            $plasm{$1}++;
        }
    }
    else {
        # Sequence line: append to the record opened by the last header.
        $c{$id} .= $line;
    }
}
close($prot_fh);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
84
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
# ---------------------------------------------------------------------------
# Write the HTML report: one header + sequence per protein that has hits,
# followed by one row per annotated domain and (when different) its clan.
# ---------------------------------------------------------------------------

# Open the input first so a bad hits file does not leave a truncated report.
open(my $hits_fh, '<', $pfam_file)
    or die "Cannot open PFAM hits '$pfam_file': $!\n";
open(my $out_fh, '>', $prefix)
    or die "Cannot write report '$prefix': $!\n";

# Escape text taken from the input files before it is placed in HTML content
# or inside a quoted attribute.
my $esc_html = sub {
    my ($text) = @_;
    $text = "" unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;
    return $text;
};

print {$out_fh} "<html>\n<head>\n";
print {$out_fh} "<style type=\"text/css\">\nspan {\n\ttext-decoration:underline;\n\tcolor:blue;\n\tcursor:pointer;\n}\n</style>\n";
print {$out_fh} "<script>\nfunction show(elementID) {\n\tvar ele = document.getElementById(elementID);\n\tif (!ele) {\n\t\talert(\"no such element\");\t\treturn;\n\t}\n\tvar pages = document.getElementsByClassName('page');\n\tfor(var i = 0; i < pages.length; i++) {\n\t\tpages[i].style.display = 'none';\n\t}\n\tele.style.display = 'block';\n}\n</script>\n";
#print {$out_fh} "<script src=\"script.js\"></script>\n";
#print {$out_fh} "<script>\n\tshow(elementID)\n</script>\n";
# The original never opened <body> although it closed it at the end.
print {$out_fh} "</head>\n<body>\n";

# The original value "#czb9dz" is not valid hex; browsers' legacy colour
# parsing reads the non-hex digits as 0, so this is the shade actually shown.
my $color="\"#c0b9d0\"";
my %printed;    # protein name => 1 once its header and sequence were written

print {$out_fh} "Proteins with PFAM domains:\n<br><br>\n";

# Sorted so the navigation links come out in a stable order.
my @plasmids = sort keys %plasm;

# True while a <div><table> pair is open and still has to be closed.
my $container_open = 0;

if (@plasmids > 1) {
    # Several plasmids: one clickable link per plasmid page.
    my @links;
    foreach my $plasmid (@plasmids) {
        # Backslash-escape for the JS string literal, then HTML-escape for
        # the attribute that carries it.
        (my $js_arg = $plasmid) =~ s/([\\'])/\\$1/g;
        push @links,
            "<span onclick=\"show('" . $esc_html->($js_arg) . "');\">"
            . $esc_html->($plasmid) . "</span>";
    }
    print {$out_fh} "<p>Show results ", join(",\n", @links), ".</p>\n";
}
elsif (@plasmids == 0) {
    # No plasmid grouping at all: a single anonymous container holds every row.
    # (With exactly one plasmid the loop below opens the only container; the
    # original opened both and closed just one.)
    print {$out_fh} "<div>\n<table cellpadding=\"0\" width=650>\n";
    $container_open = 1;
}

my $ntokens=0;          # row counter used to alternate the row background
my $prev_plasmid="";

while (my $line = <$hits_fh>) {
    next if $line =~ /^\#/;

    # Columns used: 1 = protein name, 6 = domain accession, last = clan.
    my ($name, $domain, $clan) = (split(/\s+/, $line))[0, 5, -1];
    next unless $name;
    next unless defined $domain;    # malformed line with fewer than 6 columns

    # Names of the form "<plasmid>#<rest>" start a new page (one div per
    # plasmid) whenever the plasmid part changes.
    if ($name =~ /^([^#]*)#/) {
        my $curr_plasmid = $1;
        if ($curr_plasmid ne $prev_plasmid) {
            print {$out_fh} "</table>\n</div>\n" if $container_open;
            print {$out_fh} "<div id=\"" . $esc_html->($curr_plasmid) . "\" class=\"page\" style=\"\">\n";
            print {$out_fh} "<table cellpadding=\"0\" width=650>\n";
            $container_open = 1;
        }
        $prev_plasmid = $curr_plasmid;
    }

    # Rows must never be written outside a table.
    unless ($container_open) {
        print {$out_fh} "<div>\n<table cellpadding=\"0\" width=650>\n";
        $container_open = 1;
    }

    # Drop the version suffix (".12") so the accession matches the table key.
    $domain =~ s/\.\d+//;

    unless ($printed{$name}) {
        # A hit may name a protein that is absent from the FASTA file.
        my $seq = defined $c{$name} ? $c{$name} : "";
        $seq =~ s/\*//g;                        # remove stop-codon markers
        $seq = $esc_html->(form($seq, 90));     # wrap first, then escape
        my $name_html = $esc_html->($name);

        print {$out_fh} "<td>\n";
        print {$out_fh} "<HR SIZE=3 WIDTH=80%></HR>\n";
        print {$out_fh} "<center><b>$name_html</b><br>\n</center>\n";
        print {$out_fh} "</td>\n<tr></tr>\n";
        print {$out_fh} "<td bgcolor=$color>\n";
        print {$out_fh} "<pre> \n$seq\n </pre>\n";
        print {$out_fh} "</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
        $ntokens = 2;

        # Marked here rather than at the end of the iteration, so no early
        # exit from the clan branch can cause the header to be repeated.
        $printed{$name} = 1;
    }

    # $domain and $clan are only printed when they equal a key loaded from
    # the lookup tables, so they are not escaped here.
    my $hd = uc $domain;
    #<a href="http://www.canoro.altervista.org/" class="nav" target="_blank">www.canoro.altervista.org</a>
    if ($decode{$domain}) {
        (my $ddes = $decode{$domain}) =~ s/\s+/ /g;
        my $cell_open = ($ntokens % 2 == 0) ? "<td>\n" : "<td bgcolor=$color>\n";

        print {$out_fh} $cell_open;
        print {$out_fh} "<p align=\"left\">\n";
        print {$out_fh} "<a href=http://pfam.xfam.org/family/$hd> $domain</a>\n<p align=\"justify\">$ddes</p>\n\n";
        print {$out_fh} "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
        $ntokens++;
    }

    if ($clan_decode{$clan}) {
        # Skip the clan row when it would only repeat the domain description.
        my $same_text = defined $decode{$domain}
            && $decode{$domain} eq $clan_decode{$clan};

        unless ($same_text) {
            (my $clanD = $clan_decode{$clan}) =~ s/\s+/ /g;
            my $cell_open = ($ntokens % 2 == 0) ? "<td>\n" : "<td bgcolor=$color>\n";

            print {$out_fh} $cell_open;
            print {$out_fh} "<p align=\"left\">\n";
            print {$out_fh} "<a href=http://pfam.xfam.org/clan/$clan> $clan</a>\n <p align=\"justify\">$clanD</p>\n\n";
            print {$out_fh} "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
            $ntokens++;
        }
    }
}
close($hits_fh);

# Close only what was opened, then finish the document.
print {$out_fh} "</table>\n</div>\n" if $container_open;
print {$out_fh} "</body>\n</html>\n";

# Disabled in the original: a trailing section listing the proteins that had
# no PFAM hit. Kept for reference; its markup ("<>...</pre>", "<left>") would
# need fixing before it is re-enabled.
#print {$out_fh} "<br><br>Proteins without PFAM domains:\n<br>\n";
#foreach my $seq (keys %c)
#{
#    next if $printed{$seq};
#    print {$out_fh} "<>$seq</pre>\n\n<br><br><left>\n$c{$seq}</left><br>\n";
#    print {$out_fh} "<HR SIZE=3 WIDTH=80%>\n";
#}

# Buffered write errors (full disk, quota) only surface when closing.
close($out_fh) or die "Cannot finish writing report '$prefix': $!\n";
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
208
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
# form($string, $len)
#
# Hard-wraps $string into lines of at most $len characters.
#
#   $string : text to wrap; undef is treated as the empty string
#   $len    : maximum line width; must be >= 1 to wrap
#
# Returns the wrapped text with every line, including the last one,
# terminated by "\n". Empty input yields the empty string. When $len is
# missing or smaller than 1 the string is returned as a single line, since
# a zero step would never advance through the string.
sub form
{
    my ($string, $len) = @_;

    return "" unless defined $string && length $string;
    return "$string\n" unless defined $len && $len >= 1;
    $len = int $len;

    my $outS = "";
    # Strictly "<": with "<=" an input whose length is an exact multiple of
    # $len produced one extra, empty line at the end.
    for (my $i = 0; $i < length($string); $i += $len)
    {
        $outS .= substr($string, $i, $len) . "\n";
    }
    return $outS;
}