comparison pfam_annot/annota.pl @ 0:68a3648c7d91 draft default tip

Uploaded
author matteoc
date Thu, 22 Dec 2016 04:45:31 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:68a3648c7d91
1 #!/usr/bin/perl -w
2
3 use strict;
# ---------------------------------------------------------------------------
# Shared state and command-line arguments.
# ---------------------------------------------------------------------------

# Tab-separated Pfam family table read below (installation-specific path).
my $d_file="/home/inmare/galaxy/tools/pfam_annot/pfamA.txt";
my %decode=();      # column 2 of pfamA.txt -> HTML description of the family
my %clan_decode;    # column 2 of clans.txt -> description text of the clan
my $id="";          # id of the FASTA record currently being read
my %c=();           # FASTA id -> concatenated sequence

# Positional arguments: protein FASTA, domain-hit table, output HTML file.
my $prot_file=shift;
my $pfam_file=shift;
my $prefix=shift;
die "Usage: $0 <protein_fasta> <pfam_hits> <output_html>\n"
    unless defined $prot_file && defined $pfam_file && defined $prefix;

# ---------------------------------------------------------------------------
# Load the family descriptions.
#
# Only lines starting with a digit are treated as data rows.  For each row the
# description starts as "<column 4><br>" and is then extended with up to ten
# further fields of at least 20 characters, stopping at the first field that
# mentions "hmmbuild".
# NOTE(review): the skipped strings below look like curator/author names in
# the Pfam dump -- confirm against the actual pfamA.txt layout.
# ---------------------------------------------------------------------------
my $skip_field_re = qr/anon|Bates|Cogis|Coggis|Bateman|Sonnhammer|Finn|Studholme|Kerrison/;

open(my $pfam_db_fh, '<', $d_file)
    or die "Cannot open Pfam family table '$d_file': $!\n";
while (my $line = <$pfam_db_fh>) {
    next unless $line =~ /^\d/;
    chomp $line;                        # keep the newline out of the last field

    my @vl = split(/\t+/, $line);
    next unless defined $vl[3];         # need both the key and the description

    # Capture the key first: the loop below rewrites @vl in place.
    my $key = $vl[1];
    $decode{$key} = "$vl[3]<br>";

    my $cc = 0;                         # number of extra fields appended so far
    my %repeated = ();                  # extra fields already appended for this row
    foreach my $v (@vl) {
        $v =~ s/\[\d+\]/ /g;            # drop "[12]"-style reference markers
        last if $v =~ /hmmbuild/;
        last if $cc >= 10;              # at most ten extra fields per family
        next if $v =~ $skip_field_re;
        next if $repeated{$v};
        next if $v eq $vl[3];           # already used as the leading description
        next unless length($v) >= 20;   # short fields are ids/numbers, not prose
        $decode{$key} .= "$v ";
        $repeated{$v}++;
        $cc++;
    }
}
close($pfam_db_fh);
49
# ---------------------------------------------------------------------------
# Load the clan descriptions.
#
# Each line of clans.txt is split on single tabs.  Among the first ten fields,
# every field of at least 30 characters is appended (followed by a space) to
# the entry keyed by the line's second column.
# ---------------------------------------------------------------------------
my $clan_file="/home/inmare/galaxy/tools/pfam_annot/clans.txt";
open(my $clan_fh, '<', $clan_file)
    or die "Cannot open clan table '$clan_file': $!\n";
while (my $line = <$clan_fh>) {
    chomp $line;
    my @vl = split(/\t/, $line);

    # Blank or single-column lines carry no usable key.
    next unless defined $vl[1] && length $vl[1];

    # Capture the key first: the loop below rewrites @vl in place.
    my $key = $vl[1];
    my $cc = 0;
    foreach my $v (@vl) {
        $cc++;
        last if $cc > 10;               # only the first ten fields are considered
        $v =~ s/\[\d+\]/ /g;            # drop "[12]"-style reference markers
        $clan_decode{$key} .= "$v " if length($v) >= 30;
    }
}
close($clan_fh);
# ---------------------------------------------------------------------------
# Read the protein FASTA file.
#
# Fills %c (record id -> sequence) and %plasm (text before the first '#' of an
# id -> number of records sharing it).  The record id is the header text up to
# the first whitespace character.
# ---------------------------------------------------------------------------
my %plasm=();
open(my $prot_fh, '<', $prot_file)
    or die "Cannot open protein FASTA '$prot_file': $!\n";
while (my $line = <$prot_fh>) {
    $line =~ s/[\r\n]+\z//;             # strip LF and CRLF line endings alike

    if ($line =~ /^>(\S*)/) {
        $id = $1;                       # empty string for a bare ">" header
        if ($id =~ /^([^#]*)#/) {
            # Ids of the form "<group>#<rest>" are grouped by <group>.
            $plasm{$1}++;
        }
    } else {
        # Sequence lines seen before any header end up under the empty id.
        $c{$id} .= $line;
    }
}
close($prot_fh);
84
# ---------------------------------------------------------------------------
# Write the HTML report to $prefix.
#
# For every row of the hit table ($pfam_file) the protein name is taken from
# column 1, the domain from column 6 and the clan from the last column.  The
# first time a protein is seen its name and wrapped sequence are printed; each
# row then adds the family description (from %decode) and, when different, the
# clan description (from %clan_decode).  Proteins whose name contains '#' are
# grouped into one <div class="page"> per prefix so the JavaScript show()
# function can display a single group.
#
# NOTE(review): the row markup (<td> outside <tr>, empty <tr></tr> spacers,
# nested <p>) is kept exactly as it was so the rendering does not change.
# NOTE(review): if the hit table is not grouped by prefix, the same page id is
# emitted more than once, as before.
# ---------------------------------------------------------------------------

# Escape text for use in HTML content or inside a double-quoted attribute.
sub _html_escape
{
    my ($text) = @_;
    return "" unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Escape text for a single-quoted JavaScript string placed in an HTML attribute.
sub _js_attr_escape
{
    my ($text) = @_;
    return "" unless defined $text;
    $text =~ s/([\\'])/\\$1/g;
    return _html_escape($text);
}

open(my $out_fh, '>', $prefix)
    or die "Cannot write report '$prefix': $!\n";
open(my $hits_fh, '<', $pfam_file)
    or die "Cannot open hit table '$pfam_file': $!\n";

print $out_fh "<html>\n<head>\n";
print $out_fh "<style type=\"text/css\">\nspan {\n\ttext-decoration:underline;\n\tcolor:blue;\n\tcursor:pointer;\n}\n</style>\n";
print $out_fh "<script>\nfunction show(elementID) {\n\tvar ele = document.getElementById(elementID);\n\tif (!ele) {\n\t\talert(\"no such element\");\t\treturn;\n\t}\n\tvar pages = document.getElementsByClassName('page');\n\tfor(var i = 0; i < pages.length; i++) {\n\t\tpages[i].style.display = 'none';\n\t}\n\tele.style.display = 'block';\n}\n</script>\n";
print $out_fh "</head>\n<body>\n";

# The previous literal "#czb9dz" is not a valid colour; browsers' legacy
# colour parsing reads it as #c0b9d0, which is now written explicitly.
my $color="\"#c0b9d0\"";
my %printed;                            # protein name -> header already printed

print $out_fh "Proteins with PFAM domains:\n<br><br>\n";

# Offer a selector only when there is more than one group to choose from.
# Sorted so that the report is reproducible between runs.
my @dd = sort keys %plasm;
if (@dd > 1) {
    my @links = map {
        "<span onclick=\"show('" . _js_attr_escape($_) . "');\">" . _html_escape($_) . "</span>"
    } @dd;
    print $out_fh "<p>Show results ", join(",\n", @links), ".</p>\n";
}

my $ntokens = 0;                        # parity selects the cell background
my $prev_plasmid = "";
my $container_open = 0;                 # true while a <div><table> pair is open

while (my $line = <$hits_fh>) {
    next if $line =~ /^\#/;
    my ($name, $domain, $clan) = (split(/\s+/, $line))[0,5,-1];
    next unless $name;
    next unless defined $domain;        # fewer than six columns: not a hit row

    if ($name =~ /#/) {
        my $curr_plasmid = (split(/\#/, $name))[0];
        $curr_plasmid = "" unless defined $curr_plasmid;
        if (!$container_open || $curr_plasmid ne $prev_plasmid) {
            if ($container_open) {
                print $out_fh "</table>\n";
                print $out_fh "</div>\n";
            }
            my $page_id = _html_escape($curr_plasmid);
            print $out_fh "<div id=\"$page_id\" class=\"page\" style=\"\">\n";
            print $out_fh "<table cellpadding=\"0\" width=650>\n";
            $container_open = 1;
        }
        $prev_plasmid = $curr_plasmid;
    }

    # Names without a group prefix go into a plain container, opened lazily so
    # that every opened <div><table> is closed exactly once.
    unless ($container_open) {
        print $out_fh "<div>\n<table cellpadding=\"0\" width=650>\n";
        $container_open = 1;
    }

    $domain =~ s/\.\d+//;               # drop the version suffix, e.g. ".12"

    unless ($printed{$name}) {
        # A hit may name a protein that is absent from the FASTA file.
        my $seq = defined $c{$name} ? $c{$name} : "";
        $seq =~ s/\*//g;                # remove stop-codon markers
        $seq = _html_escape(form($seq, 90));
        my $shown_name = _html_escape($name);

        print $out_fh "<td>\n";
        print $out_fh "<HR SIZE=3 WIDTH=80%></HR>\n";
        print $out_fh "<center><b>$shown_name</b><br>\n</center>\n";
        print $out_fh "</td>\n<tr></tr>\n";
        print $out_fh "<td bgcolor=$color>\n";
        print $out_fh "<pre> \n$seq\n </pre>\n";
        print $out_fh "</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
        $ntokens = 2;

        # Mark immediately so no later branch can cause a second header.
        $printed{$name} = 1;
    }

    my $hd = uc $domain;
    my $family_text = $decode{$domain};
    if ($family_text) {
        (my $ddes = $family_text) =~ s/\s+/ /g;
        if ($ntokens % 2 == 0) {
            print $out_fh "<td>\n";
        } else {
            print $out_fh "<td bgcolor=$color>\n";
        }
        print $out_fh "<p align=\"left\">\n";
        print $out_fh "<a href=http://pfam.xfam.org/family/$hd> $domain</a>\n<p align=\"justify\">$ddes</p>\n\n";
        print $out_fh "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
        $ntokens++;
    }

    my $clan_text = $clan_decode{$clan};
    # Skip the clan cell when it would only repeat the family description.
    if ($clan_text && !(defined $family_text && $family_text eq $clan_text)) {
        (my $clanD = $clan_text) =~ s/\s+/ /g;
        if ($ntokens % 2 == 0) {
            print $out_fh "<td>\n";
        } else {
            print $out_fh "<td bgcolor=$color>\n";
        }
        print $out_fh "<p align=\"left\">\n";
        print $out_fh "<a href=http://pfam.xfam.org/clan/$clan> $clan</a>\n <p align=\"justify\">$clanD</p>\n\n";
        print $out_fh "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
        $ntokens++;
    }
}
close($hits_fh);

if ($container_open) {
    print $out_fh "</table>\n";
    print $out_fh "</div>\n";
}
print $out_fh "</body>\n</html>\n";

# Buffered write errors (e.g. a full disk) only surface when closing.
close($out_fh) or die "Error while writing report '$prefix': $!\n";

# NOTE: an earlier, disabled section listed the proteins without PFAM domains
# (every key of %c not present in %printed); it is intentionally not emitted.
208
# form($string, $len)
#
# Break $string into consecutive chunks of at most $len characters and return
# them joined into one string, each chunk followed by a newline.
#
#   $string  text to wrap; undef is treated as the empty string
#   $len     maximum chunk length; must be a positive integer
#
# Returns "" for empty input.  If $len is missing or not a positive integer
# the text is returned unwrapped (with a single trailing newline), because
# such a width could never advance the loop.
sub form
{
    my ($string, $len) = @_;
    $string = "" unless defined $string;

    unless (defined $len && $len =~ /^\d+$/ && $len > 0) {
        return length($string) ? "$string\n" : "";
    }

    my $outS = "";
    # Strictly "<": with "<=" an extra empty line was emitted whenever the
    # length was an exact multiple of $len (including the empty string).
    for (my $i = 0; $i < length($string); $i += $len) {
        $outS .= substr($string, $i, $len) . "\n";
    }
    return $outS;
}