#!/usr/bin/perl -w
#
# Builds an HTML report listing, for each protein with Pfam hits, its sequence
# and a short description of every matching Pfam family and clan.
#
# Usage: <script> <protein_fasta> <pfam_hits> <output_html>

use strict;
use warnings;

# Fail early with a usage message instead of trying to open undefined paths.
die "Usage: $0 <protein_fasta> <pfam_hits> <output_html>\n" unless @ARGV >= 3;

my %decode = ();     # column 1 of pfamA.txt => HTML description snippet
my %clan_decode;     # column 1 of clans.txt => description text
my $id = "";         # id of the FASTA record currently being read
my %c = ();          # protein id => concatenated sequence

my $prot_file = shift;    # FASTA file with the protein sequences
my $pfam_file = shift;    # whitespace-separated hit table (fields 0, 5 and last are used)
my $prefix    = shift;    # path of the HTML report to write

# ---------------------------------------------------------------------------
# Load the family descriptions from the Pfam flat file.
# ---------------------------------------------------------------------------
my $d_file = "/home/inmare/galaxy/tools/pfam_annot/pfamA.txt";
open(my $pfam_fh, '<', $d_file) or die "Cannot open $d_file: $!\n";

# Fields matching any of these (placeholder or author names) are never
# copied into the description.
my $skip_re = qr/anon|Bates|Cogis|Coggis|Bateman|Sonnhammer|Finn|Studholme|Kerrison/;

while (my $line = <$pfam_fh>) {
    # Only data rows (starting with a digit) are of interest.
    next unless $line =~ /^\d/;

    my @vl = split(/\t+/, $line);

    # Field 1 is the lookup key and field 3 the short description; rows
    # without both cannot be indexed.
    next unless @vl >= 4;

    # Capture the key before the loop below rewrites @vl through its alias.
    my $key = $vl[1];
    $decode{$key} = "$vl[3]<br>";

    my $cc = 0;           # number of free-text fields appended so far
    my %repeated = ();    # fields already appended for this row

    foreach my $v (@vl) {
        # $v aliases the array element, so $vl[3] is cleaned in place as well
        # once the loop reaches it; the comparison below relies on that.
        $v =~ s/\[\d+\]/ /g;

        # Everything from the hmmbuild command line onwards is technical data.
        last if $v =~ /hmmbuild/;

        # At most ten free-text fields are kept per family.
        last if $cc >= 10;

        next if $v =~ $skip_re;
        next if $repeated{$v};
        next if $v eq $vl[3];           # already stored as the leading description
        next unless length($v) >= 20;   # short fields are ids, numbers or flags

        $decode{$key} .= "$v ";
        $repeated{$v}++;
        $cc++;
    }
}
close($pfam_fh);

# ---------------------------------------------------------------------------
# Load the clan descriptions: for every row of clans.txt, the long (>= 30
# characters) values among the first ten tab-separated fields are joined
# into one text, indexed by field 1.
# ---------------------------------------------------------------------------
my $clan_file = "/home/inmare/galaxy/tools/pfam_annot/clans.txt";
open(my $clan_fh, '<', $clan_file) or die "Cannot open $clan_file: $!\n";

while (my $line = <$clan_fh>) {
    my @vl = split(/\t/, $line);

    # Rows without a second field have no key to index by.
    next unless defined $vl[1];
    my $key = $vl[1];

    my $cc = 0;    # 1-based position of the field being examined
    foreach my $v (@vl) {
        $cc++;
        last if $cc > 10;    # only the first ten fields are considered

        # Work on a copy so that the key is never affected by the clean-up.
        (my $text = $v) =~ s/\[\d+\]/ /g;
        $clan_decode{$key} .= "$text " if length($text) >= 30;
    }
}
close($clan_fh);

# ---------------------------------------------------------------------------
# Read the protein FASTA file.
#   %c     : record id (first whitespace-delimited token of the header)
#            => sequence, with all sequence lines concatenated
#   %plasm : for ids of the form "<prefix>#<rest>", the set of prefixes seen
#            (used later to group the report into one section per prefix)
# ---------------------------------------------------------------------------
my %plasm = ();

# Three-argument open: the path comes from the command line and must never
# be interpreted as an open mode or a pipe.
open(my $prot_fh, '<', $prot_file) or die "Cannot open $prot_file: $!\n";

while (my $line = <$prot_fh>) {
    if ($line =~ /^>(.*)/) {
        # split ' ' ignores leading whitespace, so "> id desc" still yields
        # "id"; a header without any token falls back to the empty id.
        my ($first) = split(' ', $1);
        $id = defined($first) ? $first : "";

        # Everything before the first '#' identifies the plasmid/contig.
        if ($id =~ /^([^#]*)#/) {
            $plasm{$1}++;
        }
    } else {
        # Strip the line terminator, including the CR of CRLF files.
        $line =~ s/[\r\n]+\z//;
        $c{$id} .= $line;
    }
}
close($prot_fh);

# ---------------------------------------------------------------------------
# Write the HTML report.
#
# Layout: an optional "Show results" selector (one link per plasmid prefix),
# then one <div class="page"><table> section per plasmid prefix. Inside a
# section every protein gets a header cell, a sequence cell and one cell per
# described domain / clan, with alternating background colour.
#
# NOTE(review): ids and descriptions are interpolated into HTML and into the
# inline JavaScript without escaping; confirm the inputs are trusted.
# ---------------------------------------------------------------------------
open(my $out_fh, '>', $prefix) or die "Cannot write $prefix: $!\n";

print $out_fh "<html>\n<head>\n";
print $out_fh "<style type=\"text/css\">\nspan {\n\ttext-decoration:underline;\n\tcolor:blue;\n\tcursor:pointer;\n}\n</style>\n";
print $out_fh "<script>\nfunction show(elementID) {\n\tvar ele = document.getElementById(elementID);\n\tif (!ele) {\n\t\talert(\"no such element\");\t\treturn;\n\t}\n\tvar pages = document.getElementsByClassName('page');\n\tfor(var i = 0; i < pages.length; i++) {\n\t\tpages[i].style.display = 'none';\n\t}\n\tele.style.display = 'block';\n}\n</script>\n";
#print $out_fh "<script src=\"script.js\"></script>\n";
print $out_fh "</head>\n";
# The closing </body> was always written, the opening tag never was.
print $out_fh "<body>\n";

# The previous value "#czb9dz" is not valid hexadecimal. Legacy bgcolor
# parsing treats the non-hex characters as 0, so this is the colour that
# was effectively rendered, now spelled explicitly.
my $color = "\"#c0b9d0\"";

my %printed;    # proteins whose header and sequence were already written

open(my $hits_fh, '<', $pfam_file) or die "Cannot open $pfam_file: $!\n";
print $out_fh "Proteins with PFAM domains:\n<br><br>\n";

# Sorted so that the selector is the same on every run.
my @dd = sort keys %plasm;
if (@dd > 1) {
    my @links = map { "<span onclick=\"show('$_');\">$_</span>" } @dd;
    print $out_fh "<p>Show results ", join(",\n", @links), ".</p>\n";
}

my $ntokens      = 0;     # parity selects the background of the next cell
my $prev_plasmid = "";
my $section_open = 0;     # true while a <div><table> pair is open

while (my $line = <$hits_fh>) {
    next if $line =~ /^\#/;

    my ($name, $domain, $clan) = (split(/\s+/, $line))[0, 5, -1];
    next unless $name;
    next unless defined $domain;    # row too short to carry a domain id

    if ($name =~ /#/) {
        my $curr_plasmid = (split(/\#/, $name))[0];

        # A new section starts whenever the prefix differs from the one of
        # the previous row (rows of one plasmid are expected to be adjacent).
        if ($curr_plasmid ne $prev_plasmid) {
            print $out_fh "</table>\n</div>\n" if $section_open;
            print $out_fh "<div id=\"$curr_plasmid\" class=\"page\" style=\"\">\n";
            print $out_fh "<table cellpadding=\"0\" width=650>\n";
            $section_open = 1;
        }
        $prev_plasmid = $curr_plasmid;
    }

    # Rows that are not part of a plasmid section go into one anonymous
    # section. Opening it lazily keeps <div>/<table> balanced in every case.
    unless ($section_open) {
        print $out_fh "<div>\n<table cellpadding=\"0\" width=650>\n"; # div for each plasmid
        $section_open = 1;
    }

    # Drop the version suffix (e.g. ".12") so the id matches the keys of %decode.
    $domain =~ s/\.\d+//;

    unless ($printed{$name}) {
        my $seq = defined($c{$name}) ? $c{$name} : "";
        $seq =~ s/\*//g;    # remove stop-codon markers
        $seq = form($seq, 90);

        print $out_fh "<td>\n";
        print $out_fh "<HR SIZE=3 WIDTH=80%></HR>\n";
        print $out_fh "<center><b>$name</b><br>\n</center>\n";
        print $out_fh "</td>\n<tr></tr>\n";
        print $out_fh "<td bgcolor=$color>\n";
        print $out_fh "<pre> \n$seq\n </pre>\n";
        print $out_fh "</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
        $ntokens = 2;

        # Marked here, not at the end of the iteration, so that no later
        # branch can skip it and cause the header to be printed twice.
        $printed{$name} = 1;
    }

    my $hd = uc $domain;

    if ($decode{$domain}) {
        my $ddes = $decode{$domain};
        $ddes =~ s/\s+/ /g;

        print $out_fh ($ntokens % 2 == 0) ? "<td>\n" : "<td bgcolor=$color>\n";
        print $out_fh "<p align=\"left\">\n";
        print $out_fh "<a href=http://pfam.xfam.org/family/$hd> $domain</a>\n<p align=\"justify\">$ddes</p>\n\n";
        print $out_fh "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
        $ntokens++;
    }

    if ($clan_decode{$clan}) {
        # The clan cell is omitted when it would repeat the family text.
        my $duplicate = defined($decode{$domain})
            && $decode{$domain} eq $clan_decode{$clan};

        unless ($duplicate) {
            my $clanD = $clan_decode{$clan};
            $clanD =~ s/\s+/ /g;

            print $out_fh ($ntokens % 2 == 0) ? "<td>\n" : "<td bgcolor=$color>\n";
            print $out_fh "<p align=\"left\">\n";
            print $out_fh "<a href=http://pfam.xfam.org/clan/$clan> $clan</a>\n <p align=\"justify\">$clanD</p>\n\n";
            print $out_fh "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
            $ntokens++;
        }
    }
}
close($hits_fh);

print $out_fh "</table>\n</div>\n" if $section_open;

# Disabled: listing of the proteins without any Pfam hit.
#print $out_fh "<br><br>Proteins without PFAM domains:\n<br>\n";
#foreach my $seq (keys %c)
#{
#    next if $printed{$seq};
#    print $out_fh "<>$seq</pre>\n\n<br><br><left>\n$c{$seq}</left><br>\n";
#    print $out_fh "<HR SIZE=3 WIDTH=80%>\n";
#}

print $out_fh "</body>\n";
print $out_fh "</html>\n";

# Buffered write errors only surface when the handle is closed.
close($out_fh) or die "Cannot finish writing $prefix: $!\n";

# form($string, $len)
#
# Breaks $string into consecutive chunks of $len characters and returns them
# joined into one string, each chunk (including a shorter last one) followed
# by a newline.
#
#   $string - text to wrap; undef or "" yields ""
#   $len    - chunk width; values below 1 (or undef) disable wrapping and
#             the text is returned as a single line
#
# Returns the wrapped text.
sub form
{
    my ($string, $len) = @_;

    return "" if !defined($string) || $string eq "";

    # A non-positive width would never advance the loop below.
    $len = defined($len) ? int($len) : 0;
    return "$string\n" if $len < 1;

    my $outS = "";
    # Strictly "<": with "<=" a string whose length is an exact multiple of
    # $len produced one extra, empty line at the end.
    for (my $i = 0; $i < length($string); $i += $len) {
        $outS .= substr($string, $i, $len) . "\n";
    }
    return $outS;
}