Mercurial > repos > matteoc > agame_custom_tools
comparison pfam_annot/annota.pl @ 0:68a3648c7d91 draft default tip
Uploaded
author | matteoc |
---|---|
date | Thu, 22 Dec 2016 04:45:31 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:68a3648c7d91 |
---|---|
#!/usr/bin/perl -w

# annota.pl - build an HTML report of the Pfam domains found in a set of
# proteins.
#
# Usage: annota.pl <protein_fasta> <pfam_hits> <output_html>
#
# This first section declares the shared lookup tables, reads the command
# line, and loads the Pfam family descriptions from pfamA.txt.

use strict;

# Tab-separated Pfam family table shipped with the tool.
my $d_file="/home/inmare/galaxy/tools/pfam_annot/pfamA.txt";

my %decode=();      # column 2 of pfamA.txt (presumably the Pfam accession) -> HTML description
my %clan_decode;    # column 2 of clans.txt (presumably the clan accession) -> description text
my $id="";          # id of the FASTA record currently being read
my %c=();           # sequence id -> concatenated sequence

my $prot_file=shift;    # protein FASTA file
my $pfam_file=shift;    # whitespace-separated domain hit table
my $prefix=shift;       # path of the HTML report to write

# Fail early instead of silently producing an empty report.
unless (defined $prot_file && defined $pfam_file && defined $prefix)
{
    die "Usage: $0 <protein_fasta> <pfam_hits> <output_html>\n";
}

open(my $desc_fh, '<', $d_file) or die "Cannot open $d_file: $!\n";
while (my $line = <$desc_fh>)
{
    # Only rows starting with a digit are data rows.
    next unless $line =~ /^\d/;

    my @vl = split(/\t+/, $line);

    # A usable row needs at least the key (column 2) and title (column 4).
    next unless defined $vl[3];

    # The description starts with the title; longer free-text fields follow.
    $decode{$vl[1]} = "$vl[3]<br>";    #$vl[8] $vl[9]";

    my $cc = 0;           # number of fields appended so far
    my %repeated = ();    # fields already appended for this row
    foreach my $v (@vl)
    {
        # At most 10 extra fields are appended per family.
        last if $cc >= 10;

        # NOTE: $v aliases the element of @vl, so this edits the row in place.
        $v =~ s/\[\d+\]/ /g;

        # Everything from the hmmbuild command line onwards is ignored.
        last if $v =~ /hmmbuild/;

        # Skip author / curator name fields.
        next if $v =~ /anon|Bates|Cogis|Coggis|Bateman|Sonnhammer|Finn|Studholme|Kerrison/;

        next if $repeated{$v};
        next if $v eq $vl[3];

        # Short fields (ids, numbers, flags) are not descriptive text.
        next unless length($v) >= 20;

        $decode{$vl[1]} .= "$v ";
        $repeated{$v}++;
        $cc++;
    }
    #print "$vl[1] $decode{$vl[1]}\n";
}
close($desc_fh);
49 | |
# Load the clan descriptions from clans.txt (tab-separated).
# For every row, each of the first 10 fields that is at least 30 characters
# long is appended to the description stored under the row's second column.
my $clan_file="/home/inmare/galaxy/tools/pfam_annot/clans.txt";
open(my $clan_fh, '<', $clan_file) or die "Cannot open $clan_file: $!\n";
while (my $line = <$clan_fh>)
{
    my @vl = split(/\t/, $line);

    # Rows without a second column have no key to store the text under.
    next unless defined $vl[1];

    #$clan_decode{$vl[1]}="$vl[3]";
    my $cc = 0;    # 1-based index of the field being examined
    foreach my $v (@vl)
    {
        $cc++;
        last if $cc > 10;

        # NOTE: $v aliases the element of @vl, so this edits the row in place.
        $v =~ s/\[\d+\]/ /g;

        next unless length($v) >= 30;
        $clan_decode{$vl[1]} .= "$v ";
    }
}
close($clan_fh);
# Read the protein FASTA file.
#   %c     : sequence id (first whitespace-delimited word of the header)
#            -> sequence with line breaks removed
#   %plasm : text before the first '#' in an id -> number of ids carrying it
#            (used later as the list of per-plasmid report sections)
my %plasm=();
# Three-argument open: the file name comes from the command line and must
# never be interpreted as an open mode or a pipe.
open(my $prot_fh, '<', $prot_file)
    or die "Cannot open protein file '$prot_file': $!\n";
while (my $line = <$prot_fh>)
{
    if ($line =~ /^>(.*)/)
    {
        $id = (split(/\s+/, $1))[0];
        $id = "" unless defined $id;    # bare ">" header line

        if ($id =~ /#/)
        {
            my $pid = (split(/\#/, $id))[0];
            $plasm{$pid}++ if defined $pid;
        }
    }
    else
    {
        # Strip LF and CRLF line endings so no stray CR ends up in the sequence.
        $line =~ s/[\r\n]+\z//;
        $c{$id} .= $line;
    }
}
close($prot_fh);
84 | |
# ---------------------------------------------------------------------------
# Write the HTML report.
#
# For every protein named in the hit table the report shows the protein id
# and its sequence (wrapped at 90 columns), followed by one table cell per
# hit with the family description and, when available, the clan description.
# Ids of the form "<plasmid>#<rest>" are grouped into one <div class="page">
# per plasmid; the show() script displays a single plasmid at a time.
#
# NOTE(review): ids and descriptions are interpolated into the HTML without
# escaping, as in the original script.
# ---------------------------------------------------------------------------
open(my $out_fh, '>', $prefix) or die "Cannot write report '$prefix': $!\n";
print {$out_fh} "<html>\n<head>\n";
print {$out_fh} "<style type=\"text/css\">\nspan {\n\ttext-decoration:underline;\n\tcolor:blue;\n\tcursor:pointer;\n}\n</style>\n";
print {$out_fh} "<script>\nfunction show(elementID) {\n\tvar ele = document.getElementById(elementID);\n\tif (!ele) {\n\t\talert(\"no such element\");\t\treturn;\n\t}\n\tvar pages = document.getElementsByClassName('page');\n\tfor(var i = 0; i < pages.length; i++) {\n\t\tpages[i].style.display = 'none';\n\t}\n\tele.style.display = 'block';\n}\n</script>\n";
# Disabled alternative that loaded the script from an external file:
#print OUT "<script src=\"script.js\"></script>\n";
#print OUT "<script>\n\tshow(elementID)\n</script>\n";
# The original emitted </body> without ever opening <body>.
print {$out_fh} "</head>\n<body>\n";

my $color="\"#czb9dz\"";    # bgcolor attribute used for shaded cells
my %printed;                # protein ids whose header/sequence was already written

open(my $hits_fh, '<', $pfam_file)
    or die "Cannot open hit table '$pfam_file': $!\n";
print {$out_fh} "Proteins with PFAM domains:\n<br><br>\n";

# Sorted so that the selector is identical from run to run.
my @dd = sort keys %plasm;
if (@dd > 1)
{
    # Several plasmids: offer one clickable link per plasmid section.
    my @links = map { "<span onclick=\"show(\'$_\');\">$_</span>" } @dd;
    print {$out_fh} "<p>Show results ", join(",\n", @links), ".</p>\n";
}

my $ntokens=0;          # parity selects plain / shaded cells
my $prev_plasmid="";    # plasmid of the previous hit line
my $table_open=0;       # true while a <div><table> pair is open

while (my $line = <$hits_fh>)
{
    next if $line =~ /^\#/;

    # Column 1 = protein id, column 6 = family id, last column = clan id.
    my ($name,$domain,$clan) = (split(/\s+/, $line))[0,5,-1];
    next unless $name;
    next unless defined $domain;    # truncated line

    if ($name =~ /#/)
    {
        my $curr_plasmid = (split(/\#/, $name))[0];
        $curr_plasmid = "" unless defined $curr_plasmid;
        if ($curr_plasmid ne $prev_plasmid)
        {
            # Close the previous section before starting a new one.
            if ($table_open)
            {
                print {$out_fh} "</table>\n";
                print {$out_fh} "</div>\n";
            }
            # div for each plasmid
            print {$out_fh} "<div id=\"$curr_plasmid\" class=\"page\" style=\"\">\n";
            print {$out_fh} "<table cellpadding=\"0\" width=650>\n";
            $table_open = 1;
        }
        $prev_plasmid = $curr_plasmid;
    }

    # Hits that do not belong to a plasmid section go into an anonymous
    # table, opened only when it is actually needed so tags stay balanced.
    unless ($table_open)
    {
        print {$out_fh} "<div>\n<table cellpadding=\"0\" width=650>\n";
        $table_open = 1;
    }

    # Drop the version suffix of the family id (e.g. ".12").
    $domain =~ s/\.\d+//;

    unless ($printed{$name})
    {
        # A protein may be missing from the FASTA file.
        my $seq = defined $c{$name} ? $c{$name} : "";

        $seq =~ s/\*//g;    # remove stop-codon markers
        $seq = form($seq,90);
        print {$out_fh} "<td>\n";
        print {$out_fh} "<HR SIZE=3 WIDTH=80%></HR>\n";
        print {$out_fh} "<center><b>$name</b><br>\n</center>\n";
        print {$out_fh} "</td>\n<tr></tr>\n";
        print {$out_fh} "<td bgcolor=$color>\n";
        print {$out_fh} "<pre> \n$seq\n </pre>\n";
        print {$out_fh} "</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
        $ntokens=2;
        # Mark immediately so that no later branch can skip it and cause
        # the header to be printed a second time.
        $printed{$name}=1;
    }

    my $hd = uc $domain;
    if ($decode{$domain})
    {
        my $ddes = $decode{$domain};
        $ddes =~ s/\s+/ /g;
        if ($ntokens % 2 == 0)
        {
            print {$out_fh} "<td>\n";
        }
        else
        {
            print {$out_fh} "<td bgcolor=$color>\n";
        }

        print {$out_fh} "<p align=\"left\">\n";
        print {$out_fh} "<a href=http://pfam.xfam.org/family/$hd> $domain</a>\n<p align=\"justify\">$ddes</p>\n\n";
        print {$out_fh} "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
        $ntokens++;
    }

    if ($clan_decode{$clan})
    {
        # Do not repeat the text when clan and family descriptions coincide.
        my $same_text = defined $decode{$domain}
            && $decode{$domain} eq $clan_decode{$clan};
        unless ($same_text)
        {
            my $clanD = $clan_decode{$clan};
            $clanD =~ s/\s+/ /g;
            if ($ntokens % 2 == 0)
            {
                print {$out_fh} "<td>\n";
            }
            else
            {
                print {$out_fh} "<td bgcolor=$color>\n";
            }

            print {$out_fh} "<p align=\"left\">\n";
            print {$out_fh} "<a href=http://pfam.xfam.org/clan/$clan> $clan</a>\n <p align=\"justify\">$clanD</p>\n\n";
            print {$out_fh} "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
            $ntokens++;
        }
    }
}
close($hits_fh);

# Disabled in the original script: listing of proteins without PFAM domains.
# If re-enabled it must print to $out_fh and be placed before the closing tags.
#print OUT "<br><br>Proteins without PFAM domains:\n<br>\n";
#foreach my $seq (keys %c)
#{
#    next if $printed{$seq};
#    print OUT "<>$seq</pre>\n\n<br><br><left>\n$c{$seq}</left><br>\n";
#    print OUT "<HR SIZE=3 WIDTH=80%>\n";
#}

if ($table_open)
{
    print {$out_fh} "</table>\n";
    print {$out_fh} "</div>\n";
}
print {$out_fh} "</body>\n</html>\n";

# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close($out_fh) or die "Error while writing report '$prefix': $!\n";
208 | |
# form($string, $len)
#
# Wrap $string into lines of at most $len characters.
#
#   $string : text to wrap (undef is treated as the empty string)
#   $len    : maximum line length; if undefined or not positive the text is
#             returned as a single line instead of looping forever
#
# Returns the wrapped text, every line terminated by "\n". An empty input
# yields an empty string. No trailing empty line is produced when the length
# of $string is an exact multiple of $len.
sub form
{
    my ($string, $len) = @_;
    $string = "" unless defined $string;
    return "" if $string eq "";
    return "$string\n" unless defined $len && $len > 0;

    my $outS = "";
    # Strictly less than: an offset equal to the length would only add an
    # empty chunk, i.e. a spurious blank line.
    for (my $i = 0; $i < length($string); $i += $len)
    {
        $outS .= substr($string, $i, $len) . "\n";
    }
    return $outS;
}