Mercurial > repos > matteoc > agame_custom_tools
comparison pfam_search/annota.Filter.pl @ 0:68a3648c7d91 draft default tip
Uploaded
author | matteoc |
---|---|
date | Thu, 22 Dec 2016 04:45:31 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:68a3648c7d91 |
---|---|
1 #!/usr/bin/perl -w | |
2 | |
3 use strict; | |
4 my $d_file="/home/inmare/galaxy/tools/pfam_search/pfamA.txt"; | |
5 open(IN,$d_file); | |
6 my %decode=(); | |
7 my %clan_decode; | |
8 my $id=""; | |
9 my %c=(); | |
10 | |
11 | |
12 my ($prot_file,$pfam_file,$prefix,@search_T)=@ARGV; | |
13 my $searchP=""; | |
14 while(<IN>) | |
15 { | |
16 if ($_=~/^\d/) | |
17 { | |
18 my @vl=(split(/\t+/)); | |
19 $decode{$vl[1]}="$vl[3]<br>";#$vl[8] $vl[9]"; | |
20 my $cc=0; | |
21 my %repeated=(); | |
22 foreach my $v (@vl) | |
23 { | |
24 $v=~s/\[\d+\]/ /g; | |
25 $cc++; | |
26 last if $v=~/hmmbuild/; | |
27 last if $cc>10; | |
28 next if $v=~/anon/; | |
29 next if $v=~/Bates/; | |
30 next if $v=~/Cogis/; | |
31 next if $v=~/Bateman/; | |
32 next if $v=~/Sonnhammer/; | |
33 next if $v=~/Finn/; | |
34 next if $v=~/Studholme/; | |
35 next if $v eq $vl[3]; | |
36 next if $v=~/Kerrison/; | |
37 next if $repeated{$v}; | |
38 #next if length($v)>=30 && $cc<=10; | |
39 $decode{$vl[1]}.="$v " if length($v)>=20 && $cc<=10; | |
40 $repeated{$v}++; | |
41 } | |
42 } | |
43 } | |
44 close(IN); | |
45 | |
46 my $clan_file="/home/inmare/galaxy/tools/pfam_search/clans.txt"; | |
47 open(IN,$clan_file); | |
48 while(<IN>) | |
49 { | |
50 my @vl=(split(/\t/)); | |
51 #$clan_decode{$vl[1]}="$vl[3]"; | |
52 my $cc=0; | |
53 foreach my $v (@vl) | |
54 { | |
55 $cc++; | |
56 $v=~s/\[\d+\]/ /g; | |
57 $clan_decode{$vl[1]}.="$v " if length($v) >=30 && $cc<=10; | |
58 } | |
59 | |
60 } | |
61 close(IN); | |
62 open(IN,"$prot_file"); | |
63 while(<IN>) | |
64 { | |
65 if ($_=~/^>(.*)/) | |
66 { | |
67 $id=$1; | |
68 $id=(split(/\s+/,$id))[0]; | |
69 }else{ | |
70 chomp; | |
71 $c{$id}.=$_; | |
72 } | |
73 } | |
74 close(IN); | |
75 | |
76 foreach my $s (@search_T) | |
77 { | |
78 $searchP.="$s "; | |
79 } | |
80 | |
81 open(OUT,">$prefix"); | |
82 print OUT "<html>\n<head></head>\n<body>\n"; | |
83 my $color="\"#czb9dz\""; | |
84 my %printed; | |
85 open(IN,$pfam_file); | |
86 print OUT "Proteins with PFAM domains matching the keywords:\n<br><br>\n"; | |
87 print OUT "<div>\n<table cellpadding=\"0\" width=650>\n"; | |
88 my $ntokens=0; | |
89 while(<IN>) | |
90 { | |
91 next if $_=~/^\#/; | |
92 my ($name,$domain,$clan)=(split(/\s+/))[0,5,-1]; | |
93 next unless $name; | |
94 next unless $domain; | |
95 $domain=~s/\.\d+//; | |
96 my $sd=$decode{$domain} ? $decode{$domain} : "MagnottaPantaleo§§"; | |
97 my $sc=$clan_decode{$clan} ? $clan_decode{$clan} : "SciarrattaCalogero@@"; | |
98 my $continue=match($searchP,$sd,$sc); | |
99 #print "$name $domain $clan $sd $sc\n"; | |
100 next unless $continue; | |
101 unless ($printed{$name}) | |
102 { | |
103 my $seq=$c{$name}; | |
104 | |
105 $seq=~s/\*//g; | |
106 $seq=form($seq,90); | |
107 print OUT "<td>\n"; | |
108 print OUT "<HR SIZE=3 WIDTH=80%>\n"; | |
109 print OUT "<center><b>$name</b><br>\n</center>\n"; | |
110 print OUT "</td\n<tr></tr>\n"; | |
111 print OUT "<td bgcolor=$color>\n"; | |
112 print OUT "<pre> \n$seq\n </pre>\n"; | |
113 print OUT "</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n"; | |
114 $ntokens=2; | |
115 } | |
116 my $hd=uc $domain; | |
117 #<a href="http://www.canoro.altervista.org/" class="nav" target="_blank">www.canoro.altervista.org</a> | |
118 if ($decode{$domain}) | |
119 { | |
120 my $ddes=$decode{$domain}; | |
121 if ($ntokens % 2==0) | |
122 { | |
123 print OUT "<td>\n"; | |
124 }else{ | |
125 print OUT "<td bgcolor=$color>\n"; | |
126 } | |
127 | |
128 print OUT "<p align=\"left\">\n"; | |
129 print OUT "<a href=http://pfam.xfam.org/family/$hd> $domain</a>\n<p align=\"justify\">$ddes</p>\n\n"; | |
130 print OUT "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n"; | |
131 $ntokens++; | |
132 } | |
133 if ($clan_decode{$clan}) | |
134 { | |
135 my $clanD=$clan_decode{$clan}; | |
136 my $ddes=$decode{$domain}; | |
137 if ($ntokens % 2==0) | |
138 { | |
139 print OUT "<td>\n"; | |
140 }else{ | |
141 print OUT "<td bgcolor=$color>\n"; | |
142 } | |
143 | |
144 print OUT "<p align=\"left\">\n"; | |
145 print OUT "<a href=http://pfam.xfam.org/clan/$clan> $clan</a>\n <p align=\"justify\">$clanD</p>\n\n"; | |
146 print OUT "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n"; | |
147 $ntokens++; | |
148 } | |
149 $printed{$name}=1; | |
150 } | |
151 | |
152 #print OUT "<br><br>Proteins without PFAM domains:\n<br>\n"; | |
153 #foreach my $seq (keys %c) | |
154 #{ | |
155 # next if $printed{$seq}; | |
156 # print OUT "<>$seq</pre>\n\n<br><br><left>\n$c{$seq}</left><br>\n"; | |
157 # print OUT "<HR SIZE=3 WIDTH=80%>\n"; | |
158 #} | |
159 print OUT "</table>\n</div>\n</body>\n</html>\n"; | |
160 close(OUT); | |
161 | |
162 sub form | |
163 { | |
164 my $string=$_[0]; | |
165 my $len=$_[1]; | |
166 my $outS=""; | |
167 my @vl=split('',$string); | |
168 for (my $i=1;$i<=$#vl;$i++) | |
169 { | |
170 if ($i % $len==0 && $i>0) | |
171 { | |
172 $outS.="$vl[$i-1]\n"; | |
173 }else{ | |
174 $outS.=$vl[$i-1]; | |
175 } | |
176 } | |
177 $outS.="\n"; | |
178 $outS=~s/ //g; | |
179 return $outS; | |
180 } | |
181 | |
182 sub match | |
183 { | |
184 my $terms=$_[0]; | |
185 my $d1=$_[1]; | |
186 my $d2=$_[2]; | |
187 $terms=~s/AND/ /g; | |
188 my @t1s=(split(/OR/,$terms)); | |
189 my $print_out=0; | |
190 foreach my $t (@t1s) | |
191 { | |
192 #print "$t\n"; | |
193 last if $print_out==1; #OR non serve se 1 è verificata; | |
194 #permute | |
195 my @vl=(split(/\s+/,$t)); | |
196 my $nm1=0; | |
197 my $nm2=0; | |
198 foreach my $v (@vl) | |
199 { | |
200 $nm1++ if ($d1=~/$v/i); | |
201 $nm2++ if ($d1=~/$v/i); | |
202 } | |
203 $print_out=1 if ($nm1==($#vl+1)) || ($nm2==($#vl+1)); | |
204 } | |
205 return $print_out; | |
206 } |