comparison pfam_search/annota.Filter.pl @ 0:68a3648c7d91 draft default tip

Uploaded
author matteoc
date Thu, 22 Dec 2016 04:45:31 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:68a3648c7d91
1 #!/usr/bin/perl -w
2
3 use strict;
4 my $d_file="/home/inmare/galaxy/tools/pfam_search/pfamA.txt";
5 open(IN,$d_file);
6 my %decode=();
7 my %clan_decode;
8 my $id="";
9 my %c=();
10
11
12 my ($prot_file,$pfam_file,$prefix,@search_T)=@ARGV;
13 my $searchP="";
14 while(<IN>)
15 {
16 if ($_=~/^\d/)
17 {
18 my @vl=(split(/\t+/));
19 $decode{$vl[1]}="$vl[3]<br>";#$vl[8] $vl[9]";
20 my $cc=0;
21 my %repeated=();
22 foreach my $v (@vl)
23 {
24 $v=~s/\[\d+\]/ /g;
25 $cc++;
26 last if $v=~/hmmbuild/;
27 last if $cc>10;
28 next if $v=~/anon/;
29 next if $v=~/Bates/;
30 next if $v=~/Cogis/;
31 next if $v=~/Bateman/;
32 next if $v=~/Sonnhammer/;
33 next if $v=~/Finn/;
34 next if $v=~/Studholme/;
35 next if $v eq $vl[3];
36 next if $v=~/Kerrison/;
37 next if $repeated{$v};
38 #next if length($v)>=30 && $cc<=10;
39 $decode{$vl[1]}.="$v " if length($v)>=20 && $cc<=10;
40 $repeated{$v}++;
41 }
42 }
43 }
44 close(IN);
45
46 my $clan_file="/home/inmare/galaxy/tools/pfam_search/clans.txt";
47 open(IN,$clan_file);
48 while(<IN>)
49 {
50 my @vl=(split(/\t/));
51 #$clan_decode{$vl[1]}="$vl[3]";
52 my $cc=0;
53 foreach my $v (@vl)
54 {
55 $cc++;
56 $v=~s/\[\d+\]/ /g;
57 $clan_decode{$vl[1]}.="$v " if length($v) >=30 && $cc<=10;
58 }
59
60 }
61 close(IN);
62 open(IN,"$prot_file");
63 while(<IN>)
64 {
65 if ($_=~/^>(.*)/)
66 {
67 $id=$1;
68 $id=(split(/\s+/,$id))[0];
69 }else{
70 chomp;
71 $c{$id}.=$_;
72 }
73 }
74 close(IN);
75
76 foreach my $s (@search_T)
77 {
78 $searchP.="$s ";
79 }
80
81 open(OUT,">$prefix");
82 print OUT "<html>\n<head></head>\n<body>\n";
83 my $color="\"#czb9dz\"";
84 my %printed;
85 open(IN,$pfam_file);
86 print OUT "Proteins with PFAM domains matching the keywords:\n<br><br>\n";
87 print OUT "<div>\n<table cellpadding=\"0\" width=650>\n";
88 my $ntokens=0;
89 while(<IN>)
90 {
91 next if $_=~/^\#/;
92 my ($name,$domain,$clan)=(split(/\s+/))[0,5,-1];
93 next unless $name;
94 next unless $domain;
95 $domain=~s/\.\d+//;
96 my $sd=$decode{$domain} ? $decode{$domain} : "MagnottaPantaleo§§";
97 my $sc=$clan_decode{$clan} ? $clan_decode{$clan} : "SciarrattaCalogero@@";
98 my $continue=match($searchP,$sd,$sc);
99 #print "$name $domain $clan $sd $sc\n";
100 next unless $continue;
101 unless ($printed{$name})
102 {
103 my $seq=$c{$name};
104
105 $seq=~s/\*//g;
106 $seq=form($seq,90);
107 print OUT "<td>\n";
108 print OUT "<HR SIZE=3 WIDTH=80%>\n";
109 print OUT "<center><b>$name</b><br>\n</center>\n";
110 print OUT "</td\n<tr></tr>\n";
111 print OUT "<td bgcolor=$color>\n";
112 print OUT "<pre> \n$seq\n </pre>\n";
113 print OUT "</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
114 $ntokens=2;
115 }
116 my $hd=uc $domain;
117 #<a href="http://www.canoro.altervista.org/" class="nav" target="_blank">www.canoro.altervista.org</a>
118 if ($decode{$domain})
119 {
120 my $ddes=$decode{$domain};
121 if ($ntokens % 2==0)
122 {
123 print OUT "<td>\n";
124 }else{
125 print OUT "<td bgcolor=$color>\n";
126 }
127
128 print OUT "<p align=\"left\">\n";
129 print OUT "<a href=http://pfam.xfam.org/family/$hd> $domain</a>\n<p align=\"justify\">$ddes</p>\n\n";
130 print OUT "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
131 $ntokens++;
132 }
133 if ($clan_decode{$clan})
134 {
135 my $clanD=$clan_decode{$clan};
136 my $ddes=$decode{$domain};
137 if ($ntokens % 2==0)
138 {
139 print OUT "<td>\n";
140 }else{
141 print OUT "<td bgcolor=$color>\n";
142 }
143
144 print OUT "<p align=\"left\">\n";
145 print OUT "<a href=http://pfam.xfam.org/clan/$clan> $clan</a>\n <p align=\"justify\">$clanD</p>\n\n";
146 print OUT "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
147 $ntokens++;
148 }
149 $printed{$name}=1;
150 }
151
152 #print OUT "<br><br>Proteins without PFAM domains:\n<br>\n";
153 #foreach my $seq (keys %c)
154 #{
155 # next if $printed{$seq};
156 # print OUT "<>$seq</pre>\n\n<br><br><left>\n$c{$seq}</left><br>\n";
157 # print OUT "<HR SIZE=3 WIDTH=80%>\n";
158 #}
159 print OUT "</table>\n</div>\n</body>\n</html>\n";
160 close(OUT);
161
162 sub form
163 {
164 my $string=$_[0];
165 my $len=$_[1];
166 my $outS="";
167 my @vl=split('',$string);
168 for (my $i=1;$i<=$#vl;$i++)
169 {
170 if ($i % $len==0 && $i>0)
171 {
172 $outS.="$vl[$i-1]\n";
173 }else{
174 $outS.=$vl[$i-1];
175 }
176 }
177 $outS.="\n";
178 $outS=~s/ //g;
179 return $outS;
180 }
181
182 sub match
183 {
184 my $terms=$_[0];
185 my $d1=$_[1];
186 my $d2=$_[2];
187 $terms=~s/AND/ /g;
188 my @t1s=(split(/OR/,$terms));
189 my $print_out=0;
190 foreach my $t (@t1s)
191 {
192 #print "$t\n";
193 last if $print_out==1; #OR non serve se 1 è verificata;
194 #permute
195 my @vl=(split(/\s+/,$t));
196 my $nm1=0;
197 my $nm2=0;
198 foreach my $v (@vl)
199 {
200 $nm1++ if ($d1=~/$v/i);
201 $nm2++ if ($d1=~/$v/i);
202 }
203 $print_out=1 if ($nm1==($#vl+1)) || ($nm2==($#vl+1));
204 }
205 return $print_out;
206 }