Mercurial > repos > matteoc > agame_custom_tools
view pfam_search/annota.Filter.pl @ 0:68a3648c7d91 draft default tip
Uploaded
author | matteoc |
---|---|
date | Thu, 22 Dec 2016 04:45:31 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl -w use strict; my $d_file="/home/inmare/galaxy/tools/pfam_search/pfamA.txt"; open(IN,$d_file); my %decode=(); my %clan_decode; my $id=""; my %c=(); my ($prot_file,$pfam_file,$prefix,@search_T)=@ARGV; my $searchP=""; while(<IN>) { if ($_=~/^\d/) { my @vl=(split(/\t+/)); $decode{$vl[1]}="$vl[3]<br>";#$vl[8] $vl[9]"; my $cc=0; my %repeated=(); foreach my $v (@vl) { $v=~s/\[\d+\]/ /g; $cc++; last if $v=~/hmmbuild/; last if $cc>10; next if $v=~/anon/; next if $v=~/Bates/; next if $v=~/Cogis/; next if $v=~/Bateman/; next if $v=~/Sonnhammer/; next if $v=~/Finn/; next if $v=~/Studholme/; next if $v eq $vl[3]; next if $v=~/Kerrison/; next if $repeated{$v}; #next if length($v)>=30 && $cc<=10; $decode{$vl[1]}.="$v " if length($v)>=20 && $cc<=10; $repeated{$v}++; } } } close(IN); my $clan_file="/home/inmare/galaxy/tools/pfam_search/clans.txt"; open(IN,$clan_file); while(<IN>) { my @vl=(split(/\t/)); #$clan_decode{$vl[1]}="$vl[3]"; my $cc=0; foreach my $v (@vl) { $cc++; $v=~s/\[\d+\]/ /g; $clan_decode{$vl[1]}.="$v " if length($v) >=30 && $cc<=10; } } close(IN); open(IN,"$prot_file"); while(<IN>) { if ($_=~/^>(.*)/) { $id=$1; $id=(split(/\s+/,$id))[0]; }else{ chomp; $c{$id}.=$_; } } close(IN); foreach my $s (@search_T) { $searchP.="$s "; } open(OUT,">$prefix"); print OUT "<html>\n<head></head>\n<body>\n"; my $color="\"#czb9dz\""; my %printed; open(IN,$pfam_file); print OUT "Proteins with PFAM domains matching the keywords:\n<br><br>\n"; print OUT "<div>\n<table cellpadding=\"0\" width=650>\n"; my $ntokens=0; while(<IN>) { next if $_=~/^\#/; my ($name,$domain,$clan)=(split(/\s+/))[0,5,-1]; next unless $name; next unless $domain; $domain=~s/\.\d+//; my $sd=$decode{$domain} ? $decode{$domain} : "MagnottaPantaleo§§"; my $sc=$clan_decode{$clan} ? $clan_decode{$clan} : "SciarrattaCalogero@@"; my $continue=match($searchP,$sd,$sc); #print "$name $domain $clan $sd $sc\n"; next unless $continue; unless ($printed{$name}) { my $seq=$c{$name}; $seq=~s/\*//g; $seq=form($seq,90); print OUT "<td>\n"; print OUT "<HR SIZE=3 WIDTH=80%>\n"; print OUT "<center><b>$name</b><br>\n</center>\n"; print OUT "</td\n<tr></tr>\n"; print OUT "<td bgcolor=$color>\n"; print OUT "<pre> \n$seq\n </pre>\n"; print OUT "</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n"; $ntokens=2; } my $hd=uc $domain; #<a href="http://www.canoro.altervista.org/" class="nav" target="_blank">www.canoro.altervista.org</a> if ($decode{$domain}) { my $ddes=$decode{$domain}; if ($ntokens % 2==0) { print OUT "<td>\n"; }else{ print OUT "<td bgcolor=$color>\n"; } print OUT "<p align=\"left\">\n"; print OUT "<a href=http://pfam.xfam.org/family/$hd> $domain</a>\n<p align=\"justify\">$ddes</p>\n\n"; print OUT "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n"; $ntokens++; } if ($clan_decode{$clan}) { my $clanD=$clan_decode{$clan}; my $ddes=$decode{$domain}; if ($ntokens % 2==0) { print OUT "<td>\n"; }else{ print OUT "<td bgcolor=$color>\n"; } print OUT "<p align=\"left\">\n"; print OUT "<a href=http://pfam.xfam.org/clan/$clan> $clan</a>\n <p align=\"justify\">$clanD</p>\n\n"; print OUT "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n"; $ntokens++; } $printed{$name}=1; } #print OUT "<br><br>Proteins without PFAM domains:\n<br>\n"; #foreach my $seq (keys %c) #{ # next if $printed{$seq}; # print OUT "<>$seq</pre>\n\n<br><br><left>\n$c{$seq}</left><br>\n"; # print OUT "<HR SIZE=3 WIDTH=80%>\n"; #} print OUT "</table>\n</div>\n</body>\n</html>\n"; close(OUT); sub form { my $string=$_[0]; my $len=$_[1]; my $outS=""; my @vl=split('',$string); for (my $i=1;$i<=$#vl;$i++) { if ($i % $len==0 && $i>0) { $outS.="$vl[$i-1]\n"; }else{ $outS.=$vl[$i-1]; } } $outS.="\n"; $outS=~s/ //g; return $outS; } sub match { my $terms=$_[0]; my $d1=$_[1]; my $d2=$_[2]; $terms=~s/AND/ /g; my @t1s=(split(/OR/,$terms)); my $print_out=0; foreach my $t (@t1s) { #print "$t\n"; last if $print_out==1; #OR non serve se 1 è verificata; #permute my @vl=(split(/\s+/,$t)); my $nm1=0; my $nm2=0; foreach my $v (@vl) { $nm1++ if ($d1=~/$v/i); $nm2++ if ($d1=~/$v/i); } $print_out=1 if ($nm1==($#vl+1)) || ($nm2==($#vl+1)); } return $print_out; }