view pfam_annot/annota.pl @ 0:68a3648c7d91 draft default tip

Uploaded
author matteoc
date Thu, 22 Dec 2016 04:45:31 -0500
parents
children
line wrap: on
line source

#!/usr/bin/perl -w

use strict;
# --- Command line --------------------------------------------------------
# Positional arguments, in order: sequence file (records introduced by
# '>' header lines), hit table to annotate, path of the HTML file to write.
my $prot_file=shift;
my $pfam_file=shift;
my $prefix=shift;
die "Usage: $0 <prot_file> <pfam_file> <output_html>\n" unless defined $prefix;

# --- State shared by the loading stages and the report writer -------------
my %decode=();		# family key (2nd field of pfamA.txt) -> description HTML
my %clan_decode;	# clan key (2nd field of clans.txt) -> description text
my $id="";		# id of the sequence record currently being read
my %c=();		# sequence id -> concatenated sequence lines

# --- Load family descriptions ---------------------------------------------
# Only rows starting with a digit are used.  Fields are split on runs of
# tabs, so empty columns collapse and do not occupy an index.
# Each entry is "<4th field><br>" followed by up to ten distinct free-text
# fields of at least 20 characters, each terminated by a space.
my $d_file="/home/inmare/galaxy/tools/pfam_annot/pfamA.txt";
open(my $pfam_fh,'<',$d_file) or die "Cannot open $d_file: $!\n";
while(my $row=<$pfam_fh>)
{
	next unless $row=~/^\d/;
	my @vl=split(/\t+/,$row);
	# Rows too short to carry a key and a title cannot be decoded.
	next unless defined $vl[3];

	# Take the key before the loop below rewrites the fields in place.
	my $acc=$vl[1];
	$decode{$acc}="$vl[3]<br>";

	my $cc=0;		# number of text fields appended so far
	my %repeated=();	# text fields already appended for this row
	foreach my $v (@vl)
	{
		# Replace bracketed numeric markers such as "[1]" with a space.
		$v=~s/\[\d+\]/ /g;
		# Everything from the first field mentioning hmmbuild is ignored.
		last if $v=~/hmmbuild/;
		last if $cc>=10;
		# Skip fields that name annotators rather than describe the family.
		next if $v=~/anon|Bates|Cogis|Coggis|Bateman|Sonnhammer|Finn|Studholme|Kerrison/;
		next if $repeated{$v};
		next if $v eq $vl[3];
		next unless length($v)>=20;
		$decode{$acc}.="$v ";
		$repeated{$v}++;
		$cc++;
	}
}
close($pfam_fh);

# --- Load clan descriptions -----------------------------------------------
# Each line of clans.txt is split on single tabs.  Among the first ten
# fields, every field of at least 30 characters is appended (followed by a
# space) to the entry keyed by the line's second field.
my $clan_file="/home/inmare/galaxy/tools/pfam_annot/clans.txt";
open(my $clan_fh,'<',$clan_file) or die "Cannot open $clan_file: $!\n";
while(my $row=<$clan_fh>)
{
	my @vl=split(/\t/,$row);
	# Lines without a second field (e.g. blank lines) have no usable key.
	next unless defined $vl[1];

	# Take the key before the substitution below rewrites the fields in
	# place, so every field of the row is stored under the same key.
	my $key=$vl[1];
	my $last=$#vl<9 ? $#vl : 9;
	foreach my $v (@vl[0..$last])
	{
		# Replace bracketed numeric markers such as "[1]" with a space.
		$v=~s/\[\d+\]/ /g;
		$clan_decode{$key}.="$v " if length($v)>=30;
	}
}
close($clan_fh);
# --- Read the sequence file -------------------------------------------------
# Fills %c (sequence id -> sequence) and %plasm (prefix before '#' in the id
# -> number of sequences carrying that prefix).  The id is the first
# whitespace-delimited token of the '>' header line.
my %plasm=();
die "No sequence file given\n" unless defined $prot_file;
open(my $prot_fh,'<',$prot_file) or die "Cannot open $prot_file: $!\n";
while(my $row=<$prot_fh>)
{
	if ($row=~/^>(.*)/)
	{
		$id=(split(/\s+/,$1))[0];
		# A bare ">" header yields no token at all; fall back to "".
		$id="" unless defined $id;
		if ($id=~/#/)
		{
			my $pid=(split(/\#/,$id))[0];
			# An id that is only "#" yields no prefix; count it under "".
			$pid="" unless defined $pid;
			$plasm{$pid}++;
		}
	}
	else
	{
		# Strip the line ending, including the CR of CRLF files, so it
		# does not end up inside the sequence.
		$row=~s/\r?\n\z//;
		$c{$id}.=$row;
	}
}
close($prot_fh);

# --- Write the HTML report ---------------------------------------------------
# Reads the hit table ($pfam_file) line by line and writes one HTML page to
# $prefix.  For every hit line the fields used are: field 1 (sequence name),
# field 6 (domain key, looked up in %decode) and the last field (clan key,
# looked up in %clan_decode).  The first time a sequence name is seen, a
# header and its sequence (from %c, wrapped at 90 columns by form()) are
# printed; each hit then adds a domain row and, when known, a clan row.
# When sequence names carry a "prefix#" part, hits are grouped in one
# <div class="page"> per prefix and a selector is printed when there is more
# than one prefix.
#
# NOTE(review): sequence names, prefixes and clan keys are interpolated into
# the HTML without escaping; confirm the inputs are trusted.
die "Usage: $0 <prot_file> <pfam_file> <output_html>\n"
	unless defined $pfam_file && defined $prefix;

open(my $out,'>',$prefix) or die "Cannot write $prefix: $!\n";
open(my $hits,'<',$pfam_file) or die "Cannot open $pfam_file: $!\n";

print $out "<html>\n<head>\n";
print $out "<style type=\"text/css\">\nspan {\n\ttext-decoration:underline;\n\tcolor:blue;\n\tcursor:pointer;\n}\n</style>\n";
print $out "<script>\nfunction show(elementID) {\n\tvar ele = document.getElementById(elementID);\n\tif (!ele) {\n\t\talert(\"no such element\");\t\treturn;\n\t}\n\tvar pages = document.getElementsByClassName('page');\n\tfor(var i = 0; i < pages.length; i++) {\n\t\tpages[i].style.display = 'none';\n\t}\n\tele.style.display = 'block';\n}\n</script>\n";
# The closing </body> written at the end needs a matching opening tag.
print $out "</head>\n<body>\n";

# Background of the shaded cells.  The previous literal "#czb9dz" is not a
# valid hex colour; under the HTML legacy colour parsing rules it is read as
# #c0b9d0, so that value is written explicitly.
my $color="\"#c0b9d0\"";
my %printed;		# sequence names whose header has already been written

print $out "Proteins with PFAM domains:\n<br><br>\n";

# Selector: one clickable entry per prefix, only useful with several prefixes.
my @dd=sort keys %plasm;
if (@dd>1)
{
	my @links=map { "<span onclick=\"show('$_');\">$_</span>" } @dd;
	print $out "<p>Show results ".join(",\n",@links).".</p>\n";
}

my $ntokens=0;		# row counter used to alternate shaded/unshaded cells
my $prev_plasmid="";
my $container_open=0;	# true while a <div><table> pair is open

while(my $row=<$hits>)
{
	next if $row=~/^\#/;
	my ($name,$domain,$clan)=(split(/\s+/,$row))[0,5,-1];
	# Skip blank lines and lines too short to carry a domain field.
	next unless defined $name && length $name;
	next unless defined $domain;

	if ($name=~/#/)
	{
		my $curr_plasmid=(split(/\#/,$name))[0];
		$curr_plasmid="" unless defined $curr_plasmid;
		if ($curr_plasmid ne $prev_plasmid)
		{
			# Close whatever container is open before starting the
			# page of the next prefix, so the tags always balance.
			if ($container_open)
			{
				print $out "</table>\n";
				print $out "</div>\n";
			}
			print $out "<div id=\"$curr_plasmid\" class=\"page\" style=\"\">\n";
			print $out "<table cellpadding=\"0\" width=650>\n";
			$container_open=1;
		}
		$prev_plasmid=$curr_plasmid;
	}
	unless ($container_open)
	{
		# Hits not covered by a prefix page go into an anonymous container.
		print $out "<div>\n<table cellpadding=\"0\" width=650>\n";
		$container_open=1;
	}

	# Drop the first ".<digits>" from the domain key (presumably a version
	# suffix) so it matches the keys of %decode.
	$domain=~s/\.\d+//;

	unless ($printed{$name})
	{
		# A sequence absent from the sequence file is shown as empty.
		my $seq=defined $c{$name} ? $c{$name} : "";
		$seq=~s/\*//g;
		$seq=form($seq,90);
		print $out "<td>\n";
		print $out "<HR SIZE=3 WIDTH=80%></HR>\n";
		print $out "<center><b>$name</b><br>\n</center>\n";
		print $out "</td>\n<tr></tr>\n";
		print $out "<td bgcolor=$color>\n";
		print $out "<pre> \n$seq\n </pre>\n";
		print $out "</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
		$ntokens=2;
		# Mark the header as written right away so that no later branch
		# can skip the bookkeeping and cause a duplicate header.
		$printed{$name}=1;
	}

	my $hd=uc $domain;
	my $domain_raw=$decode{$domain};
	if ($domain_raw)
	{
		my $ddes=$domain_raw;
		$ddes=~s/\s+/ /g;
		my $cell=($ntokens % 2==0) ? "<td>\n" : "<td bgcolor=$color>\n";
		print $out $cell;
		print $out "<p align=\"left\">\n";
		print $out "<a href=http://pfam.xfam.org/family/$hd> $domain</a>\n<p align=\"justify\">$ddes</p>\n\n";
		print $out "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
		$ntokens++;
	}

	my $clan_raw=$clan_decode{$clan};
	# The clan row is omitted when its text merely repeats the domain text.
	my $same_text=defined $domain_raw && defined $clan_raw && $domain_raw eq $clan_raw;
	if ($clan_raw && !$same_text)
	{
		my $clanD=$clan_raw;
		$clanD=~s/\s+/ /g;
		my $cell=($ntokens % 2==0) ? "<td>\n" : "<td bgcolor=$color>\n";
		print $out $cell;
		print $out "<p align=\"left\">\n";
		print $out "<a href=http://pfam.xfam.org/clan/$clan> $clan</a>\n <p align=\"justify\">$clanD</p>\n\n";
		print $out "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
		$ntokens++;
	}
}
close($hits);

if ($container_open)
{
	print $out "</table>\n";
	print $out "</div>\n";
}
print $out "</body>\n</html>\n";
# Buffered write errors only surface when the handle is closed.
close($out) or die "Cannot finish writing $prefix: $!\n";

# form($string,$len)
#   Wraps $string into lines of at most $len characters.
#   Returns the wrapped text; every line, including the last, ends with "\n".
#   An undefined or empty $string yields "".  A missing or non-positive $len
#   disables wrapping (the string is returned as a single line).
sub form
{
	my ($string,$len)=@_;
	$string="" unless defined $string;
	return "" if $string eq "";
	# A width below 1 would never advance the loop index.
	return "$string\n" unless defined $len && $len>=1;

	my $outS="";
	# Strictly "<": with "<=" an extra empty line was appended whenever the
	# length was an exact multiple of $len.
	for (my $i=0;$i<length($string);$i+=$len)
	{
		$outS.=substr($string,$i,$len)."\n";
	}
	return $outS;
}