Mercurial > repos > matteoc > agame_custom_tools
diff pfam_annot/annota.pl @ 0:68a3648c7d91 draft default tip
Uploaded
author | matteoc |
---|---|
date | Thu, 22 Dec 2016 04:45:31 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/perl -w
#
# annota.pl -- render Pfam domain and clan annotations as an HTML report.
# (Added as pfam_annot/annota.pl, Thu Dec 22 04:45:31 2016 -0500.)
#
# Usage:
#     annota.pl <protein_fasta> <pfam_hits> <output_html>
#
#   protein_fasta  FASTA file; the first whitespace-delimited word of each
#                  header is the protein name.  A name of the form
#                  "<plasmid>#<rest>" assigns the protein to <plasmid>.
#   pfam_hits      whitespace-separated hit table; lines starting with '#'
#                  are skipped.  Field 1 is the protein name, field 6 the
#                  domain accession (a trailing ".<digits>" version is
#                  removed) and the last field the clan identifier.
#                  NOTE(review): this looks like pfam_scan output -- confirm.
#   output_html    path of the HTML report to write.
#
# The description tables pfamA.txt and clans.txt are read from
# /home/inmare/galaxy/tools/pfam_annot unless the environment variable
# PFAM_ANNOT_DATA_DIR names another directory.

use strict;
use warnings;

use constant DEFAULT_DATA_DIR => "/home/inmare/galaxy/tools/pfam_annot";
use constant SEQ_LINE_WIDTH   => 90;    # residues per line in the <pre> block
use constant MAX_DESCRIPTIONS => 10;    # free-text fields kept per Pfam entry
use constant MAX_CLAN_COLUMNS => 10;    # only the first columns of clans.txt are scanned
# NOTE(review): "#czb9dz" is not a valid hex colour ('z' is not a hex digit).
# It is kept byte-for-byte because the rendered shade depends on how the
# browser recovers from it -- confirm the intended value.
use constant SHADE_COLOR      => "\"#czb9dz\"";

# Split $string into lines of at most $len characters.
# Returns the chunks joined together, each followed by "\n".  An empty or
# undefined string yields "".  A missing or non-positive $len returns the
# string on a single line instead of looping forever.
sub form
{
    my ($string, $len) = @_;
    return "" unless defined $string && length $string;
    return "$string\n" unless defined $len && $len > 0;

    my $outS = "";
    # Strictly less than: with "<=" a string whose length is an exact
    # multiple of $len gained a spurious empty trailing line.
    for (my $i = 0; $i < length($string); $i += $len)
    {
        $outS .= substr($string, $i, $len) . "\n";
    }
    return $outS;
}

# Return the part of $name before the first '#', or undef when $name has no
# '#' or nothing precedes it.
sub plasmid_of
{
    my ($name) = @_;
    return unless defined $name && $name =~ /#/;
    my $plasmid = (split(/#/, $name))[0];
    return unless defined $plasmid && length $plasmid;
    return $plasmid;
}

# Escape the characters that are special in HTML text and attribute values.
sub escape_html
{
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;
    return $text;
}

# Build one well-formed table row holding a single cell.  $shaded selects the
# coloured background; $content is inserted verbatim.
sub table_row
{
    my ($shaded, $content) = @_;
    my $td = $shaded ? "<td bgcolor=" . SHADE_COLOR . ">" : "<td>";
    return "<tr>$td\n$content</td></tr>\n";
}

# Read the Pfam family table at $path.  Only lines starting with a digit are
# used.  Returns a hash reference mapping tab-field 2 to
# "<field 4><br>" followed by up to MAX_DESCRIPTIONS further fields that are
# at least 20 characters long, not repeated, and not credit/author fields.
# Scanning of a line stops at the first field mentioning "hmmbuild".
sub load_pfam_descriptions
{
    my ($path) = @_;
    open(my $in, '<', $path) or die "Cannot open Pfam table '$path': $!\n";

    my %decode;
    while (my $line = <$in>)
    {
        next unless $line =~ /^\d/;
        my @fields = split(/\t+/, $line);
        next if @fields < 4;    # too short to hold accession and description

        my $acc = $fields[1];
        $decode{$acc} = "$fields[3]<br>";
        my $kept = 0;
        my %seen;
        # $field aliases the array element, so the clean-up below also
        # applies to $fields[3] once the loop reaches it (as it always did).
        foreach my $field (@fields)
        {
            $field =~ s/\[\d+\]/ /g;    # drop literature reference markers like [12]
            last if $field =~ /hmmbuild/;
            last if $kept >= MAX_DESCRIPTIONS;
            next if $field =~ /anon|Bates|Cogis|Coggis|Bateman|Sonnhammer|Finn|Studholme|Kerrison/;
            next if $seen{$field};
            next if $field eq $fields[3];
            next if length($field) < 20;
            $decode{$acc} .= "$field ";
            $seen{$field}++;
            $kept++;
        }
    }
    close($in);
    return \%decode;
}

# Read the clan table at $path.  Returns a hash reference mapping tab-field 2
# to the concatenation of every field among the first MAX_CLAN_COLUMNS that
# is at least 30 characters long.
sub load_clan_descriptions
{
    my ($path) = @_;
    open(my $in, '<', $path) or die "Cannot open clan table '$path': $!\n";

    my %clan_decode;
    while (my $line = <$in>)
    {
        my @fields = split(/\t/, $line);
        next if @fields < 2;

        my $clan   = $fields[1];
        my $column = 0;
        foreach my $field (@fields)
        {
            last if ++$column > MAX_CLAN_COLUMNS;
            $field =~ s/\[\d+\]/ /g;
            $clan_decode{$clan} .= "$field " if length($field) >= 30;
        }
    }
    close($in);
    return \%clan_decode;
}

# Read the protein FASTA file at $path.  Returns two hash references:
# name => concatenated sequence, and plasmid id => number of proteins.
sub load_sequences
{
    my ($path) = @_;
    open(my $in, '<', $path) or die "Cannot open protein file '$path': $!\n";

    my (%seq_of, %plasmids);
    my $id = "";
    while (my $line = <$in>)
    {
        if ($line =~ /^>(.*)/)
        {
            my $header = $1;
            $id = (split(/\s+/, $header))[0];
            $id = "" unless defined $id;    # bare ">" header
            my $plasmid = plasmid_of($id);
            $plasmids{$plasmid}++ if defined $plasmid;
        }
        else
        {
            $line =~ s/\r?\n\z//;           # also strips CR from CRLF files
            $seq_of{$id} .= $line;
        }
    }
    close($in);
    return (\%seq_of, \%plasmids);
}

# Return the static start of the report: <head> with the style sheet and the
# show() page switcher, followed by the opening <body> tag.
sub html_head
{
    return join("",
        "<html>\n<head>\n",
        "<style type=\"text/css\">\nspan {\n\ttext-decoration:underline;\n\tcolor:blue;\n\tcursor:pointer;\n}\n</style>\n",
        "<script>\nfunction show(elementID) {\n",
        "\tvar ele = document.getElementById(elementID);\n",
        "\tif (!ele) {\n\t\talert(\"no such element\");\t\treturn;\n\t}\n",
        "\tvar pages = document.getElementsByClassName('page');\n",
        "\tfor(var i = 0; i < pages.length; i++) {\n\t\tpages[i].style.display = 'none';\n\t}\n",
        "\tele.style.display = 'block';\n}\n</script>\n",
        "</head>\n<body>\n");
}

# Return the "Show results a, b, c." selector for the given plasmid ids, or
# "" when there are fewer than two (nothing to choose between).
sub plasmid_selector
{
    my @ids = @_;
    return "" if @ids < 2;

    my @links;
    foreach my $id (@ids)
    {
        # Escape for the JavaScript string first, then for the HTML attribute.
        (my $js = $id) =~ s/([\\'])/\\$1/g;
        push @links, "<span onclick=\"show('" . escape_html($js) . "');\">"
                   . escape_html($id) . "</span>";
    }
    return "<p>Show results " . join(",\n", @links) . ".</p>\n";
}

# Entry point.  Takes the command-line arguments, writes the report and
# returns 0.  Dies with a message when an input or output file cannot be
# opened or written.
sub main
{
    my @args = @_;
    die "Usage: $0 <protein_fasta> <pfam_hits> <output_html>\n" if @args < 3;
    my ($prot_file, $pfam_file, $out_file) = @args;

    my $data_dir = DEFAULT_DATA_DIR;
    if (defined $ENV{PFAM_ANNOT_DATA_DIR} && length $ENV{PFAM_ANNOT_DATA_DIR})
    {
        $data_dir = $ENV{PFAM_ANNOT_DATA_DIR};
    }

    my $decode      = load_pfam_descriptions("$data_dir/pfamA.txt");
    my $clan_decode = load_clan_descriptions("$data_dir/clans.txt");
    my ($seq_of, $plasmids) = load_sequences($prot_file);

    open(my $hits, '<', $pfam_file) or die "Cannot open Pfam hits '$pfam_file': $!\n";
    open(my $out, '>', $out_file)   or die "Cannot write report '$out_file': $!\n";

    print {$out} html_head();
    print {$out} "Proteins with PFAM domains:\n<br><br>\n";
    # Sorted so the selector order is the same on every run.
    print {$out} plasmid_selector(sort keys %$plasmids);

    my $spacer    = "<tr><td></td></tr>\n";
    my $page_open = 0;      # true while a <div><table> pair is open
    my $ntokens   = 0;      # parity selects the row background
    my $current_plasmid;
    my %printed;

    while (my $line = <$hits>)
    {
        next if $line =~ /^\#/;
        my ($name, $domain, $clan) = (split(/\s+/, $line))[0, 5, -1];
        next unless $name;
        $domain = "" unless defined $domain;    # short line: no accession field
        $clan   = "" unless defined $clan;

        # One <div class="page"> per plasmid; proteins without a plasmid
        # prefix go into whatever page is open, or into an anonymous one.
        # Tracking the open page explicitly avoids the nested, never-closed
        # container previously produced when exactly one plasmid existed.
        my $plasmid = plasmid_of($name);
        if (defined $plasmid
            && !(defined $current_plasmid && $plasmid eq $current_plasmid))
        {
            print {$out} "</table>\n</div>\n" if $page_open;
            print {$out} "<div id=\"" . escape_html($plasmid) . "\" class=\"page\" style=\"\">\n";
            print {$out} "<table cellpadding=\"0\" width=650>\n";
            $page_open       = 1;
            $current_plasmid = $plasmid;
        }
        elsif (!$page_open)
        {
            print {$out} "<div>\n<table cellpadding=\"0\" width=650>\n";
            $page_open = 1;
        }

        $domain =~ s/\.\d+//;    # PF00001.12 -> PF00001

        unless ($printed{$name})
        {
            my $seq = defined $seq_of->{$name} ? $seq_of->{$name} : "";
            $seq =~ s/\*//g;     # drop stop-codon markers
            $seq = form($seq, SEQ_LINE_WIDTH);
            my $shown = escape_html($name);
            print {$out} table_row(0, "<HR SIZE=3 WIDTH=80%></HR>\n<center><b>$shown</b><br>\n</center>\n");
            print {$out} table_row(1, "<pre> \n$seq\n </pre>\n");
            print {$out} $spacer;
            $ntokens = 2;
            # Recorded here, not at the end of the iteration, so that no
            # later branch can skip it and cause the header to be repeated.
            $printed{$name} = 1;
        }

        my $hd          = uc $domain;
        my $domain_text = $decode->{$domain};
        if ($domain_text)
        {
            (my $ddes = $domain_text) =~ s/\s+/ /g;
            print {$out} table_row($ntokens % 2,
                  "<p align=\"left\">\n"
                . "<a href=http://pfam.xfam.org/family/$hd> $domain</a>\n<p align=\"justify\">$ddes</p>\n\n"
                . "</p>\n");
            print {$out} $spacer;
            $ntokens++;
        }

        my $clan_text = $clan_decode->{$clan};
        # The clan row is omitted when it would repeat the domain text.
        if ($clan_text && !(defined $domain_text && $domain_text eq $clan_text))
        {
            (my $clanD = $clan_text) =~ s/\s+/ /g;
            print {$out} table_row($ntokens % 2,
                  "<p align=\"left\">\n"
                . "<a href=http://pfam.xfam.org/clan/$clan> $clan</a>\n <p align=\"justify\">$clanD</p>\n\n"
                . "</p>\n");
            print {$out} $spacer;
            $ntokens++;
        }
    }
    close($hits);

    print {$out} "</table>\n</div>\n" if $page_open;
    print {$out} "</body>\n</html>\n";
    # Buffered write errors only surface at close time.
    close($out) or die "Error while writing '$out_file': $!\n";
    return 0;
}

# Run only when executed as a script, so the subs can be loaded on their own.
exit(main(@ARGV)) unless caller;

1;