Mercurial > repos > matteoc > agame_custom_tools
comparison pfam_annot/annota.pl @ 0:68a3648c7d91 draft default tip
Uploaded
| author | matteoc |
|---|---|
| date | Thu, 22 Dec 2016 04:45:31 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:68a3648c7d91 |
|---|---|
| 1 #!/usr/bin/perl -w | |
| 2 | |
| 3 use strict; | |
# ---------------------------------------------------------------------------
# Script-wide state and loading of the Pfam-A family table.
#
# Command line:  annota.pl <protein_fasta> <pfam_hits> <output_html>
#
# %decode      : key is field 1 of a pfamA.txt row, value is an HTML snippet
#                "<field 3><br><long text fields ...>".
#                (presumably field 1 is the family accession and field 3 its
#                short description -- confirm against the pfamA.txt dump in use)
# %clan_decode : filled later from clans.txt.
# %c / $id     : filled later while reading the protein FASTA file.
# ---------------------------------------------------------------------------
my $d_file="/home/inmare/galaxy/tools/pfam_annot/pfamA.txt";
my %decode=();
my %clan_decode;
my $id="";
my %c=();

my $prot_file=shift;
my $pfam_file=shift;
my $prefix=shift;

# Fail early with a usage message instead of producing a broken report.
die "Usage: $0 <protein_fasta> <pfam_hits> <output_html>\n"
    unless defined $prot_file && defined $pfam_file && defined $prefix;

# Fields containing any of these strings are never added to a description.
my $skip_field_re=qr/anon|Bates|Cogis|Coggis|Bateman|Sonnhammer|Finn|Studholme|Kerrison/;

# At most this many long text fields are appended per family.
my $max_desc_fields=10;

open(my $pfam_fh,'<',$d_file) or die "Cannot open $d_file: $!\n";
while (my $pfam_line=<$pfam_fh>)
{
    # Only rows starting with a digit are data rows.
    next unless $pfam_line=~/^\d/;

    my @vl=split(/\t+/,$pfam_line);

    # Rows too short to carry a key and a title cannot be decoded.
    next unless defined $vl[1] && defined $vl[3];

    my $key=$vl[1];
    $decode{$key}="$vl[3]<br>";

    my $cc=0;          # number of fields appended so far
    my %repeated=();   # fields already appended for this row

    foreach my $v (@vl)
    {
        # Once the quota is reached nothing more can be appended.
        last if $cc>=$max_desc_fields;

        # Drop bracketed numeric markers such as "[1]" (edits @vl in place,
        # so later comparisons against $vl[3] see the cleaned text).
        $v=~s/\[\d+\]/ /g;

        # Everything from the hmmbuild command line onwards is ignored.
        last if $v=~/hmmbuild/;

        next if $v=~$skip_field_re;
        next if $repeated{$v};
        next if $v eq $vl[3];        # do not repeat the title
        next unless length($v)>=20;  # keep only long free-text fields

        $decode{$key}.="$v ";
        $repeated{$v}++;
        $cc++;
    }
}
close($pfam_fh);
| 49 | |
# ---------------------------------------------------------------------------
# Load the clan table into %clan_decode.
#
# For every tab-separated row, each of the first ten fields that is at least
# 30 characters long is appended (followed by a space) to the entry keyed by
# field 1 of that row.
# (presumably field 1 is the clan accession -- confirm against clans.txt)
# ---------------------------------------------------------------------------
my $clan_file="/home/inmare/galaxy/tools/pfam_annot/clans.txt";
open(my $clan_fh,'<',$clan_file) or die "Cannot open $clan_file: $!\n";
while (my $clan_line=<$clan_fh>)
{
    my @vl=split(/\t/,$clan_line);

    # A row without a second field has no key to store text under.
    next unless defined $vl[1];

    my $cc=0;   # 1-based position of the field being examined
    foreach my $v (@vl)
    {
        $cc++;
        last if $cc>10;              # only the first ten fields are considered

        # Drop bracketed numeric markers such as "[1]" (edits @vl in place).
        $v=~s/\[\d+\]/ /g;

        $clan_decode{$vl[1]}.="$v " if length($v)>=30;
    }
}
close($clan_fh);
# ---------------------------------------------------------------------------
# Read the protein FASTA file given as first command-line argument.
#
# %c     : sequence id (first whitespace-delimited token of the header line)
#          -> concatenated sequence text.
# %plasm : for ids containing '#', the part before the first '#' -> number of
#          sequences seen with that prefix.
#
# The file name comes from the command line, so it is opened with the
# three-argument form: a name such as "cmd |" or ">file" must never be
# interpreted as a pipe or a write mode.
# ---------------------------------------------------------------------------
my %plasm=();
open(my $prot_fh,'<',$prot_file) or die "Cannot open $prot_file: $!\n";
while (my $fasta_line=<$prot_fh>)
{
    # Strip LF as well as CRLF line endings so no stray "\r" ends up in the
    # stored sequences.
    $fasta_line=~s/\r?\n\z//;

    if ($fasta_line=~/^>(.*)/)
    {
        $id=(split(/\s+/,$1))[0];
        $id="" unless defined $id;   # bare ">" header

        if ($id=~/#/)
        {
            my $pid=(split(/\#/,$id))[0];
            $plasm{$pid}++;
        }
    }
    else
    {
        $c{$id}.=$fasta_line;
    }
}
close($prot_fh);
| 84 | |
# ---------------------------------------------------------------------------
# Write the HTML report.
#
# Input : the hit table named by the second command-line argument.  Lines
#         starting with '#' are skipped; of the whitespace-separated fields,
#         the first is the protein name, the sixth the domain key and the
#         last the clan key.
# Output: the file named by the third command-line argument.
#
# For each protein the sequence (wrapped by form() at 90 columns) is printed
# once, followed by one cell per hit with a known domain description and one
# per known clan description.  Protein names of the form "<prefix>#<rest>"
# are grouped into one <div class="page"> per prefix; when more than one
# prefix exists, a row of links lets the reader show a single group.
# ---------------------------------------------------------------------------

# Escape the HTML-special characters of a string taken from an input file so
# it cannot break out of the surrounding markup.  Undefined input yields "".
sub html_escape
{
    my ($text)=@_;
    return "" unless defined $text;
    $text=~s/&/&amp;/g;
    $text=~s/</&lt;/g;
    $text=~s/>/&gt;/g;
    $text=~s/"/&quot;/g;
    $text=~s/'/&#39;/g;
    return $text;
}

open(my $out_fh,'>',$prefix) or die "Cannot write $prefix: $!\n";
print $out_fh "<html>\n<head>\n";
print $out_fh "<style type=\"text/css\">\nspan {\n\ttext-decoration:underline;\n\tcolor:blue;\n\tcursor:pointer;\n}\n</style>\n";
print $out_fh "<script>\nfunction show(elementID) {\n\tvar ele = document.getElementById(elementID);\n\tif (!ele) {\n\t\talert(\"no such element\");\t\treturn;\n\t}\n\tvar pages = document.getElementsByClassName('page');\n\tfor(var i = 0; i < pages.length; i++) {\n\t\tpages[i].style.display = 'none';\n\t}\n\tele.style.display = 'block';\n}\n</script>\n";
print $out_fh "</head>\n<body>\n";

# NOTE(review): "#czb9dz" is not a valid hexadecimal colour; it is kept
# unchanged because the intended value cannot be determined from this file.
my $color="\"#czb9dz\"";
my %printed;
my $table_open="<table cellpadding=\"0\" width=650>\n";

open(my $hits_fh,'<',$pfam_file) or die "Cannot open $pfam_file: $!\n";
print $out_fh "Proteins with PFAM domains:\n<br><br>\n";

# True while a <div><table> pair is open and still has to be closed.
my $container_open=0;

# Sorted so that the link row is identical from run to run.
my @dd=sort keys %plasm;
if (@dd>1)
{
    print $out_fh "<p>Show results ";
    foreach my $i (0..$#dd)
    {
        my $label=html_escape($dd[$i]);

        # The id is embedded in a single-quoted JavaScript string that itself
        # sits inside an HTML attribute: escape for JavaScript first, then
        # for HTML.
        my $js_arg=$dd[$i];
        $js_arg=~s/([\\'])/\\$1/g;
        $js_arg=html_escape($js_arg);

        my $tail=($i==$#dd) ? ".</p>\n" : ",\n";
        print $out_fh "<span onclick=\"show('$js_arg');\">$label</span>$tail";
    }
}
else
{
    # Zero or one group: start with a plain container (one div per plasmid).
    print $out_fh "<div>\n$table_open";
    $container_open=1;
}

my $ntokens=0;        # parity selects the background of the next cell
my $prev_plasmid="";
while (my $hit=<$hits_fh>)
{
    next if $hit=~/^\#/;
    my ($name,$domain,$clan)=(split(/\s+/,$hit))[0,5,-1];
    next unless $name;
    $domain="" unless defined $domain;   # line with fewer than six fields

    if ($name=~/#/)
    {
        my $curr_plasmid=(split(/\#/,$name))[0];
        if ($curr_plasmid ne $prev_plasmid)
        {
            # Close whatever container is open (including the initial plain
            # one) before starting the page of the next group.
            if ($container_open)
            {
                print $out_fh "</table>\n";
                print $out_fh "</div>\n";
            }
            print $out_fh "<div id=\"".html_escape($curr_plasmid)."\" class=\"page\" style=\"\">\n";
            print $out_fh $table_open;
            $container_open=1;
        }
        $prev_plasmid=$curr_plasmid;
    }

    # Rows are only ever written inside an open table.
    unless ($container_open)
    {
        print $out_fh "<div>\n$table_open";
        $container_open=1;
    }

    # Strip the version suffix (".12") from the domain key.
    $domain=~s/\.\d+//;

    my $name_html=html_escape($name);
    my $domain_html=html_escape($domain);
    my $hd_html=html_escape(uc $domain);
    my $clan_html=html_escape($clan);

    unless ($printed{$name})
    {
        my $seq=defined $c{$name} ? $c{$name} : "";
        $seq=~s/\*//g;                    # drop stop-codon markers
        $seq=html_escape(form($seq,90));
        print $out_fh "<td>\n";
        print $out_fh "<HR SIZE=3 WIDTH=80%></HR>\n";
        print $out_fh "<center><b>$name_html</b><br>\n</center>\n";
        print $out_fh "</td>\n<tr></tr>\n";
        print $out_fh "<td bgcolor=$color>\n";
        print $out_fh "<pre> \n$seq\n </pre>\n";
        print $out_fh "</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
        $ntokens=2;

        # Mark the protein as printed here, so that no later control-flow
        # path can cause its sequence to be written a second time.
        $printed{$name}=1;
    }

    my $domain_raw=defined $decode{$domain} ? $decode{$domain} : "";
    if ($domain_raw)
    {
        my $ddes=$domain_raw;
        $ddes=~s/\s+/ /g;
        my $td_open=($ntokens % 2==0) ? "<td>\n" : "<td bgcolor=$color>\n";
        print $out_fh $td_open;
        print $out_fh "<p align=\"left\">\n";
        print $out_fh "<a href=http://pfam.xfam.org/family/$hd_html> $domain_html</a>\n<p align=\"justify\">$ddes</p>\n\n";
        print $out_fh "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
        $ntokens++;
    }

    my $clan_raw=$clan_decode{$clan};
    # The clan cell is omitted when its text merely repeats the domain text.
    if ($clan_raw && $domain_raw ne $clan_raw)
    {
        my $clanD=$clan_raw;
        $clanD=~s/\s+/ /g;
        my $td_open=($ntokens % 2==0) ? "<td>\n" : "<td bgcolor=$color>\n";
        print $out_fh $td_open;
        print $out_fh "<p align=\"left\">\n";
        print $out_fh "<a href=http://pfam.xfam.org/clan/$clan_html> $clan_html</a>\n <p align=\"justify\">$clanD</p>\n\n";
        print $out_fh "</p>\n</td>\n<tr></tr>\n<td></td>\n<tr></tr>\n";
        $ntokens++;
    }
}
close($hits_fh);

if ($container_open)
{
    print $out_fh "</table>\n";
    print $out_fh "</div>\n";
}
print $out_fh "</body>\n";
print $out_fh "</html>\n";

# Buffered write errors only surface when the handle is closed.
close($out_fh) or die "Cannot finish writing $prefix: $!\n";
| 199 #print OUT "<br><br>Proteins without PFAM domains:\n<br>\n"; | |
| 200 #foreach my $seq (keys %c) | |
| 201 #{ | |
| 202 # next if $printed{$seq}; | |
| 203 # print OUT "<>$seq</pre>\n\n<br><br><left>\n$c{$seq}</left><br>\n"; | |
| 204 # print OUT "<HR SIZE=3 WIDTH=80%>\n"; | |
| 205 #} | |
| 206 #print OUT "</table>\n</div>\n</body>\n</html>\n"; | |
| 207 #close(OUT); | |
| 208 | |
# form($string, $len)
#
# Wrap $string into lines of at most $len characters.
#
# Returns the wrapped text, every line (including the last) terminated by
# "\n".  An undefined or empty $string yields the empty string.  If $len is
# undefined or smaller than 1 the string is returned as a single line,
# because a non-positive width could never advance through the string.
sub form
{
    my ($string,$len)=@_;

    return "" unless defined $string && length($string);
    return "$string\n" unless defined $len && int($len)>=1;
    $len=int($len);

    my $outS="";
    # Strictly "<": with "<=" a string whose length is an exact multiple of
    # $len would gain a spurious empty line at the end.
    for (my $i=0;$i<length($string);$i+=$len)
    {
        $outS.=substr($string,$i,$len)."\n";
    }
    return $outS;
}
