Mercurial > repos > matteoc > agame_custom_tools
comparison pfamScan/Bio/Pfam/HMM/HMMIO.pm @ 0:68a3648c7d91 draft default tip
Uploaded
| author | matteoc |
|---|---|
| date | Thu, 22 Dec 2016 04:45:31 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:68a3648c7d91 |
|---|---|
| 1 # HMM.pm | |
| 2 # | |
| 3 # Author: finnr | |
| 4 # Maintainer: $Id: HMMIO.pm,v 1.3 2010-01-12 17:00:26 jm14 Exp $ | |
| 5 # Version: $Revision: 1.3 $ | |
| 6 # Created: Nov 24, 2008 | |
| 7 # Last Modified: $Date: 2010-01-12 17:00:26 $ | |
| 8 =head1 NAME | |
| 9 | |
| 10 Template - a short description of the class | |
| 11 | |
| 12 =cut | |
| 13 | |
| 14 package Bio::Pfam::HMM::HMMIO; | |
| 15 | |
| 16 =head1 DESCRIPTION | |
| 17 | |
| 18 A more detailed description of what this class does and how it does it. | |
| 19 | |
| 20 $Id: HMMIO.pm,v 1.3 2010-01-12 17:00:26 jm14 Exp $ | |
| 21 | |
| 22 =head1 COPYRIGHT | |
| 23 | |
| 24 File: HMM.pm | |
| 25 | |
| 26 Copyright (c) 2007: Genome Research Ltd. | |
| 27 | |
| 28 Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk) | |
| 29 | |
| 30 This is free software; you can redistribute it and/or | |
| 31 modify it under the terms of the GNU General Public License | |
| 32 as published by the Free Software Foundation; either version 2 | |
| 33 of the License, or (at your option) any later version. | |
| 34 | |
| 35 This program is distributed in the hope that it will be useful, | |
| 36 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 37 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 38 GNU General Public License for more details. | |
| 39 | |
| 40 You should have received a copy of the GNU General Public License | |
| 41 along with this program; if not, write to the Free Software | |
| 42 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
| 43 or see the on-line version at http://www.gnu.org/copyleft/gpl.txt | |
| 44 | |
| 45 =cut | |
| 46 | |
| 47 use strict; | |
| 48 use warnings; | |
| 49 | |
| 50 use Moose; | |
| 51 use Moose::Util::TypeConstraints; | |
| 52 use Carp; | |
| 53 use Bio::Pfam::HMM::HMM; | |
| 54 | |
| 55 #------------------------------------------------------------------------------- | |
| 56 | |
| 57 =head1 METHODS | |
| 58 | |
| 59 =cut | |
| 60 sub readHMM { | |
| 61 my ($this, $hmm) = @_; | |
| 62 | |
| 63 unless($hmm){ | |
| 64 confess("No HMM passed in!"); | |
| 65 } | |
| 66 chomp($hmm); | |
| 67 | |
| 68 my @input; | |
| 69 if(ref($hmm) eq 'GLOB'){ | |
| 70 @input = <$hmm>; | |
| 71 }elsif($hmm !~ /\n/ and -e $hmm and -s $hmm){ | |
| 72 #Assume that we have a filename and try and open it; | |
| 73 open(HMM, $hmm) || confess("Could not open $hmm:[$!]"); | |
| 74 @input = <HMM>; | |
| 75 }else{ | |
| 76 @input = split(/\n/, $hmm); | |
| 77 } | |
| 78 | |
| 79 | |
| 80 | |
| 81 #Parse the header section! | |
| 82 #HMMER3/f [3.1b1 | May 2013] | |
| 83 #NAME SEED | |
| 84 #ACC PF000001.1 | |
| 85 #DESC A description | |
| 86 #LENG 55 | |
| 87 #ALPH amino | |
| 88 #RF no | |
| 89 #MM no | |
| 90 #CONS yes | |
| 91 #CS no | |
| 92 #MAP yes | |
| 93 #DATE Fri Nov 21 09:58:16 2008 | |
| 94 #COM [1] /Users/finnr/Work/Software/hmmer-3.0.20081101/bin/hmmbuild -o hmmbuild.log HMM SEED | |
| 95 #NSEQ 279 | |
| 96 #EFFN 4.966292 | |
| 97 #STATS LOCAL MSV -11.4716 0.69948 | |
| 98 #STATS LOCAL VITERBI -12.3713 0.69948 | |
| 99 #STATS LOCAL FORWARD -5.5807 0.69948 | |
| 100 | |
| 101 #To add GA, TC, NC, CKSUM, DESC | |
| 102 my($objHash); | |
| 103 my $i =0; | |
| 104 foreach ( @input ){ | |
| 105 if(my ($version) = $_ =~ /(HMMER3.*)/){ | |
| 106 $objHash->{version} = $version; | |
| 107 }elsif(my ($acc) = $_ =~ /^ACC\s+(PF\d+\.\d+)$/){ | |
| 108 $objHash->{accession} = $acc; | |
| 109 }elsif(/NAME\s+(\S+)/){ | |
| 110 $objHash->{name} = $1 ; | |
| 111 }elsif(/DESC\s+(.*)/){ | |
| 112 $objHash->{description} = $1 ; | |
| 113 }elsif(my ($length) = $_ =~ /^LENG\s+(\d+)/){ | |
| 114 $objHash->{length} = $length; | |
| 115 }elsif( my ($alpha) = $_ =~ /^ALPH\s+(\S+)/){ | |
| 116 $objHash->{alpha} = $alpha; | |
| 117 }elsif( my ($rf) = $_ =~ /^RF\s+(no|yes)/){ | |
| 118 $objHash->{rf} = ($rf eq "no") ? 0 : 1; | |
| 119 }elsif( my ($mm) = $_ =~ /^MM\s+(no|yes)/){ | |
| 120 $objHash->{mm} = ($mm eq "no") ? 0 : 1; | |
| 121 }elsif( my ($cons) = $_ =~ /^CONS\s+(no|yes)/){ | |
| 122 $objHash->{cons} = ($cons eq "no") ? 0 : 1; | |
| 123 }elsif(my ($cs) = $_ =~ /^CS\s+(no|yes)/ ){ | |
| 124 $objHash->{cs} = ($cs eq "no") ? 0 : 1; | |
| 125 }elsif(my ($map) = $_ =~ /^MAP\s+(no|yes)/){ | |
| 126 $objHash->{map} = ($map eq "no") ? 0 : 1; | |
| 127 }elsif(my ($date) = $_ =~ /^DATE\s+(.*)/){ | |
| 128 $objHash->{date} = $date; | |
| 129 }elsif(my ($sm) = $_ =~ /^SM\s+(.*)/){ | |
| 130 $objHash->{searchMethod} = $sm; | |
| 131 | |
| 132 }elsif(my ($options, $hmmName, $alignName) = $input[$i] =~ /^BM.*hmmbuild(.*)? (\S+) (\S+)$/){ | |
| 133 $objHash->{buildLine} = { cmd => 'hmmbuild', | |
| 134 options => $options, | |
| 135 name => $hmmName, | |
| 136 align => $alignName } ; | |
| 137 }elsif(my($noSeqs) = $_ =~ /^NSEQ\s+(\d+)/){ | |
| 138 $objHash->{nSeq} = $noSeqs; | |
| 139 }elsif( my($effn) = $_ =~ /^EFFN\s+(\d+\.\d+)/){ | |
| 140 #EFFN 4.966292 | |
| 141 $objHash->{effn} = $effn ; | |
| 142 }elsif( my ( $cksum ) = $_ =~ /^CKSUM (\d+)/){ | |
| 143 $objHash->{cksum} = $cksum ; | |
| 144 }elsif(/GA\s+(\S+)\s+(\S+)\;/){ | |
| 145 $objHash->{seqGA} = $1; | |
| 146 $objHash->{domGA} = $2; | |
| 147 }elsif(/TC\s+(\S+)\s+(\S+)\;/){ | |
| 148 $objHash->{seqTC} = $1; | |
| 149 $objHash->{domTC} = $2; | |
| 150 }elsif(/NC\s+(\S+)\s+(\S+)\;/){ | |
| 151 $objHash->{seqNC} = $1; | |
| 152 $objHash->{domNC} = $2; | |
| 153 }elsif( my ($msv_mu, $msv_lambda ) = $_ =~ /^STATS LOCAL MSV\s+(\S+)\s+(0\.\d+)/){ | |
| 154 $objHash->{msvStats} = { mu => $msv_mu, lambda => $msv_lambda}; | |
| 155 }elsif( my ($viterbi_mu, $viterbi_lambda ) = $_ =~ /^STATS LOCAL VITERBI\s+(\S+)\s+(0\.\d+)/){ | |
| 156 $objHash->{viterbiStats} = { mu => $viterbi_mu, lambda => $viterbi_lambda }; | |
| 157 }elsif( my ($forward_tau, $forward_lambda ) = $_ =~ /^STATS LOCAL FORWARD\s+(\S+)\s+(0\.\d+)/){ | |
| 158 $objHash->{forwardStats} = {tau => $forward_tau, lambda => $forward_lambda}; | |
| 159 }elsif( $_ =~ /^HMM\s+A/){ | |
| 160 last; | |
| 161 }else{ | |
| 162 confess("Got a bad HMM header line:$input[$i]\n"); | |
| 163 } | |
| 164 $i++; | |
| 165 } | |
| 166 | |
| 167 my $hmmObj = Bio::Pfam::HMM::HMM->new($objHash); | |
| 168 | |
| 169 | |
| 170 #The next two lines are stand lines | |
| 171 #HMM A C D E F G H I K L M N P Q R S T V W Y | |
| 172 # m->m m->i m->d i->m i->i d->m d->d | |
| 173 $i++; | |
| 174 | |
| 175 | |
| 176 #Add the comp line | |
| 177 for( my $line = 0; $line <=2; $line++){ | |
| 178 $i++; | |
| 179 my @l = split(/\s+/, $input[$i]); | |
| 180 my @c; | |
| 181 if($line == 0 ){ | |
| 182 @c = @l[2..21]; | |
| 183 }elsif( $line == 1){ | |
| 184 @c = @l[1..20]; | |
| 185 }elsif($line == 2){ | |
| 186 @c = @l[1..7]; | |
| 187 } | |
| 188 $hmmObj->compLines->[$line] = \@c; | |
| 189 } | |
| 190 | |
| 191 | |
| 192 | |
| 193 for(my $pos = 0; $pos < $hmmObj->length; $pos++){ | |
| 194 #There are three lines per position - match emission line, insert emission line, state transition line | |
| 195 for( my $line = 0; $line <=2; $line++){ | |
| 196 $i++; | |
| 197 my @l = split(/\s+/, $input[$i]); | |
| 198 my @e; | |
| 199 if($line == 0 ){ | |
| 200 @e = @l[2..21]; | |
| 201 if($hmmObj->map){ | |
| 202 $hmmObj->mapPos->[$pos] = $l[22]; | |
| 203 } | |
| 204 }elsif( $line == 1){ | |
| 205 @e = @l[1..20]; | |
| 206 }elsif($line == 2){ | |
| 207 @e = @l[1..7]; | |
| 208 } | |
| 209 $hmmObj->emissionLines->[$pos]->[$line] = \@e; | |
| 210 } | |
| 211 } | |
| 212 | |
| 213 if($input[$i++] =~ /^\/\/$/){ | |
| 214 confess("Expected file terminator //, but got $input[$i]\n"); | |
| 215 } | |
| 216 | |
| 217 #No veryifiy that we have COMP line and the the number of emissionlines is equivalent to length | |
| 218 unless(scalar( @{ $hmmObj->emissionLines } ) == $hmmObj->length){ | |
| 219 confess("Number of emssionLines does not match the length of the model, got ".scalar( @{ $hmmObj->emissionLines} ). | |
| 220 " expected ".$hmmObj->length); | |
| 221 } | |
| 222 | |
| 223 unless($hmmObj->compLines){ | |
| 224 confess("No compLine set on HMM"); | |
| 225 } | |
| 226 | |
| 227 if($hmmObj->map){ | |
| 228 unless(scalar(@{$hmmObj->mapPos}) == $hmmObj->length ){ | |
| 229 confess("HMM object had map set, but the number of map positions does not match the length of the HMM"); | |
| 230 }; | |
| 231 } | |
| 232 return $hmmObj; | |
| 233 } | |
| 234 | |
| 235 | |
| 236 | |
| 237 sub writeHMM { | |
| 238 my ($this, $hmm, $hmmObj) = @_; | |
| 239 | |
| 240 unless($hmm){ | |
| 241 confess("No HMM out file passed in!"); | |
| 242 } | |
| 243 | |
| 244 unless(ref($hmm) eq 'GLOB'){ | |
| 245 my $hmmFile = $hmm; | |
| 246 $hmm = undef; | |
| 247 #Assume that we have a filename and try and open it; | |
| 248 open($hmm, ">$hmmFile") || confess("Could not open $hmmFile:[$!]"); | |
| 249 } | |
| 250 | |
| 251 print $hmm $hmmObj->version."\n"; | |
| 252 printf $hmm ("%-5s %s\n", "NAME", $hmmObj->name); | |
| 253 printf $hmm ("%-5s %s\n", "ACC", $hmmObj->accession) if($hmmObj->accession); | |
| 254 printf $hmm ("%-5s %s\n", "DESC", $hmmObj->description) if($hmmObj->description); | |
| 255 printf $hmm ("%-5s %d\n", "LENG", $hmmObj->length); | |
| 256 printf $hmm ("%-5s %s\n", "ALPH", $hmmObj->alpha); | |
| 257 printf $hmm ("%-5s %s\n", "RF", ($hmmObj->rf ? "yes" : "no")); | |
| 258 printf $hmm ("%-5s %s\n", "MM", ($hmmObj->mm ? "yes" : "no")); | |
| 259 printf $hmm ("%-5s %s\n", "CONS", ($hmmObj->cons ? "yes" : "no")); | |
| 260 printf $hmm ("%-5s %s\n", "CS", ($hmmObj->cs ? "yes" : "no")); | |
| 261 printf $hmm ("%-5s %s\n", "MAP", ($hmmObj->map ? "yes" : "no")); | |
| 262 printf $hmm ("%-5s %s\n", "DATE", $hmmObj->date); | |
| 263 printf $hmm ("%-5s %d\n", "NSEQ", $hmmObj->nSeq); | |
| 264 printf $hmm ("%-5s %f\n", "EFFN", $hmmObj->effn); | |
| 265 printf $hmm ("%-5s %d\n", "CKSUM", $hmmObj->cksum); | |
| 266 printf $hmm ("%-5s %.2f %.2f;\n", "GA", $hmmObj->seqGA, $hmmObj->domGA) if(defined($hmmObj->seqGA)); | |
| 267 printf $hmm ("%-5s %.2f %.2f;\n", "TC", $hmmObj->seqTC, $hmmObj->domTC) if(defined($hmmObj->seqTC)); | |
| 268 printf $hmm ("%-5s %.2f %.2f;\n", "NC", $hmmObj->seqNC, $hmmObj->domNC) if(defined($hmmObj->seqNC)); | |
| 269 | |
| 270 printf $hmm ("%-5s %s %-9s %.4f %.5f\n", "STATS", "LOCAL", "MSV", $hmmObj->msvStats->{mu}, $hmmObj->msvStats->{lambda}); | |
| 271 printf $hmm ("%-5s %s %-9s %.4f %.5f\n", "STATS", "LOCAL", "VITERBI", $hmmObj->viterbiStats->{mu}, $hmmObj->viterbiStats->{lambda}); | |
| 272 printf $hmm ("%-5s %s %-9s %.4f %.5f\n", "STATS", "LOCAL", "FORWARD", $hmmObj->forwardStats->{tau}, $hmmObj->forwardStats->{lambda}); | |
| 273 | |
| 274 print $hmm <<EOF; | |
| 275 HMM A C D E F G H I K L M N P Q R S T V W Y | |
| 276 m->m m->i m->d i->m i->i d->m d->d | |
| 277 EOF | |
| 278 | |
| 279 printf $hmm ("%7s ", "COMPO"); | |
| 280 foreach my $s (@{$hmmObj->compLines->[0]}){ | |
| 281 printf $hmm (" %7s", $s); | |
| 282 } | |
| 283 print $hmm "\n"; | |
| 284 | |
| 285 print $hmm (" " x 8); | |
| 286 foreach my $s (@{$hmmObj->compLines->[1]}){ | |
| 287 printf $hmm (" %7s", $s); | |
| 288 } | |
| 289 print $hmm "\n"; | |
| 290 | |
| 291 print $hmm (" " x 8); | |
| 292 foreach my $s (@{$hmmObj->compLines->[2]}){ | |
| 293 printf $hmm (" %7s", $s); | |
| 294 } | |
| 295 print $hmm "\n"; | |
| 296 | |
| 297 | |
| 298 my $pos = 1; | |
| 299 foreach my $el (@{ $hmmObj->emissionLines }){ | |
| 300 printf $hmm ("%7s ", $pos); | |
| 301 foreach my $s (@{$el->[0]}){ | |
| 302 printf $hmm (" %7s", $s); | |
| 303 } | |
| 304 if($hmmObj->map){ | |
| 305 printf $hmm ("%7s - -\n", $hmmObj->mapPos->[$pos - 1]); | |
| 306 }else{ | |
| 307 print $hmm "\n"; | |
| 308 } | |
| 309 print $hmm (" " x 8); | |
| 310 foreach my $s (@{$el->[1]}){ | |
| 311 printf $hmm (" %7s", $s); | |
| 312 } | |
| 313 print $hmm "\n"; | |
| 314 print $hmm (" " x 8); | |
| 315 foreach my $s (@{$el->[2]}){ | |
| 316 printf $hmm (" %7s", $s); | |
| 317 } | |
| 318 print $hmm "\n"; | |
| 319 $pos++; | |
| 320 } | |
| 321 | |
| 322 | |
| 323 print $hmm "//\n"; | |
| 324 } | |
| 325 | |
| 326 | |
| 327 | |
| 328 1; | |
| 329 |
