| 
0
 | 
     1 # HMM.pm
 | 
| 
 | 
     2 #
 | 
| 
 | 
     3 # Author:        finnr
 | 
| 
 | 
     4 # Maintainer:    $Id: HMMIO.pm,v 1.3 2010-01-12 17:00:26 jm14 Exp $
 | 
| 
 | 
     5 # Version:       $Revision: 1.3 $
 | 
| 
 | 
     6 # Created:       Nov 24, 2008
 | 
| 
 | 
     7 # Last Modified: $Date: 2010-01-12 17:00:26 $
 | 
| 
 | 
     8 =head1 NAME
 | 
| 
 | 
     9 
 | 
| 
 | 
    10 Template - a short description of the class
 | 
| 
 | 
    11 
 | 
| 
 | 
    12 =cut
 | 
| 
 | 
    13 
 | 
| 
 | 
    14 package Bio::Pfam::HMM::HMMIO;
 | 
| 
 | 
    15 
 | 
| 
 | 
    16 =head1 DESCRIPTION
 | 
| 
 | 
    17 
 | 
| 
 | 
    18 A more detailed description of what this class does and how it does it.
 | 
| 
 | 
    19 
 | 
| 
 | 
    20 $Id: HMMIO.pm,v 1.3 2010-01-12 17:00:26 jm14 Exp $
 | 
| 
 | 
    21 
 | 
| 
 | 
    22 =head1 COPYRIGHT
 | 
| 
 | 
    23 
 | 
| 
 | 
    24 File: HMM.pm
 | 
| 
 | 
    25 
 | 
| 
 | 
    26 Copyright (c) 2007: Genome Research Ltd.
 | 
| 
 | 
    27 
 | 
| 
 | 
    28 Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk)
 | 
| 
 | 
    29 
 | 
| 
 | 
    30  This is free software; you can redistribute it and/or
 | 
| 
 | 
    31  modify it under the terms of the GNU General Public License
 | 
| 
 | 
    32  as published by the Free Software Foundation; either version 2
 | 
| 
 | 
    33  of the License, or (at your option) any later version.
 | 
| 
 | 
    34  
 | 
| 
 | 
    35  This program is distributed in the hope that it will be useful,
 | 
| 
 | 
    36  but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
| 
 | 
    37  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
| 
 | 
    38  GNU General Public License for more details.
 | 
| 
 | 
    39  
 | 
| 
 | 
    40  You should have received a copy of the GNU General Public License
 | 
| 
 | 
    41  along with this program; if not, write to the Free Software
 | 
| 
 | 
    42  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
| 
 | 
    43  or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
 | 
| 
 | 
    44  
 | 
| 
 | 
    45 =cut
 | 
| 
 | 
    46 
 | 
| 
 | 
    47 use strict;
 | 
| 
 | 
    48 use warnings;
 | 
| 
 | 
    49 
 | 
| 
 | 
    50 use Moose;
 | 
| 
 | 
    51 use Moose::Util::TypeConstraints;
 | 
| 
 | 
    52 use Carp;
 | 
| 
 | 
    53 use Bio::Pfam::HMM::HMM;
 | 
| 
 | 
    54 
 | 
| 
 | 
    55 #-------------------------------------------------------------------------------
 | 
| 
 | 
    56 
 | 
| 
 | 
    57 =head1 METHODS
 | 
| 
 | 
    58 
 | 
| 
 | 
    59 =cut
 | 
| 
 | 
    60 sub readHMM {
 | 
| 
 | 
    61   my ($this, $hmm) = @_;
 | 
| 
 | 
    62   
 | 
| 
 | 
    63   unless($hmm){
 | 
| 
 | 
    64     confess("No HMM passed in!"); 
 | 
| 
 | 
    65   }
 | 
| 
 | 
    66   chomp($hmm);
 | 
| 
 | 
    67   
 | 
| 
 | 
    68   my @input;
 | 
| 
 | 
    69   if(ref($hmm) eq 'GLOB'){
 | 
| 
 | 
    70     @input = <$hmm>;
 | 
| 
 | 
    71   }elsif($hmm !~ /\n/ and -e $hmm and -s $hmm){
 | 
| 
 | 
    72     #Assume that we have a filename and try and open it;
 | 
| 
 | 
    73     open(HMM, $hmm) || confess("Could not open $hmm:[$!]");
 | 
| 
 | 
    74     @input = <HMM>;
 | 
| 
 | 
    75   }else{
 | 
| 
 | 
    76     @input = split(/\n/, $hmm); 
 | 
| 
 | 
    77   }
 | 
| 
 | 
    78   
 | 
| 
 | 
    79   
 | 
| 
 | 
    80   
 | 
| 
 | 
    81   #Parse the header section!  
 | 
| 
 | 
    82   #HMMER3/f [3.1b1 | May 2013]
 | 
| 
 | 
    83   #NAME  SEED
 | 
| 
 | 
    84   #ACC   PF000001.1
 | 
| 
 | 
    85   #DESC  A description
 | 
| 
 | 
    86   #LENG  55
 | 
| 
 | 
    87   #ALPH  amino
 | 
| 
 | 
    88   #RF    no
 | 
| 
 | 
    89   #MM    no
 | 
| 
 | 
    90   #CONS  yes
 | 
| 
 | 
    91   #CS    no
 | 
| 
 | 
    92   #MAP   yes
 | 
| 
 | 
    93   #DATE  Fri Nov 21 09:58:16 2008
 | 
| 
 | 
    94   #COM   [1] /Users/finnr/Work/Software/hmmer-3.0.20081101/bin/hmmbuild -o hmmbuild.log HMM SEED
 | 
| 
 | 
    95   #NSEQ  279
 | 
| 
 | 
    96   #EFFN  4.966292
 | 
| 
 | 
    97   #STATS LOCAL MSV      -11.4716  0.69948
 | 
| 
 | 
    98   #STATS LOCAL VITERBI  -12.3713  0.69948
 | 
| 
 | 
    99   #STATS LOCAL FORWARD   -5.5807  0.69948
 | 
| 
 | 
   100 
 | 
| 
 | 
   101   #To add GA, TC, NC, CKSUM, DESC
 | 
| 
 | 
   102   my($objHash);
 | 
| 
 | 
   103   my $i =0; 
 | 
| 
 | 
   104   foreach ( @input ){  
 | 
| 
 | 
   105     if(my ($version) = $_ =~ /(HMMER3.*)/){
 | 
| 
 | 
   106       $objHash->{version} = $version;
 | 
| 
 | 
   107     }elsif(my ($acc) = $_ =~ /^ACC\s+(PF\d+\.\d+)$/){
 | 
| 
 | 
   108       $objHash->{accession} = $acc;
 | 
| 
 | 
   109     }elsif(/NAME\s+(\S+)/){ 
 | 
| 
 | 
   110       $objHash->{name} =  $1 ;
 | 
| 
 | 
   111     }elsif(/DESC\s+(.*)/){ 
 | 
| 
 | 
   112       $objHash->{description} =   $1 ;
 | 
| 
 | 
   113     }elsif(my ($length) = $_ =~ /^LENG\s+(\d+)/){
 | 
| 
 | 
   114       $objHash->{length} = $length;
 | 
| 
 | 
   115     }elsif( my ($alpha) = $_ =~ /^ALPH\s+(\S+)/){
 | 
| 
 | 
   116       $objHash->{alpha} = $alpha;
 | 
| 
 | 
   117     }elsif( my ($rf) = $_ =~ /^RF\s+(no|yes)/){
 | 
| 
 | 
   118       $objHash->{rf} = ($rf eq "no") ? 0 : 1; 
 | 
| 
 | 
   119     }elsif( my ($mm) = $_ =~ /^MM\s+(no|yes)/){
 | 
| 
 | 
   120       $objHash->{mm} = ($mm eq "no") ? 0 : 1; 
 | 
| 
 | 
   121     }elsif( my ($cons) = $_ =~ /^CONS\s+(no|yes)/){
 | 
| 
 | 
   122       $objHash->{cons} = ($cons eq "no") ? 0 : 1; 
 | 
| 
 | 
   123     }elsif(my ($cs) = $_ =~ /^CS\s+(no|yes)/ ){
 | 
| 
 | 
   124       $objHash->{cs} =  ($cs eq "no") ? 0 : 1; 
 | 
| 
 | 
   125     }elsif(my ($map) = $_ =~ /^MAP\s+(no|yes)/){
 | 
| 
 | 
   126       $objHash->{map} = ($map eq "no") ? 0 : 1; 
 | 
| 
 | 
   127     }elsif(my ($date) = $_ =~ /^DATE\s+(.*)/){
 | 
| 
 | 
   128       $objHash->{date} =  $date; 
 | 
| 
 | 
   129     }elsif(my ($sm) = $_ =~ /^SM\s+(.*)/){
 | 
| 
 | 
   130       $objHash->{searchMethod} =  $sm; 
 | 
| 
 | 
   131     
 | 
| 
 | 
   132     }elsif(my ($options, $hmmName, $alignName) = $input[$i] =~ /^BM.*hmmbuild(.*)? (\S+) (\S+)$/){
 | 
| 
 | 
   133       $objHash->{buildLine} = { cmd     => 'hmmbuild', 
 | 
| 
 | 
   134                                 options => $options,
 | 
| 
 | 
   135                                 name    => $hmmName,
 | 
| 
 | 
   136                                 align   => $alignName } ;
 | 
| 
 | 
   137     }elsif(my($noSeqs) = $_ =~ /^NSEQ\s+(\d+)/){
 | 
| 
 | 
   138       $objHash->{nSeq} = $noSeqs;
 | 
| 
 | 
   139     }elsif( my($effn) = $_ =~ /^EFFN\s+(\d+\.\d+)/){
 | 
| 
 | 
   140        #EFFN  4.966292
 | 
| 
 | 
   141       $objHash->{effn} =  $effn ;
 | 
| 
 | 
   142     }elsif( my ( $cksum ) = $_ =~ /^CKSUM (\d+)/){
 | 
| 
 | 
   143       $objHash->{cksum} = $cksum ;
 | 
| 
 | 
   144     }elsif(/GA\s+(\S+)\s+(\S+)\;/){ 
 | 
| 
 | 
   145       $objHash->{seqGA} = $1;
 | 
| 
 | 
   146       $objHash->{domGA} = $2;
 | 
| 
 | 
   147     }elsif(/TC\s+(\S+)\s+(\S+)\;/){ 
 | 
| 
 | 
   148       $objHash->{seqTC} = $1;
 | 
| 
 | 
   149       $objHash->{domTC} = $2;
 | 
| 
 | 
   150     }elsif(/NC\s+(\S+)\s+(\S+)\;/){ 
 | 
| 
 | 
   151       $objHash->{seqNC} = $1;
 | 
| 
 | 
   152       $objHash->{domNC} = $2;
 | 
| 
 | 
   153     }elsif( my ($msv_mu, $msv_lambda ) = $_ =~ /^STATS LOCAL MSV\s+(\S+)\s+(0\.\d+)/){
 | 
| 
 | 
   154 	$objHash->{msvStats} = { mu => $msv_mu, lambda => $msv_lambda};
 | 
| 
 | 
   155     }elsif( my ($viterbi_mu, $viterbi_lambda ) = $_ =~ /^STATS LOCAL VITERBI\s+(\S+)\s+(0\.\d+)/){
 | 
| 
 | 
   156 	$objHash->{viterbiStats} = { mu => $viterbi_mu, lambda => $viterbi_lambda };
 | 
| 
 | 
   157     }elsif( my ($forward_tau, $forward_lambda ) = $_ =~ /^STATS LOCAL FORWARD\s+(\S+)\s+(0\.\d+)/){
 | 
| 
 | 
   158 	$objHash->{forwardStats} = {tau => $forward_tau, lambda => $forward_lambda};
 | 
| 
 | 
   159     }elsif( $_ =~ /^HMM\s+A/){
 | 
| 
 | 
   160       last;
 | 
| 
 | 
   161     }else{
 | 
| 
 | 
   162       confess("Got a bad HMM header line:$input[$i]\n"); 
 | 
| 
 | 
   163     }
 | 
| 
 | 
   164     $i++;
 | 
| 
 | 
   165   }
 | 
| 
 | 
   166 
 | 
| 
 | 
   167   my $hmmObj = Bio::Pfam::HMM::HMM->new($objHash);
 | 
| 
 | 
   168   
 | 
| 
 | 
   169   
 | 
| 
 | 
   170   #The next two lines are stand lines
 | 
| 
 | 
   171   #HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   
 | 
| 
 | 
   172   #          m->m     m->i     m->d     i->m     i->i     d->m     d->d
 | 
| 
 | 
   173   $i++;
 | 
| 
 | 
   174 
 | 
| 
 | 
   175 
 | 
| 
 | 
   176   #Add the comp line
 | 
| 
 | 
   177   for( my $line = 0; $line <=2; $line++){
 | 
| 
 | 
   178     $i++;
 | 
| 
 | 
   179     my @l = split(/\s+/, $input[$i]);
 | 
| 
 | 
   180     my @c;
 | 
| 
 | 
   181     if($line == 0 ){
 | 
| 
 | 
   182       @c = @l[2..21];
 | 
| 
 | 
   183     }elsif( $line == 1){
 | 
| 
 | 
   184       @c = @l[1..20]; 
 | 
| 
 | 
   185     }elsif($line == 2){
 | 
| 
 | 
   186       @c = @l[1..7];  
 | 
| 
 | 
   187     }
 | 
| 
 | 
   188     $hmmObj->compLines->[$line] = \@c;
 | 
| 
 | 
   189   }
 | 
| 
 | 
   190   
 | 
| 
 | 
   191 
 | 
| 
 | 
   192   
 | 
| 
 | 
   193   for(my $pos = 0; $pos < $hmmObj->length; $pos++){
 | 
| 
 | 
   194     #There are three lines per position - match emission line, insert emission line, state transition line
 | 
| 
 | 
   195     for( my $line = 0; $line <=2; $line++){
 | 
| 
 | 
   196       $i++;
 | 
| 
 | 
   197       my @l = split(/\s+/, $input[$i]);
 | 
| 
 | 
   198       my @e;
 | 
| 
 | 
   199       if($line == 0 ){
 | 
| 
 | 
   200         @e = @l[2..21];
 | 
| 
 | 
   201         if($hmmObj->map){
 | 
| 
 | 
   202            $hmmObj->mapPos->[$pos] = $l[22];
 | 
| 
 | 
   203         } 
 | 
| 
 | 
   204       }elsif( $line == 1){
 | 
| 
 | 
   205         @e = @l[1..20]; 
 | 
| 
 | 
   206       }elsif($line == 2){
 | 
| 
 | 
   207         @e = @l[1..7];  
 | 
| 
 | 
   208       }
 | 
| 
 | 
   209       $hmmObj->emissionLines->[$pos]->[$line] = \@e;
 | 
| 
 | 
   210     }
 | 
| 
 | 
   211   }
 | 
| 
 | 
   212   
 | 
| 
 | 
   213   if($input[$i++] =~ /^\/\/$/){
 | 
| 
 | 
   214     confess("Expected file terminator //, but got $input[$i]\n"); 
 | 
| 
 | 
   215   }
 | 
| 
 | 
   216  
 | 
| 
 | 
   217   #No veryifiy that we have COMP line and the the number of emissionlines is equivalent to length 
 | 
| 
 | 
   218   unless(scalar( @{ $hmmObj->emissionLines } ) == $hmmObj->length){
 | 
| 
 | 
   219     confess("Number of emssionLines does not match the length of the model, got ".scalar( @{ $hmmObj->emissionLines} ).
 | 
| 
 | 
   220         " expected ".$hmmObj->length);  
 | 
| 
 | 
   221   }
 | 
| 
 | 
   222   
 | 
| 
 | 
   223   unless($hmmObj->compLines){
 | 
| 
 | 
   224     confess("No compLine set on HMM"); 
 | 
| 
 | 
   225   }
 | 
| 
 | 
   226   
 | 
| 
 | 
   227   if($hmmObj->map){
 | 
| 
 | 
   228     unless(scalar(@{$hmmObj->mapPos}) == $hmmObj->length ){
 | 
| 
 | 
   229       confess("HMM object had map set, but the number of map positions does not match the length of the HMM");  
 | 
| 
 | 
   230     }; 
 | 
| 
 | 
   231   }
 | 
| 
 | 
   232   return $hmmObj;  
 | 
| 
 | 
   233 }
 | 
| 
 | 
   234 
 | 
| 
 | 
   235 
 | 
| 
 | 
   236 
 | 
| 
 | 
   237 sub writeHMM {
 | 
| 
 | 
   238   my ($this, $hmm, $hmmObj) = @_;
 | 
| 
 | 
   239   
 | 
| 
 | 
   240   unless($hmm){
 | 
| 
 | 
   241     confess("No HMM out file passed in!"); 
 | 
| 
 | 
   242   }
 | 
| 
 | 
   243   
 | 
| 
 | 
   244   unless(ref($hmm) eq 'GLOB'){
 | 
| 
 | 
   245     my $hmmFile = $hmm;
 | 
| 
 | 
   246     $hmm = undef;
 | 
| 
 | 
   247     #Assume that we have a filename and try and open it;
 | 
| 
 | 
   248     open($hmm, ">$hmmFile") || confess("Could not open $hmmFile:[$!]");
 | 
| 
 | 
   249   }
 | 
| 
 | 
   250 
 | 
| 
 | 
   251   print  $hmm $hmmObj->version."\n";
 | 
| 
 | 
   252   printf $hmm ("%-5s %s\n", "NAME", $hmmObj->name);
 | 
| 
 | 
   253   printf $hmm ("%-5s %s\n", "ACC",  $hmmObj->accession) if($hmmObj->accession);
 | 
| 
 | 
   254   printf $hmm ("%-5s %s\n", "DESC", $hmmObj->description) if($hmmObj->description);
 | 
| 
 | 
   255   printf $hmm ("%-5s %d\n", "LENG", $hmmObj->length);
 | 
| 
 | 
   256   printf $hmm ("%-5s %s\n", "ALPH", $hmmObj->alpha);
 | 
| 
 | 
   257   printf $hmm ("%-5s %s\n", "RF", ($hmmObj->rf ? "yes" : "no"));
 | 
| 
 | 
   258   printf $hmm ("%-5s %s\n", "MM", ($hmmObj->mm ? "yes" : "no"));
 | 
| 
 | 
   259   printf $hmm ("%-5s %s\n", "CONS", ($hmmObj->cons ? "yes" : "no"));
 | 
| 
 | 
   260   printf $hmm ("%-5s %s\n", "CS", ($hmmObj->cs ? "yes" : "no"));
 | 
| 
 | 
   261   printf $hmm ("%-5s %s\n", "MAP", ($hmmObj->map ? "yes" : "no"));
 | 
| 
 | 
   262   printf $hmm ("%-5s %s\n", "DATE", $hmmObj->date);
 | 
| 
 | 
   263   printf $hmm ("%-5s %d\n", "NSEQ", $hmmObj->nSeq);
 | 
| 
 | 
   264   printf $hmm ("%-5s %f\n", "EFFN", $hmmObj->effn);
 | 
| 
 | 
   265   printf $hmm ("%-5s %d\n", "CKSUM", $hmmObj->cksum);
 | 
| 
 | 
   266   printf $hmm ("%-5s %.2f %.2f;\n", "GA", $hmmObj->seqGA, $hmmObj->domGA) if(defined($hmmObj->seqGA));
 | 
| 
 | 
   267   printf $hmm ("%-5s %.2f %.2f;\n", "TC", $hmmObj->seqTC, $hmmObj->domTC) if(defined($hmmObj->seqTC));
 | 
| 
 | 
   268   printf $hmm ("%-5s %.2f %.2f;\n", "NC", $hmmObj->seqNC, $hmmObj->domNC) if(defined($hmmObj->seqNC));
 | 
| 
 | 
   269   
 | 
| 
 | 
   270   printf $hmm ("%-5s %s %-9s %.4f  %.5f\n", "STATS", "LOCAL", "MSV", $hmmObj->msvStats->{mu},  $hmmObj->msvStats->{lambda});
 | 
| 
 | 
   271   printf $hmm ("%-5s %s %-9s %.4f  %.5f\n", "STATS", "LOCAL", "VITERBI", $hmmObj->viterbiStats->{mu},  $hmmObj->viterbiStats->{lambda});
 | 
| 
 | 
   272   printf $hmm ("%-5s %s %-9s %.4f  %.5f\n", "STATS", "LOCAL", "FORWARD", $hmmObj->forwardStats->{tau},  $hmmObj->forwardStats->{lambda});
 | 
| 
 | 
   273   
 | 
| 
 | 
   274   print $hmm <<EOF;
 | 
| 
 | 
   275 HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   
 | 
| 
 | 
   276             m->m     m->i     m->d     i->m     i->i     d->m     d->d
 | 
| 
 | 
   277 EOF
 | 
| 
 | 
   278   
 | 
| 
 | 
   279   printf $hmm ("%7s ", "COMPO");
 | 
| 
 | 
   280    foreach my $s (@{$hmmObj->compLines->[0]}){
 | 
| 
 | 
   281        printf $hmm ("  %7s", $s);
 | 
| 
 | 
   282     }
 | 
| 
 | 
   283     print $hmm "\n";
 | 
| 
 | 
   284      
 | 
| 
 | 
   285     print $hmm (" " x 8);
 | 
| 
 | 
   286     foreach my $s (@{$hmmObj->compLines->[1]}){
 | 
| 
 | 
   287        printf $hmm ("  %7s", $s);
 | 
| 
 | 
   288     }
 | 
| 
 | 
   289     print $hmm "\n";
 | 
| 
 | 
   290     
 | 
| 
 | 
   291     print $hmm (" " x 8);
 | 
| 
 | 
   292     foreach my $s (@{$hmmObj->compLines->[2]}){
 | 
| 
 | 
   293        printf $hmm ("  %7s", $s);
 | 
| 
 | 
   294     }
 | 
| 
 | 
   295     print $hmm "\n";
 | 
| 
 | 
   296     
 | 
| 
 | 
   297   
 | 
| 
 | 
   298   my $pos = 1;
 | 
| 
 | 
   299   foreach my $el (@{ $hmmObj->emissionLines }){
 | 
| 
 | 
   300     printf $hmm ("%7s ", $pos);
 | 
| 
 | 
   301     foreach my $s (@{$el->[0]}){
 | 
| 
 | 
   302        printf $hmm ("  %7s", $s);
 | 
| 
 | 
   303     }
 | 
| 
 | 
   304     if($hmmObj->map){
 | 
| 
 | 
   305       printf $hmm ("%7s - -\n", $hmmObj->mapPos->[$pos - 1]);  
 | 
| 
 | 
   306     }else{
 | 
| 
 | 
   307       print $hmm "\n";
 | 
| 
 | 
   308     } 
 | 
| 
 | 
   309     print $hmm (" " x 8);
 | 
| 
 | 
   310     foreach my $s (@{$el->[1]}){
 | 
| 
 | 
   311        printf $hmm ("  %7s", $s);
 | 
| 
 | 
   312     }
 | 
| 
 | 
   313     print $hmm "\n";
 | 
| 
 | 
   314     print $hmm (" " x 8);
 | 
| 
 | 
   315     foreach my $s (@{$el->[2]}){
 | 
| 
 | 
   316        printf $hmm ("  %7s", $s);
 | 
| 
 | 
   317     }
 | 
| 
 | 
   318     print $hmm "\n"; 
 | 
| 
 | 
   319     $pos++; 
 | 
| 
 | 
   320   }
 | 
| 
 | 
   321   
 | 
| 
 | 
   322 
 | 
| 
 | 
   323   print $hmm "//\n";
 | 
| 
 | 
   324 }
 | 
| 
 | 
   325 
 | 
| 
 | 
   326 
 | 
| 
 | 
   327 
 | 
| 
 | 
   328 1;
 | 
| 
 | 
   329 
 |