diff pfamScan/Bio/Pfam/HMM/HMMIO.pm @ 0:68a3648c7d91 draft default tip

Uploaded
author matteoc
date Thu, 22 Dec 2016 04:45:31 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pfamScan/Bio/Pfam/HMM/HMMIO.pm	Thu Dec 22 04:45:31 2016 -0500
@@ -0,0 +1,329 @@
+# HMM.pm
+#
+# Author:        finnr
+# Maintainer:    $Id: HMMIO.pm,v 1.3 2010-01-12 17:00:26 jm14 Exp $
+# Version:       $Revision: 1.3 $
+# Created:       Nov 24, 2008
+# Last Modified: $Date: 2010-01-12 17:00:26 $
+=head1 NAME
+
+Template - a short description of the class
+
+=cut
+
+package Bio::Pfam::HMM::HMMIO;
+
+=head1 DESCRIPTION
+
+A more detailed description of what this class does and how it does it.
+
+$Id: HMMIO.pm,v 1.3 2010-01-12 17:00:26 jm14 Exp $
+
+=head1 COPYRIGHT
+
+File: HMM.pm
+
+Copyright (c) 2007: Genome Research Ltd.
+
+Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk)
+
+ This is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
+ 
+=cut
+
+use strict;
+use warnings;
+
+use Moose;
+use Moose::Util::TypeConstraints;
+use Carp;
+use Bio::Pfam::HMM::HMM;
+
+#-------------------------------------------------------------------------------
+
+=head1 METHODS
+
+=cut
+sub readHMM {
+  my ($this, $hmm) = @_;
+  
+  unless($hmm){
+    confess("No HMM passed in!"); 
+  }
+  chomp($hmm);
+  
+  my @input;
+  if(ref($hmm) eq 'GLOB'){
+    @input = <$hmm>;
+  }elsif($hmm !~ /\n/ and -e $hmm and -s $hmm){
+    #Assume that we have a filename and try and open it;
+    open(HMM, $hmm) || confess("Could not open $hmm:[$!]");
+    @input = <HMM>;
+  }else{
+    @input = split(/\n/, $hmm); 
+  }
+  
+  
+  
+  #Parse the header section!  
+  #HMMER3/f [3.1b1 | May 2013]
+  #NAME  SEED
+  #ACC   PF000001.1
+  #DESC  A description
+  #LENG  55
+  #ALPH  amino
+  #RF    no
+  #MM    no
+  #CONS  yes
+  #CS    no
+  #MAP   yes
+  #DATE  Fri Nov 21 09:58:16 2008
+  #COM   [1] /Users/finnr/Work/Software/hmmer-3.0.20081101/bin/hmmbuild -o hmmbuild.log HMM SEED
+  #NSEQ  279
+  #EFFN  4.966292
+  #STATS LOCAL MSV      -11.4716  0.69948
+  #STATS LOCAL VITERBI  -12.3713  0.69948
+  #STATS LOCAL FORWARD   -5.5807  0.69948
+
+  #To add GA, TC, NC, CKSUM, DESC
+  my($objHash);
+  my $i =0; 
+  foreach ( @input ){  
+    if(my ($version) = $_ =~ /(HMMER3.*)/){
+      $objHash->{version} = $version;
+    }elsif(my ($acc) = $_ =~ /^ACC\s+(PF\d+\.\d+)$/){
+      $objHash->{accession} = $acc;
+    }elsif(/NAME\s+(\S+)/){ 
+      $objHash->{name} =  $1 ;
+    }elsif(/DESC\s+(.*)/){ 
+      $objHash->{description} =   $1 ;
+    }elsif(my ($length) = $_ =~ /^LENG\s+(\d+)/){
+      $objHash->{length} = $length;
+    }elsif( my ($alpha) = $_ =~ /^ALPH\s+(\S+)/){
+      $objHash->{alpha} = $alpha;
+    }elsif( my ($rf) = $_ =~ /^RF\s+(no|yes)/){
+      $objHash->{rf} = ($rf eq "no") ? 0 : 1; 
+    }elsif( my ($mm) = $_ =~ /^MM\s+(no|yes)/){
+      $objHash->{mm} = ($mm eq "no") ? 0 : 1; 
+    }elsif( my ($cons) = $_ =~ /^CONS\s+(no|yes)/){
+      $objHash->{cons} = ($cons eq "no") ? 0 : 1; 
+    }elsif(my ($cs) = $_ =~ /^CS\s+(no|yes)/ ){
+      $objHash->{cs} =  ($cs eq "no") ? 0 : 1; 
+    }elsif(my ($map) = $_ =~ /^MAP\s+(no|yes)/){
+      $objHash->{map} = ($map eq "no") ? 0 : 1; 
+    }elsif(my ($date) = $_ =~ /^DATE\s+(.*)/){
+      $objHash->{date} =  $date; 
+    }elsif(my ($sm) = $_ =~ /^SM\s+(.*)/){
+      $objHash->{searchMethod} =  $sm; 
+    
+    }elsif(my ($options, $hmmName, $alignName) = $input[$i] =~ /^BM.*hmmbuild(.*)? (\S+) (\S+)$/){
+      $objHash->{buildLine} = { cmd     => 'hmmbuild', 
+                                options => $options,
+                                name    => $hmmName,
+                                align   => $alignName } ;
+    }elsif(my($noSeqs) = $_ =~ /^NSEQ\s+(\d+)/){
+      $objHash->{nSeq} = $noSeqs;
+    }elsif( my($effn) = $_ =~ /^EFFN\s+(\d+\.\d+)/){
+       #EFFN  4.966292
+      $objHash->{effn} =  $effn ;
+    }elsif( my ( $cksum ) = $_ =~ /^CKSUM (\d+)/){
+      $objHash->{cksum} = $cksum ;
+    }elsif(/GA\s+(\S+)\s+(\S+)\;/){ 
+      $objHash->{seqGA} = $1;
+      $objHash->{domGA} = $2;
+    }elsif(/TC\s+(\S+)\s+(\S+)\;/){ 
+      $objHash->{seqTC} = $1;
+      $objHash->{domTC} = $2;
+    }elsif(/NC\s+(\S+)\s+(\S+)\;/){ 
+      $objHash->{seqNC} = $1;
+      $objHash->{domNC} = $2;
+    }elsif( my ($msv_mu, $msv_lambda ) = $_ =~ /^STATS LOCAL MSV\s+(\S+)\s+(0\.\d+)/){
+	$objHash->{msvStats} = { mu => $msv_mu, lambda => $msv_lambda};
+    }elsif( my ($viterbi_mu, $viterbi_lambda ) = $_ =~ /^STATS LOCAL VITERBI\s+(\S+)\s+(0\.\d+)/){
+	$objHash->{viterbiStats} = { mu => $viterbi_mu, lambda => $viterbi_lambda };
+    }elsif( my ($forward_tau, $forward_lambda ) = $_ =~ /^STATS LOCAL FORWARD\s+(\S+)\s+(0\.\d+)/){
+	$objHash->{forwardStats} = {tau => $forward_tau, lambda => $forward_lambda};
+    }elsif( $_ =~ /^HMM\s+A/){
+      last;
+    }else{
+      confess("Got a bad HMM header line:$input[$i]\n"); 
+    }
+    $i++;
+  }
+
+  my $hmmObj = Bio::Pfam::HMM::HMM->new($objHash);
+  
+  
+  #The next two lines are stand lines
+  #HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   
+  #          m->m     m->i     m->d     i->m     i->i     d->m     d->d
+  $i++;
+
+
+  #Add the comp line
+  for( my $line = 0; $line <=2; $line++){
+    $i++;
+    my @l = split(/\s+/, $input[$i]);
+    my @c;
+    if($line == 0 ){
+      @c = @l[2..21];
+    }elsif( $line == 1){
+      @c = @l[1..20]; 
+    }elsif($line == 2){
+      @c = @l[1..7];  
+    }
+    $hmmObj->compLines->[$line] = \@c;
+  }
+  
+
+  
+  for(my $pos = 0; $pos < $hmmObj->length; $pos++){
+    #There are three lines per position - match emission line, insert emission line, state transition line
+    for( my $line = 0; $line <=2; $line++){
+      $i++;
+      my @l = split(/\s+/, $input[$i]);
+      my @e;
+      if($line == 0 ){
+        @e = @l[2..21];
+        if($hmmObj->map){
+           $hmmObj->mapPos->[$pos] = $l[22];
+        } 
+      }elsif( $line == 1){
+        @e = @l[1..20]; 
+      }elsif($line == 2){
+        @e = @l[1..7];  
+      }
+      $hmmObj->emissionLines->[$pos]->[$line] = \@e;
+    }
+  }
+  
+  if($input[$i++] =~ /^\/\/$/){
+    confess("Expected file terminator //, but got $input[$i]\n"); 
+  }
+ 
+  #No veryifiy that we have COMP line and the the number of emissionlines is equivalent to length 
+  unless(scalar( @{ $hmmObj->emissionLines } ) == $hmmObj->length){
+    confess("Number of emssionLines does not match the length of the model, got ".scalar( @{ $hmmObj->emissionLines} ).
+        " expected ".$hmmObj->length);  
+  }
+  
+  unless($hmmObj->compLines){
+    confess("No compLine set on HMM"); 
+  }
+  
+  if($hmmObj->map){
+    unless(scalar(@{$hmmObj->mapPos}) == $hmmObj->length ){
+      confess("HMM object had map set, but the number of map positions does not match the length of the HMM");  
+    }; 
+  }
+  return $hmmObj;  
+}
+
+
+
+sub writeHMM {
+  my ($this, $hmm, $hmmObj) = @_;
+  
+  unless($hmm){
+    confess("No HMM out file passed in!"); 
+  }
+  
+  unless(ref($hmm) eq 'GLOB'){
+    my $hmmFile = $hmm;
+    $hmm = undef;
+    #Assume that we have a filename and try and open it;
+    open($hmm, ">$hmmFile") || confess("Could not open $hmmFile:[$!]");
+  }
+
+  print  $hmm $hmmObj->version."\n";
+  printf $hmm ("%-5s %s\n", "NAME", $hmmObj->name);
+  printf $hmm ("%-5s %s\n", "ACC",  $hmmObj->accession) if($hmmObj->accession);
+  printf $hmm ("%-5s %s\n", "DESC", $hmmObj->description) if($hmmObj->description);
+  printf $hmm ("%-5s %d\n", "LENG", $hmmObj->length);
+  printf $hmm ("%-5s %s\n", "ALPH", $hmmObj->alpha);
+  printf $hmm ("%-5s %s\n", "RF", ($hmmObj->rf ? "yes" : "no"));
+  printf $hmm ("%-5s %s\n", "MM", ($hmmObj->mm ? "yes" : "no"));
+  printf $hmm ("%-5s %s\n", "CONS", ($hmmObj->cons ? "yes" : "no"));
+  printf $hmm ("%-5s %s\n", "CS", ($hmmObj->cs ? "yes" : "no"));
+  printf $hmm ("%-5s %s\n", "MAP", ($hmmObj->map ? "yes" : "no"));
+  printf $hmm ("%-5s %s\n", "DATE", $hmmObj->date);
+  printf $hmm ("%-5s %d\n", "NSEQ", $hmmObj->nSeq);
+  printf $hmm ("%-5s %f\n", "EFFN", $hmmObj->effn);
+  printf $hmm ("%-5s %d\n", "CKSUM", $hmmObj->cksum);
+  printf $hmm ("%-5s %.2f %.2f;\n", "GA", $hmmObj->seqGA, $hmmObj->domGA) if(defined($hmmObj->seqGA));
+  printf $hmm ("%-5s %.2f %.2f;\n", "TC", $hmmObj->seqTC, $hmmObj->domTC) if(defined($hmmObj->seqTC));
+  printf $hmm ("%-5s %.2f %.2f;\n", "NC", $hmmObj->seqNC, $hmmObj->domNC) if(defined($hmmObj->seqNC));
+  
+  printf $hmm ("%-5s %s %-9s %.4f  %.5f\n", "STATS", "LOCAL", "MSV", $hmmObj->msvStats->{mu},  $hmmObj->msvStats->{lambda});
+  printf $hmm ("%-5s %s %-9s %.4f  %.5f\n", "STATS", "LOCAL", "VITERBI", $hmmObj->viterbiStats->{mu},  $hmmObj->viterbiStats->{lambda});
+  printf $hmm ("%-5s %s %-9s %.4f  %.5f\n", "STATS", "LOCAL", "FORWARD", $hmmObj->forwardStats->{tau},  $hmmObj->forwardStats->{lambda});
+  
+  print $hmm <<EOF;
+HMM          A        C        D        E        F        G        H        I        K        L        M        N        P        Q        R        S        T        V        W        Y   
+            m->m     m->i     m->d     i->m     i->i     d->m     d->d
+EOF
+  
+  printf $hmm ("%7s ", "COMPO");
+   foreach my $s (@{$hmmObj->compLines->[0]}){
+       printf $hmm ("  %7s", $s);
+    }
+    print $hmm "\n";
+     
+    print $hmm (" " x 8);
+    foreach my $s (@{$hmmObj->compLines->[1]}){
+       printf $hmm ("  %7s", $s);
+    }
+    print $hmm "\n";
+    
+    print $hmm (" " x 8);
+    foreach my $s (@{$hmmObj->compLines->[2]}){
+       printf $hmm ("  %7s", $s);
+    }
+    print $hmm "\n";
+    
+  
+  my $pos = 1;
+  foreach my $el (@{ $hmmObj->emissionLines }){
+    printf $hmm ("%7s ", $pos);
+    foreach my $s (@{$el->[0]}){
+       printf $hmm ("  %7s", $s);
+    }
+    if($hmmObj->map){
+      printf $hmm ("%7s - -\n", $hmmObj->mapPos->[$pos - 1]);  
+    }else{
+      print $hmm "\n";
+    } 
+    print $hmm (" " x 8);
+    foreach my $s (@{$el->[1]}){
+       printf $hmm ("  %7s", $s);
+    }
+    print $hmm "\n";
+    print $hmm (" " x 8);
+    foreach my $s (@{$el->[2]}){
+       printf $hmm ("  %7s", $s);
+    }
+    print $hmm "\n"; 
+    $pos++; 
+  }
+  
+
+  print $hmm "//\n";
+}
+
+
+
+1;
+