view pfamScan/Bio/Pfam/HMM/HMMResultsIO.pm @ 0:68a3648c7d91 draft default tip

Uploaded
author matteoc
date Thu, 22 Dec 2016 04:45:31 -0500
parents
children
line wrap: on
line source

# HMMResultsIO.pm
#
# Author:        rdf
# Maintainer:    $Id: HMMResultsIO.pm,v 1.2 2009-12-01 15:42:20 jt6 Exp $
# Version:       $Revision: 1.2 $
# Created:       Nov 16, 2008
# Last Modified: $Date: 2009-12-01 15:42:20 $

=head1 NAME

Template - a short description of the class

=cut

package Bio::Pfam::HMM::HMMResultsIO;

=head1 DESCRIPTION

A more detailed description of what this class does and how it does it.

$Id: HMMResultsIO.pm,v 1.2 2009-12-01 15:42:20 jt6 Exp $

=head1 COPYRIGHT

File: HMMResultsIO.pm

Copyright (c) 2007: Genome Research Ltd.

Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk)

 This is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
 
=cut

use strict;
use warnings;
use Moose;
use Carp;

#All the things we need to objectfy the search results
use Bio::Pfam::HMM::HMMResults;
use Bio::Pfam::HMM::HMMSequence;
use Bio::Pfam::HMM::HMMUnit;

#-------------------------------------------------------------------------------

=head1 ATTRIBUTES



=cut

has 'align' => (
  isa     => 'Int',
  is      => 'rw',
  default => 0
);

has 'outfile' => (
  isa     => 'Str',
  is      => 'rw',
  default => 'OUTPUT'
);

has 'pfamout' => (
  isa     => 'Str',
  is      => 'rw',
  default => 'PFAMOUT'
);

has 'scores' => (
  isa     => 'Str',
  is      => 'rw',
  default => 'scores'
);

#-------------------------------------------------------------------------------

=head1 METHODS

=head2 parseHMMER3

  Title    : parseHMMER 
  Usage    : $hmmResIO->parseHMMSearch( filename )
  Function : Parse the output from a HMMER3 search results 
  Args     : Filename containing the search 
  Returns  : A Bio::Pfam::HMM::HMMResults object
  
=cut

sub parseHMMER3 {
  my ( $self, $filename ) = @_;
  my $fh;
 
  if(ref($filename) eq 'GLOB'){
    $fh = $filename;
  }else{
    open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
  }
  
#  open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
  my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
  $self->_readHeader( $fh, $hmmRes );
  $self->_readSeqHits( $fh, $hmmRes );
  $self->_readUnitHits( $fh, $hmmRes );
  $self->_readFooter($fh, $hmmRes);
  return ($hmmRes);
}



sub parseMultiHMMER3 {
  my ( $self, $filename ) = @_;
  my $fh;
  
  if(ref($filename) eq 'GLOB'){
    $fh = $filename;
  }elsif( ref($filename) and $filename->isa('IO::File') ) {
      $fh = $filename;
  }else{
    open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
  }  
  
  my @hmmResAll;
  my $program;
  while(!eof($fh)){
    my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
    my $eof = $self->_readHeader( $fh, $hmmRes ); 
    last if($eof);
    push(@hmmResAll, $hmmRes);
    if($hmmRes->program) {
	$program = $hmmRes->program;
    }
    else {
	$hmmRes->program($program);
    }
    $self->_readSeqHits( $fh, $hmmRes );
    $self->_readUnitHits( $fh, $hmmRes );
    $self->_readFooter($fh, $hmmRes);
  }
  return (\@hmmResAll);
}

sub parseSplitHMMER3 {
  my($self, $files ) = @_;
  
  my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
  
  foreach my $filename (@{$files}){
    my ($fh);
    open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
    $self->_readHeader( $fh, $hmmRes );
    $self->_readSeqHits( $fh, $hmmRes );
    $self->_readUnitHits( $fh, $hmmRes );
    $self->_readFooter($fh, $hmmRes);
  }
  
  return ( $hmmRes );
    
}


#-------------------------------------------------------------------------------

=head2 convertHMMSearch 

  Title    : convertHMMSearch
  Usage    : $hmmResIO->convertHMMSearch('SEARCHFILE') 
  Function : This wraps up a couple of methods to convert the more complex hmmsearch 
           : results in to nice clean format that we Pfam-ers are used to. 
  Args     : The filename of the hmmsearch output file
  Returns  : Nothing
  
=cut

sub convertHMMSearch {
  my ( $self, $filename ) = @_;

  unless ($filename) {
    confess "No filename passed in to convertHMMSearch\n";
  }
  unless ( -s $filename ) {
    confess "$filename does not exists\n";
  }

  #Now parse in the raw HMM output and write out the results as a PFAMOUT
  my $hmmRes = $self->parseHMMER3($filename);
  $self->writePFAMOUT($hmmRes);
  return $hmmRes;
}

#-------------------------------------------------------------------------------

=head2 writePFAMOUT 

  Title    : writePFAMOUT
  Usage    : $hmmResIO->writePFAMOUT( $hmmRes ) 
  Function : Writes a Bio::Pfam::HMM:HMMResults object in to a PFAMOUT file. 
  Args     : A Bio::Pfam::HMM:HMMResults
  Returns  : Nothing
  
=cut

sub writePFAMOUT {
  my ( $self, $hmmRes ) = @_;

  unless ($hmmRes) {
    confess "A Bio::Pfam::HMM::HMMResults object was not parsed in\n";
  }
  unless ( $hmmRes->isa("Bio::Pfam::HMM::HMMResults") ) {
    confess("Variable passed in is not a Bio::Pfam::HMM::Results object");
  }

  my $fh;
  open( $fh, ">" . $self->pfamout )
    or confess "Could not open " . $self->pfamout . ":[$!]\n";

  print $fh <<HEAD;
# ===========
# Pfam output
# ===========
#
# Sequence scores
# ---------------
#
# name           description                                   bits      evalue   n   exp  bias  
 
HEAD

  foreach
    my $seq ( sort { $b->bits <=> $a->bits } ( @{ $hmmRes->eachHMMSeq } ) )
  {
    $_ = $seq->desc;
    my ($desc) = /^(.{1,42})/;
    $desc = uc($desc);
    printf $fh (
      "%-15s  %-42s %8.1f  %9s %3d %5.1f %5.1f\n",
      $seq->name,
      $desc,
      $seq->bits,
      $seq->evalue,
      scalar( @{ $seq->hmmUnits } ),
      defined( $seq->exp )  ? $seq->exp  : "-",
      defined( $seq->bias ) ? $seq->bias : "-"
    );
  }

  print $fh <<HEAD;
#
# Domain scores
# -------------
#
# name           env-st  env-en  ali-st  ali-en  hmm-st  hmm-en   bits     evalue    hit   bias
#

HEAD

  foreach my $dom ( sort { $b->bits <=> $a->bits } @{ $hmmRes->units } ) {
    
    
    printf $fh (
      "%-15s  %6d  %6d  %6d  %6d  %6s  %6s %6.1f  %9s %6d %6.1f\n",
      $dom->name,
      $dom->envFrom,
      $dom->envTo,
      $dom->seqFrom,
      $dom->seqTo,
      $dom->hmmFrom,
      $dom->hmmTo,
      $dom->bits,
      $dom->evalue,
      $dom->domain,
      defined( $dom->bias ) ? $dom->bias : "-",

    );
  }
}

#-------------------------------------------------------------------------------

=head2 parsePFAMOUT 

  Title    : parsePFAMOUT
  Usage    : $self->parsePFAMOUT($filename)
  Function : Reads in a PFAMOUT file.  This file contains the minimal amount of information
           : require to constrcut a pfam ALIGN file.   
  Args     : A filename. Normally this is filename
  Returns  : A Bio::Pfam::HMM::HMMResults object
  
=cut

sub parsePFAMOUT {
  my $self     = shift;
  my $filename = shift;

  unless ($filename) {
    confess('No filename or filehandle passed to parsePFAMOUT');
  }

  my $fh;
  if ( ref($filename) eq 'GLOB' ) {
    $fh = $filename;
  }
  else {
    open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
  }
  my $hmmRes = Bio::Pfam::HMM::HMMResults->new;

  while (<$fh>) {
    /^# Domain scores/ && last;

    #if (/^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\d+)\s*$/) {
    if (/^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\d+)\s+\S+\s+(\S+)\s*$/) {
      
      $hmmRes->addHMMSeq(
        Bio::Pfam::HMM::HMMSequence->new(
          {
            name       => $1,
            desc       => $2,
            bits       => $3,
            evalue     => $4,
            numberHits => $5,
	    bias       => $6
          }
        )
      );
    }
    elsif (/^#|^\s+$/) {
      next;
    }
    else {
      warn "Did not parse|$_|\n";
    }
  }
  while (<$fh>) {

    #if (/^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s*$/) {
    if (
      /^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+\S+\s+(\S+)/)
    {
      $hmmRes->addHMMUnit(
        Bio::Pfam::HMM::HMMUnit->new(
          {
            name     => $1,
            envFrom  => $2,
            envTo    => $3,
            seqFrom  => $4,
            seqTo    => $5,
            hmmFrom  => $6,
            hmmTo    => $7,
            bits     => $8,
            evalue   => $9,
	    bias     => $10
          }
        )
      );
    }
    elsif (/^#|^\s+$/) {
      next;
    }
    elsif (/^$/) {
      next;
    }
    else {
      warn "Did not parse: |$_|";
    }
  }
  close($fh);
  return ($hmmRes);
}

#-------------------------------------------------------------------------------

=head2 _readHeader 

  Title    : _readHeader
  Usage    : Private method.  $self->_readHeader(\*FH, $hmmResults)
  Function : Reads the header section from a HMMER3 hmmsearch   
  Args     : The file handle to hmmsearch output, a Bio::Pfam::HMM::HMMResults object
  Returns  : Nothing
  
=cut

#Parse the header part of the output first;
sub _readHeader {
  my ( $self, $hs, $hmmRes ) = @_;

  #Check the $hs is defined and a GLOB

  while (<$hs>) {
    if (/^Scores for complete/) {
      last;
    }
    elsif (/^# query HMM file:\s+(\S+)/) {
      $hmmRes->hmmName($1);
    }
    elsif (/^# target sequence database:\s+(\S+)/) {
      $hmmRes->seqDB($1);
    }
    elsif (/^output directed to file:\s+(\S+)/) {
      $hmmRes->thisFile($1);
    }
    elsif (/^Query:\s+(\S+)\s+\[M\=(\d+)\]/) {
      $hmmRes->seedName($1);
      $hmmRes->hmmLength($2);
    }elsif(/^Query:\s+(\S+)\s+\[L\=(\d+)\]/) {
	$hmmRes->seqName($1); 
      $hmmRes->seqLength($2);
    }elsif (/^sequence E-value threshold: <= (\d+)/) {
      $hmmRes->evalueThr($1);
    }
    elsif (/^# Random generator seed:      (\d+)/) {
      $hmmRes->randSeedNum($1);
    }elsif(/^Description:\s+(.*)/){
      $hmmRes->description($1);  
    }elsif(/^# (phmmer|hmmsearch|hmmscan|jackhmmer)/){
      $hmmRes->program($1);
    }elsif (/(^#)|(^$)/) {
      next;
    }elsif(/^Accession/){
      next; 
    } elsif(/^\[ok\]/) {
      return(1);
    } else {
      die "Failed to parse hmmsearch results |$_| in header section\n";
    }
  }
}

#-------------------------------------------------------------------------------

=head2 _readSeqHits 

  Title    : _readSeqHits
  Usage    : Private method.  $self->_readSeqHits(\*FH, $hmmResults)
  Function : Reads the sequence hits from a HMMER3 hmmsearch   
  Args     : The file handle to hmmsearch output, a Bio::Pfam::HMM::HMMResults object
  Returns  : Nothing
  
=cut

sub _readSeqHits {
  my ( $self, $hs, $hmmRes ) = @_;
  while (<$hs>) {

#Match a line like this
# E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
#    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------
#      4e-83  285.8  10.0    5.3e-83  285.5   7.0    1.1  1  Q14SN3.1 Q14SN3_9HEPC Polyprotein (Fragment).
    if (/^Domain annotation for each [sequence|model]/) { # This is the format for HMMER3b3
      last;
    }
    elsif (/^Domain and alignment annotation for each [sequence|model]/) {  #This is the format for HMMER3b2 - can be removed later
	last;
    }
    elsif (/^\s+(E-value|---)/) {
      next;
    }
    elsif (/^$/) {
      next;
    }
    else {
      next if(/No hits detected that satisfy reporting thresholds/);

      #Assume that we have a sequence match
      my @sMatch = split( /\s+/, $_ );
      unless ( scalar(@sMatch) >= 10 ) {
        die "Expected at least 10 pieces of data: $_;\n";
      }
      my $desc;
      if ( scalar(@sMatch) >= 11 ) {
        $desc = join( " ", @sMatch[ 10 .. $#sMatch ] );
      }

      $hmmRes->addHMMSeq(
        Bio::Pfam::HMM::HMMSequence->new(
          {
            evalue     => $sMatch[1],
            bits       => $sMatch[2],
            bias       => $sMatch[3],
            exp        => $sMatch[7],
            numberHits => $sMatch[8],
            name       => $sMatch[9],
            desc       => defined($desc) ? $desc : "-",
          }
        )
      );
      
      next;
    }
    die "Failed to parse $_ in sequence section\n";
  }

}

#------------------------------------------------------------------------------

=head2 _readUnitHits 

  Title    : _readUnitHits
  Usage    : Private method.  $self->_readUnitHits(\*FH, $hmmResults)
  Function : Reads the unit (domain) hits from a HMMER3 hmmsearch   
  Args     : The file handle to hmmsearch output, a Bio::Pfam::HMM::HMMResults object
  Returns  : Nothing
  
=cut

no warnings 'recursion';

sub _readUnitHits {
  my ( $self, $hs, $hmmRes ) = @_;
  
  if($hmmRes->eof){
    return; 
  }

#Parse the domain hits section
#>> P37935.1  MAAY4_SCHCO Mating-type protein A-alpha Y4.
#     # bit score    bias    E-value ind Evalue hmm from   hmm to    ali from   ali to    env from   env to    ali-acc
#   --- --------- ------- ---------- ---------- -------- --------    -------- --------    -------- --------    -------
#     1     244.0     0.5    9.5e-76    1.7e-70        1      146 [.        1      145 [.        1      146 [.    0.99
#
#  Alignments for each domain:
#  == domain 1    score: 244.0 bits;  conditional E-value: 9.5e-76
#      SEED   1 medrlallkaisasakdlvalaasrGaksipspvkttavkfdplptPdldalrtrlkeaklPakaiksalsayekaCarWrsdleeafdktaksvsPanlhllealrirlyteqvekWlvqvlevaerWkaemekqrahiaatmgp 146
#               m+++la+l++isa+akd++ala+srGa+++ +p++tt+++fd+l++P+ld++rtrl+ea+lP+kaik++lsaye+aCarW++dleeafd+ta+s+sP+n+++l++lr+rly+eqv+kWl++vl+v+erWkaemekqrahi+atmgp
#  P37935.1   1 MAELLACLQSISAHAKDMMALARSRGATGS-RPTPTTLPHFDELLPPNLDFVRTRLQEARLPPKAIKGTLSAYESACARWKHDLEEAFDRTAHSISPHNFQRLAQLRTRLYVEQVQKWLYEVLQVPERWKAEMEKQRAHINATMGP 145
#               899***************************.******************************************************************************************************************8 PP

  while (<$hs>) {
    if (/^Internal/) {
      last;
    }
    elsif (/\>\>\s+(\S+)/) {
      my $seqId = $1;    
      $self->_readUnitData( $seqId, $hs, $hmmRes );
      if($hmmRes->eof){
        return; 
      }
    }
  }
}

sub _readUnitData {
  my ( $self, $id, $hs, $hmmRes ) = @_;
  
  if($hmmRes->eof){
    return; 
  }
  my $hmmName = $hmmRes->seedName();

  my $seqName = $hmmRes->seqName;

# bit score    bias    E-value ind Evalue hmm from   hmm to    ali from   ali to    env from   env to    ali-acc
#   --- --------- ------- ---------- ---------- -------- --------    -------- --------    -------- --------    -------
#     1     244.0     0.5    9.5e-76    1.7e-70        1      146 [.        1      145 [.        1      146 [.    0.99
#
#  Alignments for each domain:

  my @units;
  my $align   = 1;
  my $recurse = 0;
  my $eof = 0;
  my ($nextSeqId);
  while (<$hs>) {
    if (/^[(\/\/|Internal)]/ ) {
      $align   = 0;
      $recurse = 0;
      $eof = 1;
      last;
    }
    elsif (/^\>\>\s+(\S+)/) {
      $nextSeqId = $1;
      $align     = 0;
      $recurse   = 1;
      last;
    }
    elsif (/^\s+Alignments for each domain:/) {
      $align   = 1;
      $recurse = 0;
      last;
    }
    elsif (/^\s+(#\s+score|---)/){

      #Two human readable lines
      next;
    }
    elsif (/^$/) {

      #blank line
      next;
    }
    elsif (/^\s+\d+\s+/) {
      my @dMatch = split( /\s+/, $_ );
      unless ( scalar(@dMatch) == 17 ) {
        die "Expected 16 elements of data: $_\n";
      }

      push(
        @units,
        Bio::Pfam::HMM::HMMUnit->new(
          {
            name      => $id,
            domain    => $dMatch[1],
            bits      => $dMatch[3],
            bias      => $dMatch[4],
            domEvalue => $dMatch[5],
            evalue    => $dMatch[6],
            hmmFrom   => $dMatch[7],
            hmmTo     => $dMatch[8],
            seqFrom   => $dMatch[10],
            seqTo     => $dMatch[11],
            envFrom   => $dMatch[13],
            envTo     => $dMatch[14],
            aliAcc    => $dMatch[16]
          }
        )
      );
      
      next;
    }
    elsif(/^\s+\[No individual domains/) {
	$align=0;
	next;
    }
    else {
      confess("Did not parse line: $_");
    }
  }

#  == domain 1    score: 244.0 bits;  conditional E-value: 9.5e-76
#      SEED   1 medrlallkaisasakdlvalaasrGaksipspvkttavkfdplptPdldalrtrlkeaklPakaiksalsayekaCarWrsdleeafdktaksvsPanlhllealrirlyteqvekWlvqvlevaerWkaemekqrahiaatmgp 146
#               m+++la+l++isa+akd++ala+srGa+++ +p++tt+++fd+l++P+ld++rtrl+ea+lP+kaik++lsaye+aCarW++dleeafd+ta+s+sP+n+++l++lr+rly+eqv+kWl++vl+v+erWkaemekqrahi+atmgp
#  P37935.1   1 MAELLACLQSISAHAKDMMALARSRGATGS-RPTPTTLPHFDELLPPNLDFVRTRLQEARLPPKAIKGTLSAYESACARWKHDLEEAFDRTAHSISPHNFQRLAQLRTRLYVEQVQKWLYEVLQVPERWKAEMEKQRAHINATMGP 145
#               899***************************.******************************************************************************************************************8 PP
#
# OR....
#
#  == domain 1    score: 27.6 bits;  conditional E-value: 7.4e-10
#   PF00018  17 LsfkkGdvitvleksee.eWwkaelkdg.keGlvPsnYvep 55 
#               L++++Gd+++++++++e++Ww++++++++++G++P+n+v+p
#  P15498.4 617 LRLNPGDIVELTKAEAEqNWWEGRNTSTnEIGWFPCNRVKP 657
#               7899**********9999*******************9987 PP


  if ($align) {
    my ($pattern1, $pattern2);

    if($hmmName and $hmmRes->program eq 'hmmsearch'){
      $pattern1 = qr/^\s+$hmmName\s+\d+\s+(\S+)\s+\d+/;
      $id =~ s/(\W)/\\$1/g; # escape any non-word character
      # $id =~ s/\|/\\|/g;  #Escape '|', '[' and ']' characters
      # $id =~ s/\[/\\[/g;
      # $id =~ s/\]/\\]/g;
      $pattern2 = qr/^\s+$id\s+\d+\s+(\S+)\s+\d+/;
    }elsif($seqName and $hmmRes->program eq 'hmmscan'){
      my $tmpSeqName = $seqName;
      $tmpSeqName =~ s/(\W)/\\$1/g; # escape any non-word character
      # $tmpSeqName =~ s/\|/\\|/g; #Escape '|', '[' and ']' characters
      # $tmpSeqName =~ s/\[/\\[/g;  
      # $tmpSeqName =~ s/\]/\\]/g;
      $pattern1 = qr/^\s+$id\s+\d+\s+(\S+)\s+\d+/;
      $pattern2 = qr/^\s+$tmpSeqName\s+\d+\s+(\S+)\s+\d+/;
    }elsif($seqName and ($hmmRes->program eq 'phmmer' or $hmmRes->program eq 'jackhmmer') ){
      $seqName =~ s/(\W)/\\$1/g; # escape any non-word character
      # $seqName =~ s/\|/\|/g; #Escape '|', '[' and ']' characters
      # $seqName =~ s/\[/\\[/g;
      # $seqName =~ s/\]/\\]/g;
      $pattern1 = qr/^\s+$seqName\s+\d+\s+(\S+)\s+\d+/;
      $pattern2 = qr/^\s+$id\s+\d+\s+(\S+)\s+\d+/;
    }
  

    $recurse = 0;
    my $matchNo;
    my $hmmlen = 0;
    while (<$hs>) {
      if (/$pattern1/) {
        $units[ $matchNo - 1 ]->hmmalign->{hmm} .= $1;
        $hmmlen = length($1);
      }
      elsif (/$pattern2/) {
        $units[ $matchNo - 1 ]->hmmalign->{seq} .= $1;
      }
      elsif (/^\s+([x\.]+)\s+RF$/) {
        my $rf = $1;
        $units[ $matchNo - 1 ]->hmmalign->{rf} .= $rf;
      }
      elsif (/^\s+([0-9\*\.]+)\s+PP$/) {
        my $pp = $1;
        $units[ $matchNo - 1 ]->hmmalign->{pp} .= $pp;
      }elsif (/^\s+(\S+)\s+CS$/) {
        my $cs = $1;
        $units[ $matchNo - 1 ]->hmmalign->{cs} .= $cs;
      }elsif (/^\s+==\s+domain\s+(\d+)/) {
        $matchNo = $1;
      }
      elsif (/^\s+(.*)\s+$/) {
      	# $1 is *not* the match - this fails if there are prepended
      	# or appended spaces
        # $units[ $matchNo - 1 ]->hmmalign->{match} .= $1;
        # Let's get a right substring based on the HMM length
        chomp;
        my $m1 = substr($_,-$hmmlen);
        $units[ $matchNo - 1 ]->hmmalign->{match} .= $m1;
      }elsif (/^$/) {
        next;
      }
      elsif (/^[(\/\/|Internal)]/) {
        $align   = 0;
        $recurse = 0;
        $eof = 1;
        last;
      }
      elsif (/^\>\>\s+(\S+)/) {
        $nextSeqId = $1;
        $recurse   = 1;
        last;
      }
     
      else {
        confess("Did not parse |$_| in units");
      }
    }
  }

  foreach my $u (@units) {
    $hmmRes->addHMMUnit($u);
  }
      
  $hmmRes->eof($eof);

  if ($recurse and $nextSeqId) {
    $self->_readUnitData( $nextSeqId, $hs, $hmmRes );
  }
  return;
}
use warnings 'recursion';

#-------------------------------------------------------------------------------

=head2 parseHMMER2 

  Title    : parseHMMER2
  Usage    : $self->parseHMMER2(\*FH )
  Function : This is a minimal parser for reading in the output of HMMER2 hmmsearch   
  Args     : The file handle to hmmsearch output
  Returns  : A Bio::Pfam::HMM::HMMResults object
  
=cut

sub parseHMMER2 {
  my $self = shift;
  my $file = shift;

  my $hmmRes = Bio::Pfam::HMM::HMMResults->new;

  my %seqh;
  my $count = 0;

  while (<$file>) {
    /^Scores for complete sequences/ && last;
  }

  while (<$file>) {
    /^Parsed for domains/ && last;
    if ( my ( $id, $de, $sc, $ev, $hits ) =
      /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\d+)\s*$/ )
    {
      $hmmRes->addHMMSeq(
        Bio::Pfam::HMM::HMMSequence->new(
          {
            bits       => $sc,
            evalue     => $ev,
            name       => $id,
            desc       => $de,
            numberHits => $hits
          }
        )
      );

      $seqh{$id} = $sc;
    }
  }

  while (<$file>) {
    /^Histogram of all scores/ && last;
    if ( my ( $id, $sqfrom, $sqto, $hmmf, $hmmt, $sc, $ev ) =
      /^(\S+)\s+\S+\s+(\d+)\s+(\d+).+?(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s*$/ )
    {
      $hmmRes->addHMMUnit(
        Bio::Pfam::HMM::HMMUnit->new(
          {
            name    => $id,
            seqFrom => $sqfrom,
            seqTo   => $sqto,
            hmmFrom => $hmmf,
            hmmTo   => $hmmt,
            bits    => $sc,
            evalue  => $ev
          }
        )
      );

    }
  }

  return $hmmRes;
}

#-------------------------------------------------------------------------------

=head2 parseHMMER1

  Title    : parseHMMER1
  Usage    : $self->parseHMMER1(\*FH )
  Function : This is a minimal parser for reading in the output of HMMER1 hmmsearch.
           : There are a few hacks to get round some of them requirements     
  Args     : The file handle to hmmsearch output
  Returns  : A Bio::Pfam::HMM::HMMResults object
  
=cut

sub parseHMMER1 {
  my $self = shift;
  my $file = shift;

  my $hmmRes = Bio::Pfam::HMM::HMMResults->new;

  my %seqh;
  my $count = 0;

  while (<$file>) {
    if ( my ( $bits, $s, $e, $id, $de ) =
/^(-?\d+\.?\d*)\s+\(bits\)\s+f:\s+(\d+)\s+t:\s+(\d+)\s+Target:\s+(\S+)\s+(.*)/
      )
    {
      if ( $id =~ /(\S+)\/(\d+)-(\d+)/ ) {
        $id = $1;
        $s  = $2 + $s - 1;
        $e  = $2 + $e - 1;
      }

      if ( !$hmmRes->seqs->{$id} ) {
        $hmmRes->addHMMSeq(
          Bio::Pfam::HMM::HMMSequence->new(
            {
              bits       => $bits,
              evalue     => 1,
              name       => $id,
              desc       => $de,
              numberHits => 1
            }
          )
        );
      }
      $hmmRes->addHMMUnit(
        Bio::Pfam::HMM::HMMUnit->new(
          {
            name    => $id,
            seqFrom => $s,
            seqTo   => $e,
            hmmFrom => "1",
            hmmTo   => "1",
            bits    => $bits,
            evalue  => "1"
          }
        )
      );
      if ( $bits > $hmmRes->seqs->{$id}->bits ) {
        $hmmRes->seqs->{$id}->bits($bits);
      }
    }
  }
  return $hmmRes;
}

#-------------------------------------------------------------------------------

=head2 writeScoresFile

  Title    : writeScoresFile
  Usage    : $hmmResIO->writeScoresFile( $hmmRes)
  Function : Writes a scores file for a Bio::Pfam::HMM::HMMResults object.
  Args     : Bio::Pfam::HMM::HMMResults
  Returns  : Nothing
  
=cut

sub writeScoresFile {
  my ( $self, $hmmRes ) = @_;

  unless ($hmmRes) {
    confess "A Bio::Pfam::HMM::HMMResults object was not parsed in\n";
  }
  unless ( $hmmRes->isa("Bio::Pfam::HMM::HMMResults") ) {
    confess("Variable passed in is not a Bio::Pfam::HMM::Results object");
  }

  my $fh;
  open( $fh, ">" . $self->scores )
    or confess "Could not open " . $self->scores . ":[$!]\n";

  my ( $lowSeq, $lowDom, $highSeq, $highDom );
  $lowSeq  = $lowDom  =  999999.99;
  $highSeq = $highDom = -999999.99;
  unless ( defined $hmmRes->domThr and defined $hmmRes->seqThr ) {
    warn "No threshold set, setting to 25.0 bits\n";
    $hmmRes->domThr("25.0");
    $hmmRes->seqThr("25.0");
  }

  my @sigUnits;

  foreach my $seqId ( keys %{ $hmmRes->seqs } ) {

    #Does this sequence score above or equal to the sequence threshold?
    if ( $hmmRes->seqs->{$seqId}->bits >= $hmmRes->seqThr ) {

      #Is this the lowest sequence thresh
      if ( $hmmRes->seqs->{$seqId}->bits < $lowSeq ) {
        $lowSeq = $hmmRes->seqs->{$seqId}->bits;
      }

#For each of the regions found on the sequence, look to see if the match is great
#than the domain threshold.  If it is, is it lower than we we have seen previously
      foreach my $unit ( @{ $hmmRes->seqs->{$seqId}->hmmUnits } ) {
        if ( $unit->bits >= $hmmRes->domThr ) {
          push( @sigUnits, $unit );
          if ( $unit->bits < $lowDom ) {
            $lowDom = $unit->bits();
          }
        }
        elsif ( $unit->bits > $highDom ) {
          $highDom = $unit->bits;
        }
      }
    }
    else {

      #Is this the highest sequence thres below the cut-off
      if ( $hmmRes->seqs->{$seqId}->bits > $highSeq ) {
        $highSeq = $hmmRes->seqs->{$seqId}->bits;
      }

#For each of the regions found on the sequence, look to see if the match is great
#than the domain threshold.  If it is, is it lower than we we have seen previously
      foreach my $unit ( @{ $hmmRes->seqs->{$seqId}->hmmUnits } ) {
        if ( $unit->bits < $hmmRes->domThr && $unit->bits > $highDom ) {
          $highDom = $unit->bits;
        }
      }
    }
  }

  $hmmRes->domTC($lowDom);
  $hmmRes->seqTC($lowSeq);
  $hmmRes->domNC($highDom);
  $hmmRes->seqNC($highSeq);

  #Print the domains to the scores file
  foreach my $u ( sort { $b->bits <=> $a->bits } @sigUnits ) {
    print $fh
      sprintf( "%.1f %s/%s-%s %s-%s %s\n", $u->bits, $u->name, $u->envFrom, $u->envTo, $u->seqFrom, $u->seqTo, $u->evalue );
  }
  close($fh);

}

#-------------------------------------------------------------------------------

#TODO - write _readAlign

=head2 _readAlign

  Title    :
  Usage    :  
  Function :
  Args     :
  Returns  :
  
=cut

sub _readAlign {
  my ( $self, $fh, $hmmRes ) = @_;

}

#Parse the alignment section
#if($pp){

#}else{
#  while(<HS>){
#    last if(/^\/\//)
#  }
#}



sub _readFooter {
  my($self, $fh, $hmmRes ) = @_;
 
  # We are going to parse something like this! 
    
  #  Internal pipeline statistics summary:
#-------------------------------------
#Query sequence(s):                         1  (360 residues)
#Target model(s):                           7  (836 nodes)
#Passed MSV filter:                         2  (0.285714); expected 0.1 (0.02)
#Passed Vit filter:                         1  (0.142857); expected 0.0 (0.001)
#Passed Fwd filter:                         1  (0.142857); expected 0.0 (1e-05)
#Initial search space (Z):                  7  [actual number of targets]
#Domain search space  (domZ):               1  [number of targets reported over threshold]
## CPU time: 0.00u 0.00s 00:00:00.00 Elapsed: 00:00:00
## Mc/sec: inf
#//  

  while(<$fh>){
    if(/\/\//){
      last;
    }
  }
}


#Parse the internal summary section
#Internal statistics summary:
#----------------------------
#Query HMM(s):      1  (0 nodes)
#Target sequences:  5323441  (0 residues)
#Passed MSV filter: 116519  (-37389918065567040729448769671768824784852036328367855636063687997915136.000; expected 19991592792512146725679052970637918208.000)
#Passed Vit filter: 7579  (-0.0000; expected -35982214160587876085407389642471051723332987952235753317595472501307733302049608744822636544.0000)
#Passed Fwd filter: 1687  (8.3e+165; expected -7.5e-266)
#Mc/sec:            828.85
# CPU time: 115.36u 4.45s 00:01:59.81 Elapsed: 00:03:01

#sub writeHMMSearch {
#  my ( $self, $hmmRes ) = @_;
#  my $fh;
#  open($fh, ">".$self->outfile."\n");
#
#  $self->_writeHeader($fh, $hmmRes);
#  $self->_writeSeqHits( $fh, $hmmRes);
#  $self->_writeDomHits( $fh, $hmmRes);
#  $self->_writeAlign( $fh, $hmmRes) if($self->align);
#  $self->_writeInternalSummary( $fh, $hmmRes);
#}
#sub mergeHMMSearch {
#  my ( $self, $filenames ) = @_;
#}


sub write_ascii_out {

    my ($self, $HMMResults, $fh, $scanData, $e_seq, $e_dom, $b_seq, $b_dom) = @_;


    $scanData->{_max_seqname} = 20 unless($scanData->{_max_seqname} or $scanData->{_max_seqname} < 1);
    
    my $ga;

    if($e_seq or $e_dom) {
	$e_seq = $e_dom unless($e_seq);
	$e_dom = "10" unless($e_dom);
    } 
    elsif($b_seq or $b_dom) {
	$b_seq = $b_dom unless($b_seq);
	$b_dom = "0" unless($b_dom);
    }
    else {
	$ga = 1;
    }


    foreach my $unit ( sort { $a->seqFrom <=> $b->seqFrom } @{ $HMMResults->units } ) {    

        if($unit->name =~ /Pfam\-B/) {

	    next unless($HMMResults->seqs->{$unit->name}->evalue <= "0.001" and $unit->evalue <= "0.001");


	    printf $fh "%-".$scanData->{_max_seqname}."s %6d %6d %6d %6d %-11s %-16s %7s %5d %5d %5d %8s %9s %3s %-8s\n",
	    $HMMResults->seqName,
	    $unit->seqFrom,
	    $unit->seqTo,
	    $unit->envFrom,
	    $unit->envTo,
	    $scanData->{_accmap}->{ $unit->name },
	    $unit->name,
	    "Pfam-B",
	    $unit->hmmFrom,
	    $unit->hmmTo,
	    $scanData->{_model_len}->{ $unit->name },
	    $unit->bits,
	    $unit->evalue,
	    "NA",
	    "NA";


	}
	else {

            #Filter results based on thresholds
	    if($ga) {
		next unless($unit->sig);
	    }
	    if($e_seq) {
		next unless($HMMResults->seqs->{$unit->name}->evalue <= $e_seq and $unit->evalue <= $e_dom);
	    }
	    if($b_seq) {
		
		next unless($HMMResults->seqs->{$unit->name}->bits >= $b_seq and $unit->bits >= $b_dom);
	    }
	    
	    my $clan = $scanData->{_clanmap}->{ $unit->name } || "No_clan";
	    
	    
	    printf $fh "%-".$scanData->{_max_seqname}."s %6d %6d %6d %6d %-11s %-16s %7s %5d %5d %5d %8s %9s %3d %-8s ",
	    $HMMResults->seqName,
	    $unit->seqFrom,
	    $unit->seqTo,
	    $unit->envFrom,
	    $unit->envTo,
	    $scanData->{_accmap}->{ $unit->name },
	    $unit->name,
	    $scanData->{_type}->{ $unit->name },
	    $unit->hmmFrom,
	    $unit->hmmTo,
	    $scanData->{_model_len}->{ $unit->name },
	    $unit->bits,
	    $unit->evalue,
	    $unit->sig, 
	    $clan;
	
	    
	    if($unit->{'act_site'}) {
		local $" = ",";
		print $fh "predicted_active_site[@{$unit->{'act_site'}}]";
	    }
	
	    if($scanData->{_translate}){
	      my $strand = '?';
	      my $start = '-';
	      my $end   = '-';
	      if(exists($scanData->{_orf}->{$HMMResults->seqName})){
	       $strand = $scanData->{_orf}->{$HMMResults->seqName}->{strand};  
	       if($strand eq '+'){
	         $start = $scanData->{_orf}->{$HMMResults->seqName}->{start} + ($unit->envFrom * 3) - 3;
	         $end = $scanData->{_orf}->{$HMMResults->seqName}->{start} + ($unit->envTo * 3) - 3;
	       }elsif($strand eq '-'){
	         $start = $scanData->{_orf}->{$HMMResults->seqName}->{start} - ($unit->envFrom * 3) + 3;
           $end = $scanData->{_orf}->{$HMMResults->seqName}->{start} - ($unit->envTo * 3) + 3;
	       }
	      }
	      print $fh "$strand $start $end";
	    }
	
	    print $fh "\n";
	}

	if($scanData->{_align}){
	    print $fh sprintf( "%-10s %s\n", "#HMM",   $unit->hmmalign->{hmm} );
	    print $fh sprintf( "%-10s %s\n", "#MATCH", $unit->hmmalign->{match} );
	    print $fh sprintf( "%-10s %s\n", "#PP",   $unit->hmmalign->{pp});
	    print $fh sprintf( "%-10s %s\n", "#SEQ",   $unit->hmmalign->{seq});
	    print $fh sprintf( "%-10s %s\n", "#CS",   $unit->hmmalign->{cs}) if($unit->hmmalign->{cs});
	}
	
    }
    
}

1;