diff pfamScan/Bio/Pfam/HMM/HMMResultsIO.pm @ 0:68a3648c7d91 draft default tip

Uploaded
author matteoc
date Thu, 22 Dec 2016 04:45:31 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pfamScan/Bio/Pfam/HMM/HMMResultsIO.pm	Thu Dec 22 04:45:31 2016 -0500
@@ -0,0 +1,1186 @@
+# HMMResultsIO.pm
+#
+# Author:        rdf
+# Maintainer:    $Id: HMMResultsIO.pm,v 1.2 2009-12-01 15:42:20 jt6 Exp $
+# Version:       $Revision: 1.2 $
+# Created:       Nov 16, 2008
+# Last Modified: $Date: 2009-12-01 15:42:20 $
+
+=head1 NAME
+
+Template - a short description of the class
+
+=cut
+
+package Bio::Pfam::HMM::HMMResultsIO;
+
+=head1 DESCRIPTION
+
+A more detailed description of what this class does and how it does it.
+
+$Id: HMMResultsIO.pm,v 1.2 2009-12-01 15:42:20 jt6 Exp $
+
+=head1 COPYRIGHT
+
+File: HMMResultsIO.pm
+
+Copyright (c) 2007: Genome Research Ltd.
+
+Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk)
+
+ This is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
+ 
+=cut
+
+use strict;
+use warnings;
+use Moose;
+use Carp;
+
+#All the things we need to objectfy the search results
+use Bio::Pfam::HMM::HMMResults;
+use Bio::Pfam::HMM::HMMSequence;
+use Bio::Pfam::HMM::HMMUnit;
+
+#-------------------------------------------------------------------------------
+
+=head1 ATTRIBUTES
+
+
+
+=cut
+
+has 'align' => (
+  isa     => 'Int',
+  is      => 'rw',
+  default => 0
+);
+
+has 'outfile' => (
+  isa     => 'Str',
+  is      => 'rw',
+  default => 'OUTPUT'
+);
+
+has 'pfamout' => (
+  isa     => 'Str',
+  is      => 'rw',
+  default => 'PFAMOUT'
+);
+
+has 'scores' => (
+  isa     => 'Str',
+  is      => 'rw',
+  default => 'scores'
+);
+
+#-------------------------------------------------------------------------------
+
+=head1 METHODS
+
+=head2 parseHMMER3
+
+  Title    : parseHMMER 
+  Usage    : $hmmResIO->parseHMMSearch( filename )
+  Function : Parse the output from a HMMER3 search results 
+  Args     : Filename containing the search 
+  Returns  : A Bio::Pfam::HMM::HMMResults object
+  
+=cut
+
+sub parseHMMER3 {
+  my ( $self, $filename ) = @_;
+  my $fh;
+ 
+  if(ref($filename) eq 'GLOB'){
+    $fh = $filename;
+  }else{
+    open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
+  }
+  
+#  open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
+  my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
+  $self->_readHeader( $fh, $hmmRes );
+  $self->_readSeqHits( $fh, $hmmRes );
+  $self->_readUnitHits( $fh, $hmmRes );
+  $self->_readFooter($fh, $hmmRes);
+  return ($hmmRes);
+}
+
+
+
+sub parseMultiHMMER3 {
+  my ( $self, $filename ) = @_;
+  my $fh;
+  
+  if(ref($filename) eq 'GLOB'){
+    $fh = $filename;
+  }elsif( ref($filename) and $filename->isa('IO::File') ) {
+      $fh = $filename;
+  }else{
+    open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
+  }  
+  
+  my @hmmResAll;
+  my $program;
+  while(!eof($fh)){
+    my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
+    my $eof = $self->_readHeader( $fh, $hmmRes ); 
+    last if($eof);
+    push(@hmmResAll, $hmmRes);
+    if($hmmRes->program) {
+	$program = $hmmRes->program;
+    }
+    else {
+	$hmmRes->program($program);
+    }
+    $self->_readSeqHits( $fh, $hmmRes );
+    $self->_readUnitHits( $fh, $hmmRes );
+    $self->_readFooter($fh, $hmmRes);
+  }
+  return (\@hmmResAll);
+}
+
+sub parseSplitHMMER3 {
+  my($self, $files ) = @_;
+  
+  my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
+  
+  foreach my $filename (@{$files}){
+    my ($fh);
+    open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
+    $self->_readHeader( $fh, $hmmRes );
+    $self->_readSeqHits( $fh, $hmmRes );
+    $self->_readUnitHits( $fh, $hmmRes );
+    $self->_readFooter($fh, $hmmRes);
+  }
+  
+  return ( $hmmRes );
+    
+}
+
+
+#-------------------------------------------------------------------------------
+
+=head2 convertHMMSearch 
+
+  Title    : convertHMMSearch
+  Usage    : $hmmResIO->convertHMMSearch('SEARCHFILE') 
+  Function : This wraps up a couple of methods to convert the more complex hmmsearch 
+           : results in to nice clean format that we Pfam-ers are used to. 
+  Args     : The filename of the hmmsearch output file
+  Returns  : Nothing
+  
+=cut
+
+sub convertHMMSearch {
+  my ( $self, $filename ) = @_;
+
+  unless ($filename) {
+    confess "No filename passed in to convertHMMSearch\n";
+  }
+  unless ( -s $filename ) {
+    confess "$filename does not exists\n";
+  }
+
+  #Now parse in the raw HMM output and write out the results as a PFAMOUT
+  my $hmmRes = $self->parseHMMER3($filename);
+  $self->writePFAMOUT($hmmRes);
+  return $hmmRes;
+}
+
+#-------------------------------------------------------------------------------
+
+=head2 writePFAMOUT 
+
+  Title    : writePFAMOUT
+  Usage    : $hmmResIO->writePFAMOUT( $hmmRes ) 
+  Function : Writes a Bio::Pfam::HMM:HMMResults object in to a PFAMOUT file. 
+  Args     : A Bio::Pfam::HMM:HMMResults
+  Returns  : Nothing
+  
+=cut
+
+sub writePFAMOUT {
+  my ( $self, $hmmRes ) = @_;
+
+  unless ($hmmRes) {
+    confess "A Bio::Pfam::HMM::HMMResults object was not parsed in\n";
+  }
+  unless ( $hmmRes->isa("Bio::Pfam::HMM::HMMResults") ) {
+    confess("Variable passed in is not a Bio::Pfam::HMM::Results object");
+  }
+
+  my $fh;
+  open( $fh, ">" . $self->pfamout )
+    or confess "Could not open " . $self->pfamout . ":[$!]\n";
+
+  print $fh <<HEAD;
+# ===========
+# Pfam output
+# ===========
+#
+# Sequence scores
+# ---------------
+#
+# name           description                                   bits      evalue   n   exp  bias  
+ 
+HEAD
+
+  foreach
+    my $seq ( sort { $b->bits <=> $a->bits } ( @{ $hmmRes->eachHMMSeq } ) )
+  {
+    $_ = $seq->desc;
+    my ($desc) = /^(.{1,42})/;
+    $desc = uc($desc);
+    printf $fh (
+      "%-15s  %-42s %8.1f  %9s %3d %5.1f %5.1f\n",
+      $seq->name,
+      $desc,
+      $seq->bits,
+      $seq->evalue,
+      scalar( @{ $seq->hmmUnits } ),
+      defined( $seq->exp )  ? $seq->exp  : "-",
+      defined( $seq->bias ) ? $seq->bias : "-"
+    );
+  }
+
+  print $fh <<HEAD;
+#
+# Domain scores
+# -------------
+#
+# name           env-st  env-en  ali-st  ali-en  hmm-st  hmm-en   bits     evalue    hit   bias
+#
+
+HEAD
+
+  foreach my $dom ( sort { $b->bits <=> $a->bits } @{ $hmmRes->units } ) {
+    
+    
+    printf $fh (
+      "%-15s  %6d  %6d  %6d  %6d  %6s  %6s %6.1f  %9s %6d %6.1f\n",
+      $dom->name,
+      $dom->envFrom,
+      $dom->envTo,
+      $dom->seqFrom,
+      $dom->seqTo,
+      $dom->hmmFrom,
+      $dom->hmmTo,
+      $dom->bits,
+      $dom->evalue,
+      $dom->domain,
+      defined( $dom->bias ) ? $dom->bias : "-",
+
+    );
+  }
+}
+
+#-------------------------------------------------------------------------------
+
+=head2 parsePFAMOUT 
+
+  Title    : parsePFAMOUT
+  Usage    : $self->parsePFAMOUT($filename)
+  Function : Reads in a PFAMOUT file.  This file contains the minimal amount of information
+           : require to constrcut a pfam ALIGN file.   
+  Args     : A filename. Normally this is filename
+  Returns  : A Bio::Pfam::HMM::HMMResults object
+  
+=cut
+
+sub parsePFAMOUT {
+  my $self     = shift;
+  my $filename = shift;
+
+  unless ($filename) {
+    confess('No filename or filehandle passed to parsePFAMOUT');
+  }
+
+  my $fh;
+  if ( ref($filename) eq 'GLOB' ) {
+    $fh = $filename;
+  }
+  else {
+    open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
+  }
+  my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
+
+  while (<$fh>) {
+    /^# Domain scores/ && last;
+
+    #if (/^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\d+)\s*$/) {
+    if (/^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\d+)\s+\S+\s+(\S+)\s*$/) {
+      
+      $hmmRes->addHMMSeq(
+        Bio::Pfam::HMM::HMMSequence->new(
+          {
+            name       => $1,
+            desc       => $2,
+            bits       => $3,
+            evalue     => $4,
+            numberHits => $5,
+	    bias       => $6
+          }
+        )
+      );
+    }
+    elsif (/^#|^\s+$/) {
+      next;
+    }
+    else {
+      warn "Did not parse|$_|\n";
+    }
+  }
+  while (<$fh>) {
+
+    #if (/^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s*$/) {
+    if (
+      /^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+\S+\s+(\S+)/)
+    {
+      $hmmRes->addHMMUnit(
+        Bio::Pfam::HMM::HMMUnit->new(
+          {
+            name     => $1,
+            envFrom  => $2,
+            envTo    => $3,
+            seqFrom  => $4,
+            seqTo    => $5,
+            hmmFrom  => $6,
+            hmmTo    => $7,
+            bits     => $8,
+            evalue   => $9,
+	    bias     => $10
+          }
+        )
+      );
+    }
+    elsif (/^#|^\s+$/) {
+      next;
+    }
+    elsif (/^$/) {
+      next;
+    }
+    else {
+      warn "Did not parse: |$_|";
+    }
+  }
+  close($fh);
+  return ($hmmRes);
+}
+
+#-------------------------------------------------------------------------------
+
+=head2 _readHeader 
+
+  Title    : _readHeader
+  Usage    : Private method.  $self->_readHeader(\*FH, $hmmResults)
+  Function : Reads the header section from a HMMER3 hmmsearch   
+  Args     : The file handle to hmmsearch output, a Bio::Pfam::HMM::HMMResults object
+  Returns  : Nothing
+  
+=cut
+
+#Parse the header part of the output first;
+sub _readHeader {
+  my ( $self, $hs, $hmmRes ) = @_;
+
+  #Check the $hs is defined and a GLOB
+
+  while (<$hs>) {
+    if (/^Scores for complete/) {
+      last;
+    }
+    elsif (/^# query HMM file:\s+(\S+)/) {
+      $hmmRes->hmmName($1);
+    }
+    elsif (/^# target sequence database:\s+(\S+)/) {
+      $hmmRes->seqDB($1);
+    }
+    elsif (/^output directed to file:\s+(\S+)/) {
+      $hmmRes->thisFile($1);
+    }
+    elsif (/^Query:\s+(\S+)\s+\[M\=(\d+)\]/) {
+      $hmmRes->seedName($1);
+      $hmmRes->hmmLength($2);
+    }elsif(/^Query:\s+(\S+)\s+\[L\=(\d+)\]/) {
+	$hmmRes->seqName($1); 
+      $hmmRes->seqLength($2);
+    }elsif (/^sequence E-value threshold: <= (\d+)/) {
+      $hmmRes->evalueThr($1);
+    }
+    elsif (/^# Random generator seed:      (\d+)/) {
+      $hmmRes->randSeedNum($1);
+    }elsif(/^Description:\s+(.*)/){
+      $hmmRes->description($1);  
+    }elsif(/^# (phmmer|hmmsearch|hmmscan|jackhmmer)/){
+      $hmmRes->program($1);
+    }elsif (/(^#)|(^$)/) {
+      next;
+    }elsif(/^Accession/){
+      next; 
+    } elsif(/^\[ok\]/) {
+      return(1);
+    } else {
+      die "Failed to parse hmmsearch results |$_| in header section\n";
+    }
+  }
+}
+
+#-------------------------------------------------------------------------------
+
+=head2 _readSeqHits 
+
+  Title    : _readSeqHits
+  Usage    : Private method.  $self->_readSeqHits(\*FH, $hmmResults)
+  Function : Reads the sequence hits from a HMMER3 hmmsearch   
+  Args     : The file handle to hmmsearch output, a Bio::Pfam::HMM::HMMResults object
+  Returns  : Nothing
+  
+=cut
+
+sub _readSeqHits {
+  my ( $self, $hs, $hmmRes ) = @_;
+  while (<$hs>) {
+
+#Match a line like this
+# E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
+#    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------
+#      4e-83  285.8  10.0    5.3e-83  285.5   7.0    1.1  1  Q14SN3.1 Q14SN3_9HEPC Polyprotein (Fragment).
+    if (/^Domain annotation for each [sequence|model]/) { # This is the format for HMMER3b3
+      last;
+    }
+    elsif (/^Domain and alignment annotation for each [sequence|model]/) {  #This is the format for HMMER3b2 - can be removed later
+	last;
+    }
+    elsif (/^\s+(E-value|---)/) {
+      next;
+    }
+    elsif (/^$/) {
+      next;
+    }
+    else {
+      next if(/No hits detected that satisfy reporting thresholds/);
+
+      #Assume that we have a sequence match
+      my @sMatch = split( /\s+/, $_ );
+      unless ( scalar(@sMatch) >= 10 ) {
+        die "Expected at least 10 pieces of data: $_;\n";
+      }
+      my $desc;
+      if ( scalar(@sMatch) >= 11 ) {
+        $desc = join( " ", @sMatch[ 10 .. $#sMatch ] );
+      }
+
+      $hmmRes->addHMMSeq(
+        Bio::Pfam::HMM::HMMSequence->new(
+          {
+            evalue     => $sMatch[1],
+            bits       => $sMatch[2],
+            bias       => $sMatch[3],
+            exp        => $sMatch[7],
+            numberHits => $sMatch[8],
+            name       => $sMatch[9],
+            desc       => defined($desc) ? $desc : "-",
+          }
+        )
+      );
+      
+      next;
+    }
+    die "Failed to parse $_ in sequence section\n";
+  }
+
+}
+
+#------------------------------------------------------------------------------
+
+=head2 _readUnitHits 
+
+  Title    : _readUnitHits
+  Usage    : Private method.  $self->_readUnitHits(\*FH, $hmmResults)
+  Function : Reads the unit (domain) hits from a HMMER3 hmmsearch   
+  Args     : The file handle to hmmsearch output, a Bio::Pfam::HMM::HMMResults object
+  Returns  : Nothing
+  
+=cut
+
+no warnings 'recursion';
+
+sub _readUnitHits {
+  my ( $self, $hs, $hmmRes ) = @_;
+  
+  if($hmmRes->eof){
+    return; 
+  }
+
+#Parse the domain hits section
+#>> P37935.1  MAAY4_SCHCO Mating-type protein A-alpha Y4.
+#     # bit score    bias    E-value ind Evalue hmm from   hmm to    ali from   ali to    env from   env to    ali-acc
+#   --- --------- ------- ---------- ---------- -------- --------    -------- --------    -------- --------    -------
+#     1     244.0     0.5    9.5e-76    1.7e-70        1      146 [.        1      145 [.        1      146 [.    0.99
+#
+#  Alignments for each domain:
+#  == domain 1    score: 244.0 bits;  conditional E-value: 9.5e-76
+#      SEED   1 medrlallkaisasakdlvalaasrGaksipspvkttavkfdplptPdldalrtrlkeaklPakaiksalsayekaCarWrsdleeafdktaksvsPanlhllealrirlyteqvekWlvqvlevaerWkaemekqrahiaatmgp 146
+#               m+++la+l++isa+akd++ala+srGa+++ +p++tt+++fd+l++P+ld++rtrl+ea+lP+kaik++lsaye+aCarW++dleeafd+ta+s+sP+n+++l++lr+rly+eqv+kWl++vl+v+erWkaemekqrahi+atmgp
+#  P37935.1   1 MAELLACLQSISAHAKDMMALARSRGATGS-RPTPTTLPHFDELLPPNLDFVRTRLQEARLPPKAIKGTLSAYESACARWKHDLEEAFDRTAHSISPHNFQRLAQLRTRLYVEQVQKWLYEVLQVPERWKAEMEKQRAHINATMGP 145
+#               899***************************.******************************************************************************************************************8 PP
+
+  while (<$hs>) {
+    if (/^Internal/) {
+      last;
+    }
+    elsif (/\>\>\s+(\S+)/) {
+      my $seqId = $1;    
+      $self->_readUnitData( $seqId, $hs, $hmmRes );
+      if($hmmRes->eof){
+        return; 
+      }
+    }
+  }
+}
+
+sub _readUnitData {
+  my ( $self, $id, $hs, $hmmRes ) = @_;
+  
+  if($hmmRes->eof){
+    return; 
+  }
+  my $hmmName = $hmmRes->seedName();
+
+  my $seqName = $hmmRes->seqName;
+
+# bit score    bias    E-value ind Evalue hmm from   hmm to    ali from   ali to    env from   env to    ali-acc
+#   --- --------- ------- ---------- ---------- -------- --------    -------- --------    -------- --------    -------
+#     1     244.0     0.5    9.5e-76    1.7e-70        1      146 [.        1      145 [.        1      146 [.    0.99
+#
+#  Alignments for each domain:
+
+  my @units;
+  my $align   = 1;
+  my $recurse = 0;
+  my $eof = 0;
+  my ($nextSeqId);
+  while (<$hs>) {
+    if (/^[(\/\/|Internal)]/ ) {
+      $align   = 0;
+      $recurse = 0;
+      $eof = 1;
+      last;
+    }
+    elsif (/^\>\>\s+(\S+)/) {
+      $nextSeqId = $1;
+      $align     = 0;
+      $recurse   = 1;
+      last;
+    }
+    elsif (/^\s+Alignments for each domain:/) {
+      $align   = 1;
+      $recurse = 0;
+      last;
+    }
+    elsif (/^\s+(#\s+score|---)/){
+
+      #Two human readable lines
+      next;
+    }
+    elsif (/^$/) {
+
+      #blank line
+      next;
+    }
+    elsif (/^\s+\d+\s+/) {
+      my @dMatch = split( /\s+/, $_ );
+      unless ( scalar(@dMatch) == 17 ) {
+        die "Expected 16 elements of data: $_\n";
+      }
+
+      push(
+        @units,
+        Bio::Pfam::HMM::HMMUnit->new(
+          {
+            name      => $id,
+            domain    => $dMatch[1],
+            bits      => $dMatch[3],
+            bias      => $dMatch[4],
+            domEvalue => $dMatch[5],
+            evalue    => $dMatch[6],
+            hmmFrom   => $dMatch[7],
+            hmmTo     => $dMatch[8],
+            seqFrom   => $dMatch[10],
+            seqTo     => $dMatch[11],
+            envFrom   => $dMatch[13],
+            envTo     => $dMatch[14],
+            aliAcc    => $dMatch[16]
+          }
+        )
+      );
+      
+      next;
+    }
+    elsif(/^\s+\[No individual domains/) {
+	$align=0;
+	next;
+    }
+    else {
+      confess("Did not parse line: $_");
+    }
+  }
+
+#  == domain 1    score: 244.0 bits;  conditional E-value: 9.5e-76
+#      SEED   1 medrlallkaisasakdlvalaasrGaksipspvkttavkfdplptPdldalrtrlkeaklPakaiksalsayekaCarWrsdleeafdktaksvsPanlhllealrirlyteqvekWlvqvlevaerWkaemekqrahiaatmgp 146
+#               m+++la+l++isa+akd++ala+srGa+++ +p++tt+++fd+l++P+ld++rtrl+ea+lP+kaik++lsaye+aCarW++dleeafd+ta+s+sP+n+++l++lr+rly+eqv+kWl++vl+v+erWkaemekqrahi+atmgp
+#  P37935.1   1 MAELLACLQSISAHAKDMMALARSRGATGS-RPTPTTLPHFDELLPPNLDFVRTRLQEARLPPKAIKGTLSAYESACARWKHDLEEAFDRTAHSISPHNFQRLAQLRTRLYVEQVQKWLYEVLQVPERWKAEMEKQRAHINATMGP 145
+#               899***************************.******************************************************************************************************************8 PP
+#
+# OR....
+#
+#  == domain 1    score: 27.6 bits;  conditional E-value: 7.4e-10
+#   PF00018  17 LsfkkGdvitvleksee.eWwkaelkdg.keGlvPsnYvep 55 
+#               L++++Gd+++++++++e++Ww++++++++++G++P+n+v+p
+#  P15498.4 617 LRLNPGDIVELTKAEAEqNWWEGRNTSTnEIGWFPCNRVKP 657
+#               7899**********9999*******************9987 PP
+
+
+  if ($align) {
+    my ($pattern1, $pattern2);
+
+    if($hmmName and $hmmRes->program eq 'hmmsearch'){
+      $pattern1 = qr/^\s+$hmmName\s+\d+\s+(\S+)\s+\d+/;
+      $id =~ s/(\W)/\\$1/g; # escape any non-word character
+      # $id =~ s/\|/\\|/g;  #Escape '|', '[' and ']' characters
+      # $id =~ s/\[/\\[/g;
+      # $id =~ s/\]/\\]/g;
+      $pattern2 = qr/^\s+$id\s+\d+\s+(\S+)\s+\d+/;
+    }elsif($seqName and $hmmRes->program eq 'hmmscan'){
+      my $tmpSeqName = $seqName;
+      $tmpSeqName =~ s/(\W)/\\$1/g; # escape any non-word character
+      # $tmpSeqName =~ s/\|/\\|/g; #Escape '|', '[' and ']' characters
+      # $tmpSeqName =~ s/\[/\\[/g;  
+      # $tmpSeqName =~ s/\]/\\]/g;
+      $pattern1 = qr/^\s+$id\s+\d+\s+(\S+)\s+\d+/;
+      $pattern2 = qr/^\s+$tmpSeqName\s+\d+\s+(\S+)\s+\d+/;
+    }elsif($seqName and ($hmmRes->program eq 'phmmer' or $hmmRes->program eq 'jackhmmer') ){
+      $seqName =~ s/(\W)/\\$1/g; # escape any non-word character
+      # $seqName =~ s/\|/\|/g; #Escape '|', '[' and ']' characters
+      # $seqName =~ s/\[/\\[/g;
+      # $seqName =~ s/\]/\\]/g;
+      $pattern1 = qr/^\s+$seqName\s+\d+\s+(\S+)\s+\d+/;
+      $pattern2 = qr/^\s+$id\s+\d+\s+(\S+)\s+\d+/;
+    }
+  
+
+    $recurse = 0;
+    my $matchNo;
+    my $hmmlen = 0;
+    while (<$hs>) {
+      if (/$pattern1/) {
+        $units[ $matchNo - 1 ]->hmmalign->{hmm} .= $1;
+        $hmmlen = length($1);
+      }
+      elsif (/$pattern2/) {
+        $units[ $matchNo - 1 ]->hmmalign->{seq} .= $1;
+      }
+      elsif (/^\s+([x\.]+)\s+RF$/) {
+        my $rf = $1;
+        $units[ $matchNo - 1 ]->hmmalign->{rf} .= $rf;
+      }
+      elsif (/^\s+([0-9\*\.]+)\s+PP$/) {
+        my $pp = $1;
+        $units[ $matchNo - 1 ]->hmmalign->{pp} .= $pp;
+      }elsif (/^\s+(\S+)\s+CS$/) {
+        my $cs = $1;
+        $units[ $matchNo - 1 ]->hmmalign->{cs} .= $cs;
+      }elsif (/^\s+==\s+domain\s+(\d+)/) {
+        $matchNo = $1;
+      }
+      elsif (/^\s+(.*)\s+$/) {
+      	# $1 is *not* the match - this fails if there are prepended
+      	# or appended spaces
+        # $units[ $matchNo - 1 ]->hmmalign->{match} .= $1;
+        # Let's get a right substring based on the HMM length
+        chomp;
+        my $m1 = substr($_,-$hmmlen);
+        $units[ $matchNo - 1 ]->hmmalign->{match} .= $m1;
+      }elsif (/^$/) {
+        next;
+      }
+      elsif (/^[(\/\/|Internal)]/) {
+        $align   = 0;
+        $recurse = 0;
+        $eof = 1;
+        last;
+      }
+      elsif (/^\>\>\s+(\S+)/) {
+        $nextSeqId = $1;
+        $recurse   = 1;
+        last;
+      }
+     
+      else {
+        confess("Did not parse |$_| in units");
+      }
+    }
+  }
+
+  foreach my $u (@units) {
+    $hmmRes->addHMMUnit($u);
+  }
+      
+  $hmmRes->eof($eof);
+
+  if ($recurse and $nextSeqId) {
+    $self->_readUnitData( $nextSeqId, $hs, $hmmRes );
+  }
+  return;
+}
+use warnings 'recursion';
+
+#-------------------------------------------------------------------------------
+
+=head2 parseHMMER2 
+
+  Title    : parseHMMER2
+  Usage    : $self->parseHMMER2(\*FH )
+  Function : This is a minimal parser for reading in the output of HMMER2 hmmsearch   
+  Args     : The file handle to hmmsearch output
+  Returns  : A Bio::Pfam::HMM::HMMResults object
+  
+=cut
+
+sub parseHMMER2 {
+  my $self = shift;
+  my $file = shift;
+
+  my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
+
+  my %seqh;
+  my $count = 0;
+
+  while (<$file>) {
+    /^Scores for complete sequences/ && last;
+  }
+
+  while (<$file>) {
+    /^Parsed for domains/ && last;
+    if ( my ( $id, $de, $sc, $ev, $hits ) =
+      /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\d+)\s*$/ )
+    {
+      $hmmRes->addHMMSeq(
+        Bio::Pfam::HMM::HMMSequence->new(
+          {
+            bits       => $sc,
+            evalue     => $ev,
+            name       => $id,
+            desc       => $de,
+            numberHits => $hits
+          }
+        )
+      );
+
+      $seqh{$id} = $sc;
+    }
+  }
+
+  while (<$file>) {
+    /^Histogram of all scores/ && last;
+    if ( my ( $id, $sqfrom, $sqto, $hmmf, $hmmt, $sc, $ev ) =
+      /^(\S+)\s+\S+\s+(\d+)\s+(\d+).+?(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s*$/ )
+    {
+      $hmmRes->addHMMUnit(
+        Bio::Pfam::HMM::HMMUnit->new(
+          {
+            name    => $id,
+            seqFrom => $sqfrom,
+            seqTo   => $sqto,
+            hmmFrom => $hmmf,
+            hmmTo   => $hmmt,
+            bits    => $sc,
+            evalue  => $ev
+          }
+        )
+      );
+
+    }
+  }
+
+  return $hmmRes;
+}
+
+#-------------------------------------------------------------------------------
+
+=head2 parseHMMER1
+
+  Title    : parseHMMER1
+  Usage    : $self->parseHMMER1(\*FH )
+  Function : This is a minimal parser for reading in the output of HMMER1 hmmsearch.
+           : There are a few hacks to get round some of them requirements     
+  Args     : The file handle to hmmsearch output
+  Returns  : A Bio::Pfam::HMM::HMMResults object
+  
+=cut
+
+sub parseHMMER1 {
+  my $self = shift;
+  my $file = shift;
+
+  my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
+
+  my %seqh;
+  my $count = 0;
+
+  while (<$file>) {
+    if ( my ( $bits, $s, $e, $id, $de ) =
+/^(-?\d+\.?\d*)\s+\(bits\)\s+f:\s+(\d+)\s+t:\s+(\d+)\s+Target:\s+(\S+)\s+(.*)/
+      )
+    {
+      if ( $id =~ /(\S+)\/(\d+)-(\d+)/ ) {
+        $id = $1;
+        $s  = $2 + $s - 1;
+        $e  = $2 + $e - 1;
+      }
+
+      if ( !$hmmRes->seqs->{$id} ) {
+        $hmmRes->addHMMSeq(
+          Bio::Pfam::HMM::HMMSequence->new(
+            {
+              bits       => $bits,
+              evalue     => 1,
+              name       => $id,
+              desc       => $de,
+              numberHits => 1
+            }
+          )
+        );
+      }
+      $hmmRes->addHMMUnit(
+        Bio::Pfam::HMM::HMMUnit->new(
+          {
+            name    => $id,
+            seqFrom => $s,
+            seqTo   => $e,
+            hmmFrom => "1",
+            hmmTo   => "1",
+            bits    => $bits,
+            evalue  => "1"
+          }
+        )
+      );
+      if ( $bits > $hmmRes->seqs->{$id}->bits ) {
+        $hmmRes->seqs->{$id}->bits($bits);
+      }
+    }
+  }
+  return $hmmRes;
+}
+
+#-------------------------------------------------------------------------------
+
+=head2 writeScoresFile
+
+  Title    : writeScoresFile
+  Usage    : $hmmResIO->writeScoresFile( $hmmRes)
+  Function : Writes a scores file for a Bio::Pfam::HMM::HMMResults object.
+  Args     : Bio::Pfam::HMM::HMMResults
+  Returns  : Nothing
+  
+=cut
+
+sub writeScoresFile {
+  my ( $self, $hmmRes ) = @_;
+
+  unless ($hmmRes) {
+    confess "A Bio::Pfam::HMM::HMMResults object was not parsed in\n";
+  }
+  unless ( $hmmRes->isa("Bio::Pfam::HMM::HMMResults") ) {
+    confess("Variable passed in is not a Bio::Pfam::HMM::Results object");
+  }
+
+  my $fh;
+  open( $fh, ">" . $self->scores )
+    or confess "Could not open " . $self->scores . ":[$!]\n";
+
+  my ( $lowSeq, $lowDom, $highSeq, $highDom );
+  $lowSeq  = $lowDom  =  999999.99;
+  $highSeq = $highDom = -999999.99;
+  unless ( defined $hmmRes->domThr and defined $hmmRes->seqThr ) {
+    warn "No threshold set, setting to 25.0 bits\n";
+    $hmmRes->domThr("25.0");
+    $hmmRes->seqThr("25.0");
+  }
+
+  my @sigUnits;
+
+  foreach my $seqId ( keys %{ $hmmRes->seqs } ) {
+
+    #Does this sequence score above or equal to the sequence threshold?
+    if ( $hmmRes->seqs->{$seqId}->bits >= $hmmRes->seqThr ) {
+
+      #Is this the lowest sequence thresh
+      if ( $hmmRes->seqs->{$seqId}->bits < $lowSeq ) {
+        $lowSeq = $hmmRes->seqs->{$seqId}->bits;
+      }
+
+#For each of the regions found on the sequence, look to see if the match is great
+#than the domain threshold.  If it is, is it lower than we we have seen previously
+      foreach my $unit ( @{ $hmmRes->seqs->{$seqId}->hmmUnits } ) {
+        if ( $unit->bits >= $hmmRes->domThr ) {
+          push( @sigUnits, $unit );
+          if ( $unit->bits < $lowDom ) {
+            $lowDom = $unit->bits();
+          }
+        }
+        elsif ( $unit->bits > $highDom ) {
+          $highDom = $unit->bits;
+        }
+      }
+    }
+    else {
+
+      #Is this the highest sequence thres below the cut-off
+      if ( $hmmRes->seqs->{$seqId}->bits > $highSeq ) {
+        $highSeq = $hmmRes->seqs->{$seqId}->bits;
+      }
+
+#For each of the regions found on the sequence, look to see if the match is great
+#than the domain threshold.  If it is, is it lower than we we have seen previously
+      foreach my $unit ( @{ $hmmRes->seqs->{$seqId}->hmmUnits } ) {
+        if ( $unit->bits < $hmmRes->domThr && $unit->bits > $highDom ) {
+          $highDom = $unit->bits;
+        }
+      }
+    }
+  }
+
+  $hmmRes->domTC($lowDom);
+  $hmmRes->seqTC($lowSeq);
+  $hmmRes->domNC($highDom);
+  $hmmRes->seqNC($highSeq);
+
+  #Print the domains to the scores file
+  foreach my $u ( sort { $b->bits <=> $a->bits } @sigUnits ) {
+    print $fh
+      sprintf( "%.1f %s/%s-%s %s-%s %s\n", $u->bits, $u->name, $u->envFrom, $u->envTo, $u->seqFrom, $u->seqTo, $u->evalue );
+  }
+  close($fh);
+
+}
+
+#-------------------------------------------------------------------------------
+
+#TODO - write _readAlign
+
+=head2 _readAlign
+
+  Title    :
+  Usage    :  
+  Function :
+  Args     :
+  Returns  :
+  
+=cut
+
+sub _readAlign {
+  my ( $self, $fh, $hmmRes ) = @_;
+
+}
+
+#Parse the alignment section
+#if($pp){
+
+#}else{
+#  while(<HS>){
+#    last if(/^\/\//)
+#  }
+#}
+
+
+
+sub _readFooter {
+  my($self, $fh, $hmmRes ) = @_;
+ 
+  # We are going to parse something like this! 
+    
+  #  Internal pipeline statistics summary:
+#-------------------------------------
+#Query sequence(s):                         1  (360 residues)
+#Target model(s):                           7  (836 nodes)
+#Passed MSV filter:                         2  (0.285714); expected 0.1 (0.02)
+#Passed Vit filter:                         1  (0.142857); expected 0.0 (0.001)
+#Passed Fwd filter:                         1  (0.142857); expected 0.0 (1e-05)
+#Initial search space (Z):                  7  [actual number of targets]
+#Domain search space  (domZ):               1  [number of targets reported over threshold]
+## CPU time: 0.00u 0.00s 00:00:00.00 Elapsed: 00:00:00
+## Mc/sec: inf
+#//  
+
+  while(<$fh>){
+    if(/\/\//){
+      last;
+    }
+  }
+}
+
+
+#Parse the internal summary section
+#Internal statistics summary:
+#----------------------------
+#Query HMM(s):      1  (0 nodes)
+#Target sequences:  5323441  (0 residues)
+#Passed MSV filter: 116519  (-37389918065567040729448769671768824784852036328367855636063687997915136.000; expected 19991592792512146725679052970637918208.000)
+#Passed Vit filter: 7579  (-0.0000; expected -35982214160587876085407389642471051723332987952235753317595472501307733302049608744822636544.0000)
+#Passed Fwd filter: 1687  (8.3e+165; expected -7.5e-266)
+#Mc/sec:            828.85
+# CPU time: 115.36u 4.45s 00:01:59.81 Elapsed: 00:03:01
+
+#sub writeHMMSearch {
+#  my ( $self, $hmmRes ) = @_;
+#  my $fh;
+#  open($fh, ">".$self->outfile."\n");
+#
+#  $self->_writeHeader($fh, $hmmRes);
+#  $self->_writeSeqHits( $fh, $hmmRes);
+#  $self->_writeDomHits( $fh, $hmmRes);
+#  $self->_writeAlign( $fh, $hmmRes) if($self->align);
+#  $self->_writeInternalSummary( $fh, $hmmRes);
+#}
+#sub mergeHMMSearch {
+#  my ( $self, $filenames ) = @_;
+#}
+
+
+sub write_ascii_out {
+
+    my ($self, $HMMResults, $fh, $scanData, $e_seq, $e_dom, $b_seq, $b_dom) = @_;
+
+
+    $scanData->{_max_seqname} = 20 unless($scanData->{_max_seqname} or $scanData->{_max_seqname} < 1);
+    
+    my $ga;
+
+    if($e_seq or $e_dom) {
+	$e_seq = $e_dom unless($e_seq);
+	$e_dom = "10" unless($e_dom);
+    } 
+    elsif($b_seq or $b_dom) {
+	$b_seq = $b_dom unless($b_seq);
+	$b_dom = "0" unless($b_dom);
+    }
+    else {
+	$ga = 1;
+    }
+
+
+    foreach my $unit ( sort { $a->seqFrom <=> $b->seqFrom } @{ $HMMResults->units } ) {    
+
+        if($unit->name =~ /Pfam\-B/) {
+
+	    next unless($HMMResults->seqs->{$unit->name}->evalue <= "0.001" and $unit->evalue <= "0.001");
+
+
+	    printf $fh "%-".$scanData->{_max_seqname}."s %6d %6d %6d %6d %-11s %-16s %7s %5d %5d %5d %8s %9s %3s %-8s\n",
+	    $HMMResults->seqName,
+	    $unit->seqFrom,
+	    $unit->seqTo,
+	    $unit->envFrom,
+	    $unit->envTo,
+	    $scanData->{_accmap}->{ $unit->name },
+	    $unit->name,
+	    "Pfam-B",
+	    $unit->hmmFrom,
+	    $unit->hmmTo,
+	    $scanData->{_model_len}->{ $unit->name },
+	    $unit->bits,
+	    $unit->evalue,
+	    "NA",
+	    "NA";
+
+
+	}
+	else {
+
+            #Filter results based on thresholds
+	    if($ga) {
+		next unless($unit->sig);
+	    }
+	    if($e_seq) {
+		next unless($HMMResults->seqs->{$unit->name}->evalue <= $e_seq and $unit->evalue <= $e_dom);
+	    }
+	    if($b_seq) {
+		
+		next unless($HMMResults->seqs->{$unit->name}->bits >= $b_seq and $unit->bits >= $b_dom);
+	    }
+	    
+	    my $clan = $scanData->{_clanmap}->{ $unit->name } || "No_clan";
+	    
+	    
+	    printf $fh "%-".$scanData->{_max_seqname}."s %6d %6d %6d %6d %-11s %-16s %7s %5d %5d %5d %8s %9s %3d %-8s ",
+	    $HMMResults->seqName,
+	    $unit->seqFrom,
+	    $unit->seqTo,
+	    $unit->envFrom,
+	    $unit->envTo,
+	    $scanData->{_accmap}->{ $unit->name },
+	    $unit->name,
+	    $scanData->{_type}->{ $unit->name },
+	    $unit->hmmFrom,
+	    $unit->hmmTo,
+	    $scanData->{_model_len}->{ $unit->name },
+	    $unit->bits,
+	    $unit->evalue,
+	    $unit->sig, 
+	    $clan;
+	
+	    
+	    if($unit->{'act_site'}) {
+		local $" = ",";
+		print $fh "predicted_active_site[@{$unit->{'act_site'}}]";
+	    }
+	
+	    if($scanData->{_translate}){
+	      my $strand = '?';
+	      my $start = '-';
+	      my $end   = '-';
+	      if(exists($scanData->{_orf}->{$HMMResults->seqName})){
+	       $strand = $scanData->{_orf}->{$HMMResults->seqName}->{strand};  
+	       if($strand eq '+'){
+	         $start = $scanData->{_orf}->{$HMMResults->seqName}->{start} + ($unit->envFrom * 3) - 3;
+	         $end = $scanData->{_orf}->{$HMMResults->seqName}->{start} + ($unit->envTo * 3) - 3;
+	       }elsif($strand eq '-'){
+	         $start = $scanData->{_orf}->{$HMMResults->seqName}->{start} - ($unit->envFrom * 3) + 3;
+           $end = $scanData->{_orf}->{$HMMResults->seqName}->{start} - ($unit->envTo * 3) + 3;
+	       }
+	      }
+	      print $fh "$strand $start $end";
+	    }
+	
+	    print $fh "\n";
+	}
+
+	if($scanData->{_align}){
+	    print $fh sprintf( "%-10s %s\n", "#HMM",   $unit->hmmalign->{hmm} );
+	    print $fh sprintf( "%-10s %s\n", "#MATCH", $unit->hmmalign->{match} );
+	    print $fh sprintf( "%-10s %s\n", "#PP",   $unit->hmmalign->{pp});
+	    print $fh sprintf( "%-10s %s\n", "#SEQ",   $unit->hmmalign->{seq});
+	    print $fh sprintf( "%-10s %s\n", "#CS",   $unit->hmmalign->{cs}) if($unit->hmmalign->{cs});
+	}
+	
+    }
+    
+}
+
+1;