Mercurial > repos > matteoc > agame_custom_tools
view pfamScan/Bio/Pfam/Scan/PfamScan.pm @ 0:68a3648c7d91 draft default tip
Uploaded
author | matteoc |
---|---|
date | Thu, 22 Dec 2016 04:45:31 -0500 |
parents | |
children |
line wrap: on
line source
=head1 NAME Bio::Pfam::Scan::PfamScan =cut package Bio::Pfam::Scan::PfamScan; =head1 SYNOPSIS my $ps = Bio::Pfam::Scan::PfamScan->new( -cut_off => $hmmscan_cut_off, -dir => $dir, -clan_overlap => $clan_overlap, -fasta => $fasta, -align => $align, -as => $as ); $ps->search; $ps->write_results; =head1 DESCRIPTION $Id: PfamScan.pm,v 1.4 2010-01-12 09:41:42 jm14 Exp $ =cut use strict; use warnings; use Bio::Pfam::HMM::HMMResultsIO; use Bio::Pfam::Active_site::as_search; use Bio::SimpleAlign; use Bio::Pfam::Scan::Seq; use Carp; use IPC::Run qw( start finish ); #------------------------------------------------------------------------------- #- constructor ----------------------------------------------------------------- #------------------------------------------------------------------------------- =head1 METHODS =head2 new The only constructor for the object. Accepts a set of arguments that specify the parameters for the search: =over =item -cut_off =item -dir =item -clan_overlap =item -fasta =item -sequence =item -align =item -hmm =item -as =back =cut sub new { my ( $class, @args ) = @_; my $self = {}; bless $self, $class; # To avoid hard coding the location for the binary, we assume it will be on the path..... $self->{_HMMSCAN} = 'hmmscan'; # handle arguments, if we were given any here $self->_process_args(@args) if @args; return $self; } #------------------------------------------------------------------------------- #- public methods -------------------------------------------------------------- #------------------------------------------------------------------------------- =head2 search The main method on the object. Performs a C<hmmscan> search using the supplied sequence and the specified HMM library. =cut sub search { my ( $self, @args ) = @_; # handle the arguments, if we were handed any here $self->_process_args(@args) if @args; # set up the output header $self->_build_header; croak qq(FATAL: no sequence given; set the search parameters before calling "search") unless defined $self->{_sequence}; my ( %AllResults, $pfamB, $firstResult ); foreach my $hmmlib ( @{ $self->{_hmmlib} } ) { my ( @hmmscan_cut_off, $seq_evalue, $dom_evalue ); if ( $hmmlib !~ /Pfam\-B/ ) { @hmmscan_cut_off = @{ $self->{_hmmscan_cutoff} }; } else { $pfamB = 1; $seq_evalue = 0.001; $dom_evalue = 0.001; # It's a pfamB search so use some default cut off values push @hmmscan_cut_off, '-E', $seq_evalue, '--domE', $dom_evalue; } push @{ $self->{_header} }, "# cpu number specified: " . $self->{_cpu} . "\n" if ( $hmmlib !~ /Pfam\-B/ and $self->{_cpu} ); push @{ $self->{_header} }, "# searching against: " . $self->{_dir} . "/$hmmlib, with cut off " . join( " ", @hmmscan_cut_off ) . "\n"; my @params; if ( $self->{_cpu} ) { @params = ( 'hmmscan', '--notextw', '--cpu', $self->{_cpu}, @hmmscan_cut_off, $self->{_dir} . '/' . $hmmlib, $self->{_fasta} ); } else { @params = ( 'hmmscan', '--notextw', @hmmscan_cut_off, $self->{_dir} . '/' . $hmmlib, $self->{_fasta} ); } print STDERR "PfamScan::search: hmmscan command: |@params|\n" if $ENV{DEBUG}; print STDERR 'PfamScan::search: sequence: |' . $self->{_sequence} . "|\n" if $ENV{DEBUG}; my $run = start \@params, '<pipe', \*IN, '>pipe', \*OUT, '2>pipe', \*ERR or croak qq(FATAL: error running hmmscan; IPC::Run returned '$?'); # print IN $self->{_sequence}; ; close IN; $self->{_hmmresultIO} = Bio::Pfam::HMM::HMMResultsIO->new; $self->{_all_results} = $self->{_hmmresultIO}->parseMultiHMMER3( \*OUT ); close OUT; my $err; while (<ERR>) { $err .= $_; } close ERR; finish $run or croak qq|FATAL: error running hmmscan ($err); ipc returned '$?'|; unless ( $hmmlib =~ /Pfam\-B/ ) { if ( $self->{_clan_overlap} ) { push( @{ $self->{_header} }, "# resolve clan overlaps: off\n" ); } else { push( @{ $self->{_header} }, "# resolve clan overlaps: on\n" ); $self->_resolve_clan_overlap; } if ( $self->{_as} ) { push( @{ $self->{_header} }, "# predict active sites: on\n" ); $self->_pred_act_sites; } else { push( @{ $self->{_header} }, "# predict active sites: off\n" ); } if ( $self->{_translate} ) { push @{ $self->{_header} }, "# translate DNA sequence: " . $self->{_translate} . "\n"; } } # Determine which hits are significant foreach my $result ( @{ $self->{_all_results} } ) { foreach my $unit ( sort { $a->seqFrom <=> $b->seqFrom } @{ $result->units } ) { unless ($pfamB) { $unit->sig(0); if ( $result->seqs->{ $unit->name }->bits >= $self->{_seqGA}->{ $unit->name } ) { if ( $unit->bits >= $self->{_domGA}->{ $unit->name } ) { $unit->sig(1); } } } } } if ($firstResult) { $AllResults{ $self->{_all_results} } = $self->{_all_results}; } else { $firstResult = $self->{_all_results}; } } # end of "foreach $hmmlib" # If more than one search, merge results into one object if ( keys %AllResults ) { foreach my $AllResult ( keys %AllResults ) { foreach my $seq_id ( keys %{ $self->{_seq_hash} } ) { my $flag; #If seq exists in both, add all units from $AllResult to $firstResult foreach my $result ( @{$firstResult} ) { if ( $result->seqName eq $seq_id ) { $flag = 1; foreach my $result2 ( @{ $AllResults{$AllResult} } ) { if ( $result2->seqName eq $seq_id ) { foreach my $hmmname ( keys %{ $result2->seqs } ) { $result->addHMMSeq( $result2->seqs->{$hmmname} ); } foreach my $unit ( @{ $result2->units } ) { $result->addHMMUnit($unit); } } } } } #If seq doesn't exist in $firstResult, need to add both sequence and units to $firstResult unless ($flag) { foreach my $result2 ( @{ $AllResults{$AllResult} } ) { if ( $result2->seqName eq $seq_id ) { push @{$firstResult}, $result2; } } } } } $self->{_all_results} = $firstResult; } # end of "if keys %AllResults" push @{ $self->{_header} }, "# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =\n#\n"; if ( $self->{_as} ) { push @{ $self->{_header} }, "# <seq id> <alignment start> <alignment end> <envelope start> <envelope end> <hmm acc> <hmm name> <type> <hmm start> <hmm end> <hmm length> <bit score> <E-value> <significance> <clan> <predicted_active_site_residues>"; } else { push @{ $self->{_header} }, "# <seq id> <alignment start> <alignment end> <envelope start> <envelope end> <hmm acc> <hmm name> <type> <hmm start> <hmm end> <hmm length> <bit score> <E-value> <significance> <clan>"; } if ( $self->{_translate} ) { push @{ $self->{_header} }, " <strand> <nt start> <nt end>"; } push @{ $self->{_header} }, "\n"; } #------------------------------------------------------------------------------- =head2 write_results Writes the results of the C<hmmscan> search. Takes a single argument, which can be an open filehandle or a filename. A fatal error is generated if a file of the given name already exists. =cut sub write_results { my ( $self, $out, $e_seq, $e_dom, $b_seq, $b_dom ) = @_; my $fh; if ( ref $out eq 'GLOB' ) { # we were handed a filehandle $fh = $out; } elsif ( $out and not ref $out ) { # we were handed a filename croak qq(FATAL: output file "$out" already exists) if -f $out; open( FH, ">$out" ) or croak qq(FATAL: Can\'t write to your output file "$out": $!); $fh = \*FH; } else { # neither filehandle nor filename, default to STDOUT $fh = \*STDOUT; } if ( $self->{_header} ) { my $header = join '', @{ $self->{_header} }; print $fh "$header\n"; } foreach my $result ( @{ $self->{_all_results} } ) { $self->{_hmmresultIO} ->write_ascii_out( $result, $fh, $self, $e_seq, $e_dom, $b_seq, $b_dom ); } close $fh; } #------------------------------------------------------------------------------- =head2 results Returns the search results. =cut sub results { my ( $self, $e_value ) = @_; unless ( defined $self->{_all_results} ) { carp "WARNING: call search() before trying to retrieve results"; return; } my @search_results = (); foreach my $hmm_result ( @{ $self->{_all_results} } ) { push @search_results, @{ $hmm_result->results( $self, $e_value ) }; } return \@search_results; } #------------------------------------------------------------------------------- #- private methods ------------------------------------------------------------- #------------------------------------------------------------------------------- =head1 PRIVATE METHODS =head2 _process_args Handles the input arguments. =cut sub _process_args { my ( $self, @args ) = @_; # accept both a hash and a hash ref my $args = ( ref $args[0] eq 'HASH' ) ? shift @args : {@args}; # make sure we get a sequence if ( $args->{-fasta} and $args->{-sequence} ) { croak qq(FATAL: "-fasta" and "-sequence" are mutually exclusive); } elsif ( $args->{-fasta} ) { croak qq(FATAL: fasta file "$args->{-fasta}" doesn\'t exist) unless -s $args->{-fasta}; } elsif ( $args->{-sequence} ) { croak qq(FATAL: no sequence given) unless length( $args->{-sequence} ); } else { croak qq(FATAL: must specify either "-fasta" or "-sequence"); } # check the cut off if ( ( $args->{-e_seq} and ( $args->{-b_seq} || $args->{-b_dom} ) ) or ( $args->{-b_seq} and ( $args->{-e_seq} || $args->{-e_dom} ) ) or ( $args->{-b_dom} and $args->{-e_dom} ) ) { croak qq(FATAL: can\'t use e value and bit score threshold together); } $self->{_hmmscan_cutoff} = (); if ( $args->{-e_seq} ) { croak qq(FATAL: the E-value sequence cut-off "$args->{-e_seq}" must be a positive non-zero number) unless $args->{-e_seq} > 0; push @{ $self->{_hmmscan_cutoff} }, '-E', $args->{-e_seq}; } if ( $args->{-e_dom} ) { croak q(FATAL: if you supply "-e_dom" you must also supply "-e_seq") unless $args->{-e_seq}; croak qq(FATAL: the E-value domain cut-off "$args->{-e_dom}" must be positive non-zero number) unless $args->{-e_dom} > 0; push @{ $self->{_hmmscan_cutoff} }, '--domE', $args->{-e_dom}; } if ( $args->{-b_seq} ) { push @{ $self->{_hmmscan_cutoff} }, '-T', $args->{-b_seq}; } if ( $args->{-b_dom} ) { croak q(FATAL: if you supply "-b_dom" you must also supply "-b_seq") unless $args->{-b_seq}; push @{ $self->{_hmmscan_cutoff} }, '--domT', $args->{-b_dom}; } unless ( $self->{_hmmscan_cutoff} ) { push @{ $self->{_hmmscan_cutoff} }, '--cut_ga'; } # make sure we have a valid directory for the HMM data files croak qq(FATAL: directory "$args->{-dir}" does not exist) unless -d $args->{-dir}; # populate the object $self->{_cut_off} = $args->{-cut_off}; $self->{_dir} = $args->{-dir}; $self->{_clan_overlap} = $args->{-clan_overlap}; $self->{_fasta} = $args->{-fasta}; $self->{_align} = $args->{-align}; $self->{_as} = $args->{-as}; $self->{_sequence} = $args->{-sequence}; $self->{_cpu} = $args->{-cpu}; $self->{_translate} = $args->{-translate}; $self->{_hmmlib} = []; if ( $args->{-hmmlib} ) { if ( ref $args->{-hmmlib} eq 'ARRAY' ) { push @{ $self->{_hmmlib} }, @{ $args->{-hmmlib} }; } else { push @{ $self->{_hmmlib} }, $args->{-hmmlib}; } } else { push @{ $self->{_hmmlib} }, "Pfam-A.hmm"; } # Now check that the library exists in the data dir! foreach my $hmmlib ( @{ $self->{_hmmlib} } ) { croak qq(FATAL: can't find $hmmlib and/or $hmmlib binaries in "$args->{-dir}") unless ( -s $self->{_dir}, "/$hmmlib" and -s $self->{_dir} . "/$hmmlib.h3f" and -s $self->{_dir} . "/$hmmlib.h3i" and -s $self->{_dir} . "/$hmmlib.h3m" and -s $self->{_dir} . "/$hmmlib.h3p" and -s $self->{_dir} . "/$hmmlib.dat" ); # read the necessary data, if it's not been read already $self->_read_pfam_data; } $self->{_max_seqname} = 0; # if there's nothing in "_sequence" try to load a fasta file $self->_read_fasta unless $self->{_sequence}; # check again for a sequence. If we don't have one at this point, bail with # an error croak qq(FATAL: no sequence given) unless $self->{_sequence}; # read fasta file, store maximum sequence name and store sequences for active # sites prediction $self->_parse_sequence unless $self->{_max_seqname}; if ( $self->{_as} ) { $self->_parse_act_site_data unless $self->{_read_read_act_site_data}; } if ( $self->{_translate} ) { $self->_translate_fasta; } # see if a version number was specified $self->{_version} = $args->{version}; } #------------------------------------------------------------------------------- =head2 _build_header Adds version to the header object =cut sub _build_header { my ( $self, $version ) = @_; unshift @{ $self->{_header} }, '# query sequence file: ' . $self->{_fasta} . "\n"; unshift @{ $self->{_header} }, <<EOF_license; # Copyright (c) 2009 Genome Research Ltd\n# Freely distributed under the GNU # General Public License # # Authors: Jaina Mistry (jaina\@ebi.ac.uk), # Rob Finn (rdf\@ebi.ac.uk) # # This is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later version. # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more # details. # # You should have received a copy of the GNU General Public License along with # this program. If not, see <http://www.gnu.org/licenses/>. # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = EOF_license my $v = ( defined $self->{_version} ) ? "version $version, " : ''; unshift @{ $self->{_header} }, "# pfam_scan.pl, $v run at " . scalar(localtime) . "\n#\n"; } #------------------------------------------------------------------------------- =head2 _read_fasta Reads a sequence from the fasta-format file that was specified in the parameters. =cut sub _read_fasta { my $self = shift; open( FASTA, $self->{_fasta} ) or croak qq(FATAL: Couldn't open fasta file "$self->{_fasta}" $!\n); my @rows = <FASTA>; close FASTA; $self->{_sequence_rows} = \@rows; $self->{_sequence} = join '', @rows; } #------------------------------------------------------------------------------- =head2 _resolve_clan_overlap Resolves overlaps between clans. =cut sub _resolve_clan_overlap { my $self = shift; my @no_clan_overlap = (); foreach my $result ( @{ $self->{_all_results} } ) { my $new = $result->remove_overlaps_by_clan( $self->{_clanmap}, $self->{_nested} ); push @no_clan_overlap, $new; } $self->{_all_results} = \@no_clan_overlap; } #------------------------------------------------------------------------------- =head2 _pred_act_sites Predicts active sites. Takes no arguments. Populates the "act_site" field on each results object. =cut sub _pred_act_sites { my $self = shift; # print STDERR "predicting active sites...\n"; my $hmm_file = $self->{_dir} . '/Pfam-A.hmm'; RESULT: foreach my $result ( @{ $self->{_all_results} } ) { # print STDERR "result: |" . $result->seqName . "|\n"; UNIT: foreach my $unit ( @{ $result->units } ) { # print STDERR "family: |" . $unit->name . "|\n"; next UNIT unless ( $self->{_act_site_data}->{ $unit->name }->{'alignment'} ); my $seq_region = substr( $self->{_seq_hash}->{ $result->seqName }, $unit->seqFrom - 1, $unit->seqTo - $unit->seqFrom + 1 ); my $seq_se = $unit->seqFrom . '-' . $unit->seqTo; # print STDERR "seq_id: |" . $result->seqName . "|\n"; # print STDERR "seq_se: |" . $seq_se . "|\n"; # print STDERR "seq_region: |" . $seq_region . "|\n"; # print STDERR "family: |" . $unit->name . "|\n"; # print STDERR "hmm_file: |" . $hmm_file . "|\n"; # print STDERR "dir: |" . $self->{_dir} . "|\n"; $unit->{act_site} = Bio::Pfam::Active_site::as_search::find_as( $self->{_act_site_data}->{ $unit->name }->{'alignment'}, $self->{_act_site_data}->{ $unit->name }->{'residues'}, $result->seqName, $seq_se, $seq_region, $unit->name, $hmm_file ); } } } #------------------------------------------------------------------------------- =head2 _read_pfam_data Reads the Pfam data file ("Pfam-A.scan.dat") and populates the C<accmap>, C<nested> and C<clanmap> hashes on the object. =cut sub _read_pfam_data { my $self = shift; #print STDERR "reading " . $self->{_hmmlib} . ".dat\n" if($ENV{DEBUG}); $self->{_accmap} = {}; $self->{_nested} = {}; $self->{_clanmap} = {}; $self->{_desc} = {}; $self->{_seqGA} = {}; $self->{_domGA} = {}; $self->{_type} = {}; $self->{_model_len} = {}; foreach my $hmmlib ( @{ $self->{_hmmlib} } ) { my $scandat = $self->{_dir} . '/' . $hmmlib . '.dat'; open( SCANDAT, $scandat ) or croak qq(FATAL: Couldn't open "$scandat" data file: $!); my $id; while (<SCANDAT>) { if (m/^\#=GF ID\s+(\S+)/) { $id = $1; } elsif (m/^\#=GF\s+AC\s+(\S+)/) { $self->{_accmap}->{$id} = $1; } elsif (m/^\#=GF\s+DE\s+(.+)/) { $self->{_desc}->{$id} = $1; } elsif (m/^\#=GF\s+GA\s+(\S+)\;\s+(\S+)\;/) { $self->{_seqGA}->{$id} = $1; $self->{_domGA}->{$id} = $2; } elsif (m/^\#=GF\s+TP\s+(\S+)/) { $self->{_type}->{$id} = $1; } elsif (m/^\#=GF\s+ML\s+(\d+)/) { $self->{_model_len}->{$id} = $1; } elsif (/^\#=GF\s+NE\s+(\S+)/) { $self->{_nested}->{$id}->{$1} = 1; $self->{_nested}->{$1}->{$id} = 1; } elsif (/^\#=GF\s+CL\s+(\S+)/) { $self->{_clanmap}->{$id} = $1; } } close SCANDAT; # set a flag to show that we've read the data files already $self->{ '_read_' . $hmmlib } = 1; } } #------------------------------------------------------------------------------- =head2 _read_act_site_data Reads the Pfam active site data file ("active_site.dat") and populates the C<act_site_data> hashes on the object. =cut sub _parse_act_site_data { my $self = shift; my $as_dat = $self->{_dir} . '/active_site.dat'; $self->{_act_site_data} = {}; open( AS, $as_dat ) or croak qq(FATAL: Couldn\'t open "$as_dat" data file: $!); my ( $fam_id, $aln ); while (<AS>) { if (/^ID\s+(\S+)/) { $fam_id = $1; $aln = new Bio::SimpleAlign; } elsif (/^AL\s+(\S+)\/(\d+)\-(\d+)\s+(\S+)/) { my ( $seq_id, $st, $en, $seq ) = ( $1, $2, $3, $4 ); $aln->add_seq( Bio::Pfam::Scan::Seq->new( '-seq' => $seq, '-id' => $seq_id, '-start' => $st, '-end' => $en, '-type' => 'aligned' ) ); } elsif (/^RE\s+(\S+)\s+(\d+)/) { my ( $seq_id, $res ) = ( $1, $2 ); push( @{ $self->{_act_site_data}->{$fam_id}->{'residues'}->{$seq_id} }, $res ); } elsif (/^\/\//) { $self->{_act_site_data}->{$fam_id}->{'alignment'} = $aln; $fam_id = ""; $aln = ""; } else { warn "Ignoring line:\n[$_]"; } } close AS; $self->{_read_read_act_site_data} = 1; } #------------------------------------------------------------------------------- =head2 _parse_sequence This method is used to parse the sequence and hash it on sequence identifier. It also stores the length of the longest sequence id =cut sub _parse_sequence { my $self = shift; my $seq_hash = {}; my $seq_id; foreach ( @{ $self->{_sequence_rows} } ) { next if m/^\s*$/; #Ignore blank lines if (m/^>(\S+)/) { $seq_id = $1; if ( exists( $seq_hash->{$seq_id} ) ) { croak "FATAL: Sequence identifiers must be unique. Your fasta file contains two sequences with the same id ($seq_id)"; } #Store the max length of seq name, use this later when printing in ascii $self->{_max_seqname} = length($seq_id) if ( !$self->{_max_seqname} or length($seq_id) > $self->{_max_seqname} ); } else { croak "FATAL: Unrecognised format of fasta file. Each sequence must have a header line in the format '>identifier <optional description>'" unless defined $seq_id; chomp; $seq_hash->{$seq_id} .= $_; } } $self->{_seq_hash} = $seq_hash; } #------------------------------------------------------------------------------- =head2 _translate_fasta Uses the HMMER v2.3.2 progam "translate" to perform a six-frame translation of the input sequence. Checks the parameter "-translate". Accepted arguments are "all" and "orf", where "all" means (from the "translate" help text) "translate in full, with stops; no individual ORFs" and "orf" means "report only ORFs greater than minlen" where minlen is set to the default of 20. =cut sub _translate_fasta { my ($self) = @_; my $translatedFasta = $self->{_fasta} . ".translated"; my @params = ( 'translate', '-q', ); if ( $self->{_translate} eq 'all' ) { push( @params, '-a' ); } elsif ( $self->{_translate} eq 'orf' ) { push( @params, '-l', '20' ); } else { croak qq(Unexpected parameter '$self->{_translate}'); } push( @params, '-o', $translatedFasta, $self->{_fasta} ); print STDERR "PfamScan::translate_fasta: translate command: |@params|\n" if $ENV{DEBUG}; my $run = start \@params, '<pipe', \*IN, '>pipe', \*OUT, '2>pipe', \*ERR or croak qq(FATAL: error running translate; IPC::Run returned '$?'); close IN; close OUT; my $err; while (<ERR>) { $err .= $_; } close ERR; finish $run or croak qq|FATAL: error running translate ($err); ipc returned '$?'|; open( F, "<", $translatedFasta ) or croak qw(Could not open $translatedFasta '$!'); if ( $self->{_translate} eq 'orf' ) { while (<F>) { if (/^>\s?(\S+).*nt (\d+)\.+(\d+)/) { $self->{_orf}->{$1}->{start} = $2; $self->{_orf}->{$1}->{end} = $3; $self->{_orf}->{$1}->{strand} = ( $2 < $3 ) ? '+' : '-'; } } } else { my $currentSeq; my $currentFrame; my $currentLen = 0; my $maxEnd = 0; while (<F>) { chomp; if (/^>\s?(\S+\:)(\d+)/) { if ( $currentLen > 0 ) { my $seqName = $currentSeq . $currentFrame; if ( $currentFrame < 3 ) { my $start = 1 + $currentFrame; my $end = $start + $currentLen - 1; $self->{_orf}->{$seqName}->{strand} = '+'; $self->{_orf}->{$seqName}->{start} = $start; $self->{_orf}->{$seqName}->{end} = $end; $maxEnd = $end if ( $end > $maxEnd ); } else { my $start = $maxEnd - ( $currentFrame - 3 ); my $end = $start - $currentLen + 1; $self->{_orf}->{$seqName}->{strand} = '-'; $self->{_orf}->{$seqName}->{start} = $start; $self->{_orf}->{$seqName}->{end} = $end; } } $currentLen = 0; $currentSeq = $1; $currentFrame = $2; } else { $currentLen += length($_) * 3; } } my $seqName = $currentSeq . $currentFrame; if ( $currentFrame < 3 ) { my $start = 1 + $currentFrame; my $end = $start + $currentLen - 1; $self->{_orf}->{$seqName}->{strand} = '+'; $self->{_orf}->{$seqName}->{start} = $start; $self->{_orf}->{$seqName}->{end} = $end; $maxEnd = $end if ( $end > $maxEnd ); } else { my $start = $maxEnd - ( $currentFrame - 3 ); my $end = $start - $currentLen + 1; $self->{_orf}->{$seqName}->{strand} = '-'; $self->{_orf}->{$seqName}->{start} = $start; $self->{_orf}->{$seqName}->{end} = $end; } } $self->{_fasta} = $translatedFasta; } #------------------------------------------------------------------------------- =head1 COPYRIGHT Copyright (c) 2009: Genome Research Ltd. Authors: Jaina Mistry (jm14@sanger.ac.uk), John Tate (jt6@sanger.ac.uk), Rob Finn (finnr@janelia.hhmi.org) This is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. or see the on-line version at http://www.gnu.org/copyleft/gpl.txt =cut 1;