annotate pfamScan/Bio/Pfam/HMM/HMMResultsIO.pm @ 0:68a3648c7d91 draft default tip

Uploaded
author matteoc
date Thu, 22 Dec 2016 04:45:31 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1 # HMMResultsIO.pm
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
2 #
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
3 # Author: rdf
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
4 # Maintainer: $Id: HMMResultsIO.pm,v 1.2 2009-12-01 15:42:20 jt6 Exp $
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
5 # Version: $Revision: 1.2 $
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
6 # Created: Nov 16, 2008
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
7 # Last Modified: $Date: 2009-12-01 15:42:20 $
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
8
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
9 =head1 NAME
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
10
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
11 Bio::Pfam::HMM::HMMResultsIO - read HMMER3 search output and read/write PFAMOUT files
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
12
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
13 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
14
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
15 package Bio::Pfam::HMM::HMMResultsIO;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
16
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
17 =head1 DESCRIPTION
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
18
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
19 Parses HMMER3 search output into Bio::Pfam::HMM::HMMResults objects, writes those results out in the PFAMOUT format, and reads PFAMOUT files back in.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
20
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
21 $Id: HMMResultsIO.pm,v 1.2 2009-12-01 15:42:20 jt6 Exp $
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
22
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
23 =head1 COPYRIGHT
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
24
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
25 File: HMMResultsIO.pm
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
26
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
27 Copyright (c) 2007: Genome Research Ltd.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
28
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
29 Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
30
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
31 This is free software; you can redistribute it and/or
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
32 modify it under the terms of the GNU General Public License
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
33 as published by the Free Software Foundation; either version 2
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
34 of the License, or (at your option) any later version.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
35
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
36 This program is distributed in the hope that it will be useful,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
37 but WITHOUT ANY WARRANTY; without even the implied warranty of
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
38 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
39 GNU General Public License for more details.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
40
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
41 You should have received a copy of the GNU General Public License
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
42 along with this program; if not, write to the Free Software
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
43 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
44 or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
45
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
46 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
47
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
48 use strict;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
49 use warnings;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
50 use Moose;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
51 use Carp;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
52
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
53 #All the things we need to objectify the search results
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
54 use Bio::Pfam::HMM::HMMResults;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
55 use Bio::Pfam::HMM::HMMSequence;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
56 use Bio::Pfam::HMM::HMMUnit;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
57
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
58 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
59
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
60 =head1 ATTRIBUTES
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
61
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
62
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
63
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
64 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
65
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
#Moose attributes. Every one is read-write and carries a default value.

#Integer attribute, 0 unless set. Not consulted by the methods visible in
#this part of the file (presumably an alignment on/off flag - TODO confirm).
has align => ( is => 'rw', isa => 'Str' eq 'Int' ? 'Str' : 'Int', default => 0 );

#String attribute, defaults to 'OUTPUT'. Not consulted by the methods
#visible in this part of the file (presumably an output file name - TODO
#confirm).
has outfile => ( is => 'rw', isa => 'Str', default => 'OUTPUT' );

#Path of the file that writePFAMOUT creates; defaults to 'PFAMOUT'.
has pfamout => ( is => 'rw', isa => 'Str', default => 'PFAMOUT' );

#String attribute, defaults to 'scores'. Not consulted by the methods
#visible in this part of the file (presumably a scores file name - TODO
#confirm).
has scores => ( is => 'rw', isa => 'Str', default => 'scores' );
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
89
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
90 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
91
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
92 =head1 METHODS
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
93
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
94 =head2 parseHMMER3
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
95
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
96 Title : parseHMMER3
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
97 Usage : $hmmResIO->parseHMMER3( filename )
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
98 Function : Parse the output of a HMMER3 search
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
99 Args : Filename containing the search
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
100 Returns : A Bio::Pfam::HMM::HMMResults object
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
101
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
102 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
103
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
#Parse one HMMER3 result set.
#
# Args    : $filename - path of a HMMER3 output file, or an already open
#           handle (GLOB reference or IO::Handle-derived object such as
#           IO::File).
# Returns : the Bio::Pfam::HMM::HMMResults object that was handed, in turn,
#           to _readHeader, _readSeqHits, _readUnitHits and _readFooter.
# Throws  : confesses when no input is given or the file cannot be opened.
sub parseHMMER3 {
  my ( $self, $filename ) = @_;

  #Guard explicitly: a three-argument open given an undefined name may
  #succeed by opening an anonymous temporary file instead of failing.
  unless ( defined $filename and length $filename ) {
    confess "No filename or filehandle passed to parseHMMER3\n";
  }

  require Scalar::Util;
  my $fh;
  my $openedHere = 0;
  if ( ref($filename) eq 'GLOB'
    or ( Scalar::Util::blessed($filename) and $filename->isa('IO::Handle') ) )
  {
    #The caller owns this handle: read from it and leave it open.
    $fh = $filename;
  }
  else {
    #Three-argument open, so that characters such as '>' or '|' in the
    #name are never interpreted as an open mode or a command.
    open( $fh, '<', $filename )
      or confess "Could not open $filename:[$!]\n";
    $openedHere = 1;
  }

  my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
  $self->_readHeader( $fh, $hmmRes );
  $self->_readSeqHits( $fh, $hmmRes );
  $self->_readUnitHits( $fh, $hmmRes );
  $self->_readFooter( $fh, $hmmRes );

  #Only close what this method opened itself.
  close($fh) if $openedHere;
  return ($hmmRes);
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
122
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
123
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
124
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
#Parse a file or stream that holds several consecutive HMMER3 result sets.
#
# Args    : $filename - path of a HMMER3 output file, or an already open
#           handle (GLOB reference or IO::Handle-derived object such as
#           IO::File).
# Returns : reference to an array of Bio::Pfam::HMM::HMMResults objects, one
#           for each header that was read.
# Throws  : confesses when no input is given or the file cannot be opened.
sub parseMultiHMMER3 {
  my ( $self, $filename ) = @_;

  #Guard explicitly: a three-argument open given an undefined name may
  #succeed by opening an anonymous temporary file instead of failing.
  unless ( defined $filename and length $filename ) {
    confess "No filename or filehandle passed to parseMultiHMMER3\n";
  }

  require Scalar::Util;
  my $fh;
  my $openedHere = 0;
  if ( ref($filename) eq 'GLOB'
    or ( Scalar::Util::blessed($filename) and $filename->isa('IO::Handle') ) )
  {
    #The caller owns this handle: read from it and leave it open.
    #blessed() is checked first so that an unblessed reference can never
    #reach ->isa, which would die with an unrelated error.
    $fh = $filename;
  }
  else {
    #Three-argument open, so that characters such as '>' or '|' in the
    #name are never interpreted as an open mode or a command.
    open( $fh, '<', $filename )
      or confess "Could not open $filename:[$!]\n";
    $openedHere = 1;
  }

  my @hmmResAll;
  my $program;
  while ( !eof($fh) ) {
    my $hmmRes = Bio::Pfam::HMM::HMMResults->new;

    #A true return value from _readHeader ends the loop before the fresh
    #results object is stored.
    my $eof = $self->_readHeader( $fh, $hmmRes );
    last if ($eof);
    push( @hmmResAll, $hmmRes );

    #Remember the most recent program name; a result set whose header did
    #not set one inherits it. Nothing is copied until a name has been seen.
    if ( $hmmRes->program ) {
      $program = $hmmRes->program;
    }
    elsif ( defined $program ) {
      $hmmRes->program($program);
    }

    $self->_readSeqHits( $fh, $hmmRes );
    $self->_readUnitHits( $fh, $hmmRes );
    $self->_readFooter( $fh, $hmmRes );
  }

  #Only close what this method opened itself.
  close($fh) if $openedHere;
  return ( \@hmmResAll );
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
156
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
#Parse several HMMER3 output files into one combined results object.
#
# Args    : $files - reference to an array of file names.
# Returns : a single Bio::Pfam::HMM::HMMResults object that was passed to
#           the _read* helpers once for every file, in list order.
# Throws  : confesses when a file name is missing or a file cannot be opened.
sub parseSplitHMMER3 {
  my ( $self, $files ) = @_;

  my $hmmRes = Bio::Pfam::HMM::HMMResults->new;

  foreach my $filename ( @{$files} ) {

    #Guard explicitly: a three-argument open given an undefined name may
    #succeed by opening an anonymous temporary file instead of failing.
    unless ( defined $filename and length $filename ) {
      confess "Undefined or empty filename passed to parseSplitHMMER3\n";
    }

    #Three-argument open, so that characters such as '>' or '|' in the
    #name are never interpreted as an open mode or a command.
    open( my $fh, '<', $filename )
      or confess "Could not open $filename:[$!]\n";

    $self->_readHeader( $fh, $hmmRes );
    $self->_readSeqHits( $fh, $hmmRes );
    $self->_readUnitHits( $fh, $hmmRes );
    $self->_readFooter( $fh, $hmmRes );

    close($fh);
  }

  return ($hmmRes);
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
174
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
175
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
176 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
177
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
178 =head2 convertHMMSearch
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
179
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
180 Title : convertHMMSearch
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
181 Usage : $hmmResIO->convertHMMSearch('SEARCHFILE')
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
182 Function : This wraps up a couple of methods to convert the more complex hmmsearch
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
183 : results into the nice clean format that we Pfam-ers are used to.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
184 Args : The filename of the hmmsearch output file
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
185 Returns : The Bio::Pfam::HMM::HMMResults object that was parsed and written out
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
186
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
187 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
188
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
#Parse a HMMER3 output file and write the result out as a PFAMOUT file.
#
# Args    : $filename - path of the hmmsearch output file.
# Returns : the Bio::Pfam::HMM::HMMResults object produced by parseHMMER3,
#           after it has been passed to writePFAMOUT.
# Throws  : confesses when no file name is given, or when the file is
#           missing or has zero size.
sub convertHMMSearch {
  my ( $self, $filename ) = @_;

  #Test for "defined and non-empty" rather than truth, so that a file
  #literally named "0" is not rejected.
  unless ( defined $filename and length $filename ) {
    confess "No filename passed in to convertHMMSearch\n";
  }

  #-s is false both for a missing file and for an existing empty one, so
  #the message has to cover both cases.
  unless ( -s $filename ) {
    confess "$filename does not exist or is empty\n";
  }

  #Now parse in the raw HMM output and write out the results as a PFAMOUT
  my $hmmRes = $self->parseHMMER3($filename);
  $self->writePFAMOUT($hmmRes);
  return $hmmRes;
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
204
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
205 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
206
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
207 =head2 writePFAMOUT
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
208
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
209 Title : writePFAMOUT
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
210 Usage : $hmmResIO->writePFAMOUT( $hmmRes )
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
211 Function : Writes a Bio::Pfam::HMM::HMMResults object in to a PFAMOUT file.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
212 Args : A Bio::Pfam::HMM::HMMResults
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
213 Returns : Nothing
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
214
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
215 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
216
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
#Write a results object to the file named by the 'pfamout' attribute.
#
# The file gets a "Sequence scores" section (one line per entry of
# $hmmRes->eachHMMSeq) followed by a "Domain scores" section (one line per
# entry of $hmmRes->units). Both sections are ordered by descending bits.
#
# Args    : $hmmRes - a Bio::Pfam::HMM::HMMResults object.
# Returns : nothing.
# Throws  : confesses when $hmmRes is missing or of the wrong class, or when
#           the output file cannot be opened or closed.
sub writePFAMOUT {
  my ( $self, $hmmRes ) = @_;

  unless ($hmmRes) {
    confess "A Bio::Pfam::HMM::HMMResults object was not parsed in\n";
  }

  #blessed() is checked first so that a plain (unblessed) reference gets
  #the intended message instead of dying inside ->isa.
  require Scalar::Util;
  unless ( Scalar::Util::blessed($hmmRes)
    and $hmmRes->isa("Bio::Pfam::HMM::HMMResults") )
  {
    confess(
      "Variable passed in is not a Bio::Pfam::HMM::HMMResults object");
  }

  #Three-argument open, so that the output name is never interpreted as
  #part of the open mode.
  my $outName = $self->pfamout;
  open( my $fh, '>', $outName )
    or confess "Could not open " . $outName . ":[$!]\n";

  print $fh <<HEAD;
# ===========
# Pfam output
# ===========
#
# Sequence scores
# ---------------
#
# name description bits evalue n exp bias

HEAD

  foreach
    my $seq ( sort { $b->bits <=> $a->bits } ( @{ $hmmRes->eachHMMSeq } ) )
  {

    #Keep at most the first 42 characters of the description, upper-cased.
    #A lexical is used so that the caller's $_ is left untouched, and a
    #missing or empty description becomes an empty column.
    my $fullDesc = $seq->desc;
    my ($desc) =
      defined($fullDesc) ? ( $fullDesc =~ /^(.{1,42})/ ) : ();
    $desc = defined($desc) ? uc($desc) : '';

    #NOTE(review): when exp or bias is undefined the "-" placeholder is
    #fed to a %5.1f conversion, which prints 0.0 and raises a "not numeric"
    #warning. Left as is because changing the output could affect readers
    #of this format - confirm the intended behaviour.
    printf $fh (
      "%-15s %-42s %8.1f %9s %3d %5.1f %5.1f\n",
      $seq->name,
      $desc,
      $seq->bits,
      $seq->evalue,
      scalar( @{ $seq->hmmUnits } ),
      defined( $seq->exp )  ? $seq->exp  : "-",
      defined( $seq->bias ) ? $seq->bias : "-"
    );
  }

  print $fh <<HEAD;
#
# Domain scores
# -------------
#
# name env-st env-en ali-st ali-en hmm-st hmm-en bits evalue hit bias
#

HEAD

  foreach my $dom ( sort { $b->bits <=> $a->bits } @{ $hmmRes->units } ) {
    printf $fh (
      "%-15s %6d %6d %6d %6d %6s %6s %6.1f %9s %6d %6.1f\n",
      $dom->name,
      $dom->envFrom,
      $dom->envTo,
      $dom->seqFrom,
      $dom->seqTo,
      $dom->hmmFrom,
      $dom->hmmTo,
      $dom->bits,
      $dom->evalue,
      $dom->domain,
      defined( $dom->bias ) ? $dom->bias : "-",
    );
  }

  #Buffered write errors only surface when the handle is closed.
  close($fh)
    or confess "Could not close " . $outName . ":[$!]\n";

  return;
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
291
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
292 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
293
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
294 =head2 parsePFAMOUT
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
295
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
296 Title : parsePFAMOUT
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
297 Usage : $self->parsePFAMOUT($filename)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
298 Function : Reads in a PFAMOUT file. This file contains the minimal amount of information
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
299 : required to construct a pfam ALIGN file.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
300 Args : A filename, or an open filehandle (GLOB reference)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
301 Returns : A Bio::Pfam::HMM::HMMResults object
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
302
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
303 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
304
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
#Read a PFAMOUT file back into a results object.
#
# Lines before the "# Domain scores" marker are read as sequence records and
# added with addHMMSeq; lines after it are read as domain records and added
# with addHMMUnit. Comment lines and blank lines are skipped; anything else
# produces a warning.
#
# Args    : $filename - path of a PFAMOUT file, or an open handle given as a
#           GLOB reference.
# Returns : a Bio::Pfam::HMM::HMMResults object.
# Throws  : confesses when no input is given or the file cannot be opened.
# Note    : the handle is closed before returning, including when it was
#           supplied by the caller.
sub parsePFAMOUT {
  my $self     = shift;
  my $filename = shift;

  unless ( defined $filename and length $filename ) {
    confess('No filename or filehandle passed to parsePFAMOUT');
  }

  my $fh;
  if ( ref($filename) eq 'GLOB' ) {
    $fh = $filename;
  }
  else {

    #Three-argument open, so that characters such as '>' or '|' in the
    #name are never interpreted as an open mode or a command.
    open( $fh, '<', $filename )
      or confess "Could not open $filename:[$!]\n";
  }
  my $hmmRes = Bio::Pfam::HMM::HMMResults->new;

  #Each line is read into a lexical, so the caller's $_ is left untouched.

  #Sequence section: everything up to the domain section marker.
  while ( my $line = <$fh> ) {
    last if $line =~ /^# Domain scores/;

    #Columns: name, description, bits, evalue, number of hits, exp, bias.
    #The exp column is matched but not stored.
    if ( my ( $name, $desc, $bits, $evalue, $numberHits, $bias ) =
      $line =~ /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\d+)\s+\S+\s+(\S+)\s*$/ )
    {
      $hmmRes->addHMMSeq(
        Bio::Pfam::HMM::HMMSequence->new(
          {
            name       => $name,
            desc       => $desc,
            bits       => $bits,
            evalue     => $evalue,
            numberHits => $numberHits,
            bias       => $bias
          }
        )
      );
    }
    elsif ( $line =~ /^#|^\s+$/ ) {
      next;
    }
    else {
      warn "Did not parse|$line|\n";
    }
  }

  #Domain section: the rest of the file.
  while ( my $line = <$fh> ) {

    #Columns: name, env start/end, ali start/end, hmm start/end, bits,
    #evalue, hit number, bias. The hit number is matched but not stored.
    if (
      my (
        $name,    $envFrom, $envTo, $seqFrom, $seqTo,
        $hmmFrom, $hmmTo,   $bits,  $evalue,  $bias
      )
      = $line =~
/^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+\S+\s+(\S+)/
      )
    {
      $hmmRes->addHMMUnit(
        Bio::Pfam::HMM::HMMUnit->new(
          {
            name    => $name,
            envFrom => $envFrom,
            envTo   => $envTo,
            seqFrom => $seqFrom,
            seqTo   => $seqTo,
            hmmFrom => $hmmFrom,
            hmmTo   => $hmmTo,
            bits    => $bits,
            evalue  => $evalue,
            bias    => $bias
          }
        )
      );
    }
    elsif ( $line =~ /^#|^\s+$/ ) {

      #This also covers empty lines, which are read as "\n".
      next;
    }
    else {
      warn "Did not parse: |$line|";
    }
  }
  close($fh);
  return ($hmmRes);
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
384
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
385 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
386
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
=head2 _readHeader

  Title    : _readHeader
  Usage    : Private method.  $self->_readHeader(\*FH, $hmmResults)
  Function : Reads the header section of HMMER3 output and stores every value
             it recognises (HMM file, sequence database, output file, query
             name and length, E-value threshold, random seed, description and
             program name) on the results object.  Reading stops at the
             'Scores for complete ...' line that opens the sequence hit table.
  Args     : The file handle to the HMMER3 output,
             a Bio::Pfam::HMM::HMMResults object
  Returns  : 1 if a line starting with '[ok]' is read before any hit table;
             otherwise nothing (empty list / undef)
  Throws   : Dies on a line that matches none of the known header patterns

=cut

#Parse the header part of the output first;
sub _readHeader {
  my ( $self, $hs, $hmmRes ) = @_;

  # NOTE: $hs is used as given; nothing here checks that it is a defined,
  # open file handle.

  # A lexical line variable is used so the caller's $_ is left untouched.
  while ( defined( my $line = <$hs> ) ) {
    if ( $line =~ /^Scores for complete/ ) {

      # Start of the per-sequence hit table: the header is finished.
      last;
    }
    elsif ( $line =~ /^# query HMM file:\s+(\S+)/ ) {
      $hmmRes->hmmName($1);
    }
    elsif ( $line =~ /^# target sequence database:\s+(\S+)/ ) {
      $hmmRes->seqDB($1);
    }
    elsif ( $line =~ /^output directed to file:\s+(\S+)/ ) {
      $hmmRes->thisFile($1);
    }
    elsif ( $line =~ /^Query:\s+(\S+)\s+\[M\=(\d+)\]/ ) {

      # [M=n]: the query is a model, n is stored as the HMM length.
      $hmmRes->seedName($1);
      $hmmRes->hmmLength($2);
    }
    elsif ( $line =~ /^Query:\s+(\S+)\s+\[L\=(\d+)\]/ ) {

      # [L=n]: the query is a sequence, n is stored as the sequence length.
      $hmmRes->seqName($1);
      $hmmRes->seqLength($2);
    }
    elsif ( $line =~
      /^sequence E-value threshold: <= (\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)/ )
    {

      # Capture the complete number so that fractional or exponent
      # thresholds (0.01, 1e-05) are not cut down to their leading digits.
      $hmmRes->evalueThr($1);
    }
    elsif ( $line =~ /^# Random generator seed: (\d+)/ ) {
      $hmmRes->randSeedNum($1);
    }
    elsif ( $line =~ /^Description:\s+(.*)/ ) {
      $hmmRes->description($1);
    }
    elsif ( $line =~ /^# (phmmer|hmmsearch|hmmscan|jackhmmer)/ ) {

      # Must be tested before the generic '#' branch below.
      $hmmRes->program($1);
    }
    elsif ( $line =~ /(^#)|(^$)/ ) {

      # Any other comment line, or a blank line.
      next;
    }
    elsif ( $line =~ /^Accession/ ) {
      next;
    }
    elsif ( $line =~ /^\[ok\]/ ) {

      # No further results follow; tell the caller.
      return (1);
    }
    else {
      die "Failed to parse hmmsearch results |$line| in header section\n";
    }
  }

  # Make the "header read, hits follow" result an explicit false value
  # rather than whatever the loop happened to leave behind.
  return;
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
442
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
443 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
444
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
=head2 _readSeqHits

  Title    : _readSeqHits
  Usage    : Private method.  $self->_readSeqHits(\*FH, $hmmResults)
  Function : Reads the per-sequence hit table from HMMER3 output and adds one
             Bio::Pfam::HMM::HMMSequence to the results object for every row.
             Reading stops at the 'Domain annotation for each ...' line.
  Args     : The file handle to the HMMER3 output,
             a Bio::Pfam::HMM::HMMResults object
  Returns  : Nothing
  Throws   : Dies on a row with fewer than 10 whitespace separated fields

=cut

sub _readSeqHits {
  my ( $self, $hs, $hmmRes ) = @_;

  # A lexical line variable is used so the caller's $_ is left untouched.
  while ( defined( my $line = <$hs> ) ) {

    #Match a line like this
    #    E-value  score  bias    E-value  score  bias    exp  N  Sequence Description
    #    ------- ------ -----    ------- ------ -----   ---- --  -------- -----------
    #      4e-83  285.8  10.0    5.3e-83  285.5   7.0    1.1  1  Q14SN3.1 Q14SN3_9HEPC Polyprotein (Fragment).

    # End of the table.  'Domain annotation for each ...' is the HMMER3b3
    # wording, 'Domain and alignment annotation for each ...' the HMMER3b2
    # one.  (?:sequence|model) is a real alternation; the former
    # [sequence|model] was a character class that only matched by accident.
    last
      if $line =~
      /^Domain (?:and alignment )?annotation for each (?:sequence|model)/;

    # Column titles, rulers and blank lines carry no data.
    next if $line =~ /^\s+(?:E-value|---)/;
    next if $line =~ /^$/;
    next if $line =~ /No hits detected that satisfy reporting thresholds/;

    #Assume that we have a sequence match
    # The row starts with white space, so field 0 is the empty string and
    # the data columns start at index 1.
    my @sMatch = split( /\s+/, $line );
    unless ( scalar(@sMatch) >= 10 ) {
      die "Expected at least 10 pieces of data: $line;\n";
    }

    # Everything after the sequence name is free-text description.
    my $desc;
    if ( scalar(@sMatch) >= 11 ) {
      $desc = join( " ", @sMatch[ 10 .. $#sMatch ] );
    }

    $hmmRes->addHMMSeq(
      Bio::Pfam::HMM::HMMSequence->new(
        {
          evalue     => $sMatch[1],
          bits       => $sMatch[2],
          bias       => $sMatch[3],
          exp        => $sMatch[7],
          numberHits => $sMatch[8],
          name       => $sMatch[9],
          desc       => defined($desc) ? $desc : "-",
        }
      )
    );
  }

  return;
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
508
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
509 #------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
510
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
=head2 _readUnitHits

  Title    : _readUnitHits
  Usage    : Private method.  $self->_readUnitHits(\*FH, $hmmResults)
  Function : Scans HMMER3 output for '>> <id>' record lines in the unit
             (domain) hit section and hands each one to _readUnitData.
  Args     : The file handle to the HMMER3 output,
             a Bio::Pfam::HMM::HMMResults object
  Returns  : Nothing

=cut

no warnings 'recursion';

sub _readUnitHits {
  my ( $self, $hs, $hmmRes ) = @_;

  # Nothing left to read once the results object is flagged as finished.
  return if $hmmRes->eof;

  # The domain hit section is a series of records, each introduced by a
  # line such as
  #
  #   >> P37935.1  MAAY4_SCHCO Mating-type protein A-alpha Y4.
  #
  # followed by a table with one row per domain and, optionally, an
  # 'Alignments for each domain:' block.  Only the '>>' line is recognised
  # here; the body of the record is read by _readUnitData.

  while (<$hs>) {

    # A line starting with 'Internal' closes the section.
    last if /^Internal/;

    if ( my ($seqId) = /\>\>\s+(\S+)/ ) {
      $self->_readUnitData( $seqId, $hs, $hmmRes );

      # _readUnitData records on the results object whether it ran into
      # the end of the results.
      return if $hmmRes->eof;
    }
  }
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
556
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
=head2 _readUnitData

  Title    : _readUnitData
  Usage    : Private method.  $self->_readUnitData($id, \*FH, $hmmResults)
  Function : Reads the body of one '>> <id>' record: the table with one row
             per domain and, when present, the 'Alignments for each domain:'
             block.  One Bio::Pfam::HMM::HMMUnit is built per table row, the
             alignment strings are appended to its hmmalign hash, and the
             units are added to the results object.  If the record is
             followed by another '>> <id>' line the method calls itself for
             that id.
  Args     : The id taken from the '>>' line, the file handle to the HMMER3
             output, a Bio::Pfam::HMM::HMMResults object
  Returns  : Nothing.  The eof flag of the results object is set to 1 when a
             line starting with '//' or 'Internal' was reached, to 0 otherwise.
  Throws   : Dies on a domain row that does not have 16 columns; confesses on
             a line that cannot be classified

=cut

sub _readUnitData {
  my ( $self, $id, $hs, $hmmRes ) = @_;

  if ( $hmmRes->eof ) {
    return;
  }
  my $hmmName = $hmmRes->seedName();

  my $seqName = $hmmRes->seqName;

  # A line starting with '//' or 'Internal' closes the results.  This has to
  # be an alternation: the former /^[(\/\/|Internal)]/ was a character class
  # and treated any line starting with one of "(/|Interal)" as the end.
  my $endOfHits = qr/^(?:\/\/|Internal)/;

  #   #    score  bias  c-Evalue  i-Evalue hmmfrom  hmm to    alifrom  ali to    envfrom  env to     acc
  # ---   ------ ----- --------- --------- ------- -------    ------- -------    ------- -------    ----
  #   1 !  244.0   0.5   9.5e-76   1.7e-70       1     146 [.       1     145 [.       1     146 [.    0.99
  #
  #  Alignments for each domain:

  my @units;
  my $align   = 1;    # true while an alignment block may still have to be read
  my $recurse = 0;    # true when the next '>>' record line has been consumed
  my $eof     = 0;    # true when the end of the results has been reached
  my ($nextSeqId);
  while ( defined( my $line = <$hs> ) ) {
    if ( $line =~ $endOfHits ) {
      $align   = 0;
      $recurse = 0;
      $eof     = 1;
      last;
    }
    elsif ( $line =~ /^\>\>\s+(\S+)/ ) {

      # The next record starts straight away: this one has no alignments.
      $nextSeqId = $1;
      $align     = 0;
      $recurse   = 1;
      last;
    }
    elsif ( $line =~ /^\s+Alignments for each domain:/ ) {
      $align   = 1;
      $recurse = 0;
      last;
    }
    elsif ( $line =~ /^\s+(#\s+score|---)/ ) {

      #Two human readable lines
      next;
    }
    elsif ( $line =~ /^$/ ) {

      #blank line
      next;
    }
    elsif ( $line =~ /^\s+\d+\s+/ ) {

      # The row starts with white space, so split yields an empty field 0
      # followed by the 16 data columns - 17 elements in total.
      my @dMatch = split( /\s+/, $line );
      unless ( scalar(@dMatch) == 17 ) {
        die "Expected 16 elements of data: $line\n";
      }

      # Indices 2, 9, 12 and 15 are not stored; in the example row above
      # they hold the '!' marker and the '[.'-style boundary markers.
      push(
        @units,
        Bio::Pfam::HMM::HMMUnit->new(
          {
            name      => $id,
            domain    => $dMatch[1],
            bits      => $dMatch[3],
            bias      => $dMatch[4],
            domEvalue => $dMatch[5],
            evalue    => $dMatch[6],
            hmmFrom   => $dMatch[7],
            hmmTo     => $dMatch[8],
            seqFrom   => $dMatch[10],
            seqTo     => $dMatch[11],
            envFrom   => $dMatch[13],
            envTo     => $dMatch[14],
            aliAcc    => $dMatch[16]
          }
        )
      );

      next;
    }
    elsif ( $line =~ /^\s+\[No individual domains/ ) {
      $align = 0;
      next;
    }
    else {
      confess("Did not parse line: $line");
    }
  }

  #  == domain 1    score: 27.6 bits;  conditional E-value: 7.4e-10
  #      PF00018  17 LsfkkGdvitvleksee.eWwkaelkdg.keGlvPsnYvep 55
  #                  L++++Gd+++++++++e++Ww++++++++++G++P+n+v+p
  #     P15498.4 617 LRLNPGDIVELTKAEAEqNWWEGRNTSTnEIGWFPCNRVKP 657
  #                  7899**********9999*******************9987 PP

  if ($align) {
    my $program = $hmmRes->program;
    $program = '' unless defined $program;

    # Work out which label introduces the model line and which one the
    # sequence line of an alignment; this depends on the program that
    # produced the output.
    my ( $hmmLabel, $seqLabel );
    if ( $hmmName and $program eq 'hmmsearch' ) {
      ( $hmmLabel, $seqLabel ) = ( $hmmName, $id );
    }
    elsif ( $seqName and $program eq 'hmmscan' ) {
      ( $hmmLabel, $seqLabel ) = ( $id, $seqName );
    }
    elsif ( $seqName
      and ( $program eq 'phmmer' or $program eq 'jackhmmer' ) )
    {
      ( $hmmLabel, $seqLabel ) = ( $seqName, $id );
    }

    # Default to patterns that never match.  An undefined pattern would
    # interpolate to the empty pattern, which Perl treats as "reuse the
    # last successfully matched pattern".
    my $pattern1 = qr/(?!)/;
    my $pattern2 = qr/(?!)/;
    if ( defined $hmmLabel ) {

      # \Q..\E quotes every label, so that ids containing regex
      # metacharacters ('|', '.', '[', ...) are matched literally.
      $pattern1 = qr/^\s+\Q$hmmLabel\E\s+\d+\s+(\S+)\s+\d+/;
      $pattern2 = qr/^\s+\Q$seqLabel\E\s+\d+\s+(\S+)\s+\d+/;
    }

    # NOTE: when both labels are identical every labelled line is taken by
    # $pattern1, because it is tested first.

    $recurse = 0;
    my $matchNo;       # number from the current '== domain N' line
    my $hmmlen = 0;    # width of the most recent model line
    while ( defined( my $line = <$hs> ) ) {
      if ( $line =~ $pattern1 ) {
        my $hmm = $1;
        $units[ $matchNo - 1 ]->hmmalign->{hmm} .= $hmm;
        $hmmlen = length($hmm);
      }
      elsif ( $line =~ $pattern2 ) {
        my $seq = $1;
        $units[ $matchNo - 1 ]->hmmalign->{seq} .= $seq;
      }
      elsif ( $line =~ /^\s+([x\.]+)\s+RF$/ ) {
        my $rf = $1;
        $units[ $matchNo - 1 ]->hmmalign->{rf} .= $rf;
      }
      elsif ( $line =~ /^\s+([0-9\*\.]+)\s+PP$/ ) {
        my $pp = $1;
        $units[ $matchNo - 1 ]->hmmalign->{pp} .= $pp;
      }
      elsif ( $line =~ /^\s+(\S+)\s+CS$/ ) {
        my $cs = $1;
        $units[ $matchNo - 1 ]->hmmalign->{cs} .= $cs;
      }
      elsif ( $line =~ /^\s+==\s+domain\s+(\d+)/ ) {
        $matchNo = $1;
      }
      elsif ( $line =~ /^\s+(.*)\s+$/ ) {

        # $1 is *not* the match - this fails if there are prepended
        # or appended spaces
        # Let's get a right substring based on the HMM length
        chomp $line;
        my $m1 = substr( $line, -$hmmlen );
        $units[ $matchNo - 1 ]->hmmalign->{match} .= $m1;
      }
      elsif ( $line =~ /^$/ ) {
        next;
      }
      elsif ( $line =~ $endOfHits ) {
        $align   = 0;
        $recurse = 0;
        $eof     = 1;
        last;
      }
      elsif ( $line =~ /^\>\>\s+(\S+)/ ) {
        $nextSeqId = $1;
        $recurse   = 1;
        last;
      }

      else {
        confess("Did not parse |$line| in units");
      }
    }
  }

  foreach my $u (@units) {
    $hmmRes->addHMMUnit($u);
  }

  $hmmRes->eof($eof);

  # The '>>' line of the next record has already been consumed, so carry on
  # with that record from here.
  if ( $recurse and $nextSeqId ) {
    $self->_readUnitData( $nextSeqId, $hs, $hmmRes );
  }
  return;
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
751 use warnings 'recursion';
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
752
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
753 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
754
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
755 =head2 parseHMMER2
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
756
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
757 Title : parseHMMER2
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
758 Usage : $self->parseHMMER2(\*FH )
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
759 Function : This is a minimal parser for reading in the output of HMMER2 hmmsearch
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
760 Args : The file handle to hmmsearch output
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
761 Returns : A Bio::Pfam::HMM::HMMResults object
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
762
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
763 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
764
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
765 sub parseHMMER2 {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
766 my $self = shift;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
767 my $file = shift;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
768
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
769 my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
770
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
771 my %seqh;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
772 my $count = 0;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
773
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
774 while (<$file>) {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
775 /^Scores for complete sequences/ && last;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
776 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
777
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
778 while (<$file>) {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
779 /^Parsed for domains/ && last;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
780 if ( my ( $id, $de, $sc, $ev, $hits ) =
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
781 /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\d+)\s*$/ )
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
782 {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
783 $hmmRes->addHMMSeq(
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
784 Bio::Pfam::HMM::HMMSequence->new(
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
785 {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
786 bits => $sc,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
787 evalue => $ev,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
788 name => $id,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
789 desc => $de,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
790 numberHits => $hits
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
791 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
792 )
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
793 );
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
794
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
795 $seqh{$id} = $sc;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
796 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
797 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
798
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
799 while (<$file>) {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
800 /^Histogram of all scores/ && last;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
801 if ( my ( $id, $sqfrom, $sqto, $hmmf, $hmmt, $sc, $ev ) =
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
802 /^(\S+)\s+\S+\s+(\d+)\s+(\d+).+?(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s*$/ )
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
803 {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
804 $hmmRes->addHMMUnit(
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
805 Bio::Pfam::HMM::HMMUnit->new(
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
806 {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
807 name => $id,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
808 seqFrom => $sqfrom,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
809 seqTo => $sqto,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
810 hmmFrom => $hmmf,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
811 hmmTo => $hmmt,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
812 bits => $sc,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
813 evalue => $ev
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
814 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
815 )
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
816 );
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
817
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
818 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
819 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
820
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
821 return $hmmRes;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
822 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
823
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
824 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
825
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
826 =head2 parseHMMER1
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
827
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
828 Title : parseHMMER1
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
829 Usage : $self->parseHMMER1(\*FH )
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
830 Function : This is a minimal parser for reading in the output of HMMER1 hmmsearch.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
831 : There are a few hacks to get round some of the requirements
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
832 Args : The file handle to hmmsearch output
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
833 Returns : A Bio::Pfam::HMM::HMMResults object
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
834
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
835 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
836
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
# Parse HMMER1 hmmsearch output read from the filehandle $file and return a
# populated Bio::Pfam::HMM::HMMResults object.
#
# Every line of the form
#   <bits> (bits) f: <from> t: <to> Target: <id> <description>
# produces one HMMUnit. The first hit seen for an id also creates its
# HMMSequence, and the sequence score is afterwards raised to the highest unit
# score seen for that id. Values that are not captured from the line
# (E-values, HMM coordinates, number of hits) are filled with the constant 1.
#
# Args    : $self - invocant (not used by the parser itself)
#           $file - filehandle positioned at the start of the output
# Returns : the Bio::Pfam::HMM::HMMResults object that was built
sub parseHMMER1 {
    my ( $self, $file ) = @_;

    my $hmmRes = Bio::Pfam::HMM::HMMResults->new;

    # Read into a lexical so that the caller's $_ is left untouched.
    while ( my $line = <$file> ) {
        my ( $bits, $s, $e, $id, $de ) = $line =~
          /^(-?\d+\.?\d*)\s+\(bits\)\s+f:\s+(\d+)\s+t:\s+(\d+)\s+Target:\s+(\S+)\s+(.*)/
          or next;

        # A target named "name/start-end" is a sub-sequence: strip the
        # coordinates from the id and shift the hit by the sub-sequence start.
        if ( my ( $name, $offset ) = $id =~ /(\S+)\/(\d+)-(\d+)/ ) {
            $id = $name;
            $s  = $offset + $s - 1;
            $e  = $offset + $e - 1;
        }

        if ( !$hmmRes->seqs->{$id} ) {
            $hmmRes->addHMMSeq(
                Bio::Pfam::HMM::HMMSequence->new(
                    {
                        bits       => $bits,
                        evalue     => 1,
                        name       => $id,
                        desc       => $de,
                        numberHits => 1
                    }
                )
            );
        }

        $hmmRes->addHMMUnit(
            Bio::Pfam::HMM::HMMUnit->new(
                {
                    name    => $id,
                    seqFrom => $s,
                    seqTo   => $e,
                    hmmFrom => "1",
                    hmmTo   => "1",
                    bits    => $bits,
                    evalue  => "1"
                }
            )
        );

        # Keep the sequence score at the best unit score seen so far.
        my $seq = $hmmRes->seqs->{$id};
        if ( $bits > $seq->bits ) {
            $seq->bits($bits);
        }
    }

    return $hmmRes;
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
890
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
891 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
892
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
893 =head2 writeScoresFile
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
894
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
895 Title : writeScoresFile
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
896 Usage : $hmmResIO->writeScoresFile( $hmmRes)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
897 Function : Writes a scores file for a Bio::Pfam::HMM::HMMResults object.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
898 Args : Bio::Pfam::HMM::HMMResults
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
899 Returns : Nothing
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
900
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
901 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
902
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
# Write the scores file for a Bio::Pfam::HMM::HMMResults object and record
# the observed cut-offs on that object.
#
# The output path is taken from $self->scores. When the domain or sequence
# threshold is not defined, both are set to 25.0 bits (with a warning).
# Each domain that scores at or above the domain threshold, on a sequence
# that scores at or above the sequence threshold, is written as one line,
# highest score first. The lowest passing scores are stored through
# seqTC/domTC and the highest failing scores through seqNC/domNC.
#
# Args    : $hmmRes - Bio::Pfam::HMM::HMMResults
# Returns : true once the file has been written and closed
# Throws  : confess() when $hmmRes is missing or of the wrong class, or when
#           the scores file cannot be opened or closed
sub writeScoresFile {
    my ( $self, $hmmRes ) = @_;

    unless ($hmmRes) {
        confess "A Bio::Pfam::HMM::HMMResults object was not parsed in\n";
    }
    unless ( $hmmRes->isa("Bio::Pfam::HMM::HMMResults") ) {
        confess("Variable passed in is not a Bio::Pfam::HMM::Results object");
    }

    # Three-argument open: the path is used verbatim and can never be
    # interpreted as part of the open mode.
    my $scoresFile = $self->scores;
    open( my $fh, '>', $scoresFile )
      or confess "Could not open " . $scoresFile . ":[$!]\n";

    # Sentinels; they survive unchanged when nothing falls on that side of
    # the thresholds.
    my ( $lowSeq, $lowDom, $highSeq, $highDom );
    $lowSeq  = $lowDom  = 999999.99;
    $highSeq = $highDom = -999999.99;

    unless ( defined $hmmRes->domThr and defined $hmmRes->seqThr ) {
        warn "No threshold set, setting to 25.0 bits\n";
        $hmmRes->domThr("25.0");
        $hmmRes->seqThr("25.0");
    }
    my $seqThr = $hmmRes->seqThr;
    my $domThr = $hmmRes->domThr;

    my @sigUnits;

    foreach my $seq ( values %{ $hmmRes->seqs } ) {
        my $seqBits = $seq->bits;

        if ( $seqBits >= $seqThr ) {

            # Track the lowest sequence score that still passes.
            $lowSeq = $seqBits if $seqBits < $lowSeq;

            # Domains on a passing sequence: collect those that pass the
            # domain threshold and track the lowest passing / highest
            # failing domain score.
            foreach my $unit ( @{ $seq->hmmUnits } ) {
                my $unitBits = $unit->bits;
                if ( $unitBits >= $domThr ) {
                    push( @sigUnits, $unit );
                    $lowDom = $unitBits if $unitBits < $lowDom;
                }
                elsif ( $unitBits > $highDom ) {
                    $highDom = $unitBits;
                }
            }
        }
        else {

            # Track the highest sequence score below the cut-off.
            $highSeq = $seqBits if $seqBits > $highSeq;

            # Domains on a failing sequence only contribute to the highest
            # failing domain score.
            foreach my $unit ( @{ $seq->hmmUnits } ) {
                my $unitBits = $unit->bits;
                if ( $unitBits < $domThr && $unitBits > $highDom ) {
                    $highDom = $unitBits;
                }
            }
        }
    }

    $hmmRes->domTC($lowDom);
    $hmmRes->seqTC($lowSeq);
    $hmmRes->domNC($highDom);
    $hmmRes->seqNC($highSeq);

    # Print the significant domains to the scores file, best score first.
    foreach my $u ( sort { $b->bits <=> $a->bits } @sigUnits ) {
        print {$fh} sprintf(
            "%.1f %s/%s-%s %s-%s %s\n",
            $u->bits,    $u->name,  $u->envFrom, $u->envTo,
            $u->seqFrom, $u->seqTo, $u->evalue
        );
    }

    # Buffered write errors only surface at close, so check it.
    close($fh)
      or confess "Could not close " . $scoresFile . ":[$!]\n";
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
982
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
983 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
984
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
985 #TODO - write _readAlign
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
986
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
987 =head2 _readAlign
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
988
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
989 Title :
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
990 Usage :
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
991 Function :
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
992 Args :
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
993 Returns :
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
994
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
995 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
996
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
# Placeholder for the reader of the alignment section; nothing is parsed yet
# and the filehandle is not advanced.
#
# Args    : $fh     - filehandle of the search output (currently unread)
#           $hmmRes - results object (currently untouched)
# Returns : nothing
sub _readAlign {
    my ( $self, $fh, $hmmRes ) = @_;

    # Return explicitly: without this the sub would hand back the value of
    # the argument unpacking above, i.e. the caller's own arguments.
    return;
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1001
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1002 #Parse the alignment section
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1003 #if($pp){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1004
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1005 #}else{
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1006 # while(<HS>){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1007 # last if(/^\/\//)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1008 # }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1009 #}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1010
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1011
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1012
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
# Skip the footer of a result: read and discard lines from $fh up to and
# including the first line that contains "//", or until end of file.
#
# Args    : $fh     - filehandle positioned inside or before the footer
#           $hmmRes - results object (accepted but not used)
# Returns : nothing
sub _readFooter {
    my ( $self, $fh, $hmmRes ) = @_;

    # We are going to skip over something like this:
    #
    # Internal pipeline statistics summary:
    # -------------------------------------
    # Query sequence(s): 1 (360 residues)
    # Target model(s): 7 (836 nodes)
    # Passed MSV filter: 2 (0.285714); expected 0.1 (0.02)
    # Passed Vit filter: 1 (0.142857); expected 0.0 (0.001)
    # Passed Fwd filter: 1 (0.142857); expected 0.0 (1e-05)
    # Initial search space (Z): 7 [actual number of targets]
    # Domain search space (domZ): 1 [number of targets reported over threshold]
    # # CPU time: 0.00u 0.00s 00:00:00.00 Elapsed: 00:00:00
    # # Mc/sec: inf
    # //

    # Read into a lexical so that the caller's $_ is left untouched.
    while ( my $line = <$fh> ) {
        last if $line =~ /\/\//;
    }

    return;
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1037
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1038
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1039 #Parse the internal summary section
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1040 #Internal statistics summary:
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1041 #----------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1042 #Query HMM(s): 1 (0 nodes)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1043 #Target sequences: 5323441 (0 residues)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1044 #Passed MSV filter: 116519 (-37389918065567040729448769671768824784852036328367855636063687997915136.000; expected 19991592792512146725679052970637918208.000)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1045 #Passed Vit filter: 7579 (-0.0000; expected -35982214160587876085407389642471051723332987952235753317595472501307733302049608744822636544.0000)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1046 #Passed Fwd filter: 1687 (8.3e+165; expected -7.5e-266)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1047 #Mc/sec: 828.85
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1048 # CPU time: 115.36u 4.45s 00:01:59.81 Elapsed: 00:03:01
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1049
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1050 #sub writeHMMSearch {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1051 # my ( $self, $hmmRes ) = @_;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1052 # my $fh;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1053 # open($fh, ">".$self->outfile."\n");
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1054 #
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1055 # $self->_writeHeader($fh, $hmmRes);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1056 # $self->_writeSeqHits( $fh, $hmmRes);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1057 # $self->_writeDomHits( $fh, $hmmRes);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1058 # $self->_writeAlign( $fh, $hmmRes) if($self->align);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1059 # $self->_writeInternalSummary( $fh, $hmmRes);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1060 #}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1061 #sub mergeHMMSearch {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1062 # my ( $self, $filenames ) = @_;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1063 #}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1064
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1065
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
# Print the domain hits held in $HMMResults to $fh, one line per reported
# unit, ordered by start position on the sequence.
#
# Filtering of units whose name does not match "Pfam-B":
#   * $e_seq / $e_dom given : sequence and domain E-value must be <= the
#     thresholds ($e_seq defaults to $e_dom, $e_dom defaults to 10);
#   * else $b_seq / $b_dom  : sequence and domain bit score must be >= the
#     thresholds ($b_seq defaults to $b_dom, $b_dom defaults to 0);
#   * else                  : only units flagged by $unit->sig are printed.
# Units whose name matches "Pfam-B" are printed when both the sequence and
# the domain E-value are <= 0.001.
#
# Args    : $HMMResults - results object (units, seqs and seqName are read)
#           $fh         - output filehandle
#           $scanData   - hash ref; the keys _max_seqname, _accmap, _type,
#                         _model_len, _clanmap, _translate, _orf and _align
#                         are read. _max_seqname is set to 20 when it is
#                         missing or smaller than 1.
#           $e_seq, $e_dom, $b_seq, $b_dom - optional thresholds, see above
# Returns : nothing
sub write_ascii_out {

    my ( $self, $HMMResults, $fh, $scanData, $e_seq, $e_dom, $b_seq, $b_dom ) = @_;

    # Fall back to a 20-column name field when no usable width was supplied.
    # The former guard, "unless (X or X < 1)", held for every possible value,
    # so the default could never be applied.
    unless ( $scanData->{_max_seqname} and $scanData->{_max_seqname} >= 1 ) {
        $scanData->{_max_seqname} = 20;
    }

    my $ga;

    if ( $e_seq or $e_dom ) {
        $e_seq = $e_dom unless ($e_seq);
        $e_dom = "10"   unless ($e_dom);
    }
    elsif ( $b_seq or $b_dom ) {
        $b_seq = $b_dom unless ($b_seq);
        $b_dom = "0"    unless ($b_dom);
    }
    else {

        # No explicit thresholds: rely on the per-unit significance flag.
        $ga = 1;
    }

    foreach my $unit ( sort { $a->seqFrom <=> $b->seqFrom } @{ $HMMResults->units } ) {

        if ( $unit->name =~ /Pfam\-B/ ) {

            next unless ( $HMMResults->seqs->{ $unit->name }->evalue <= "0.001" and $unit->evalue <= "0.001" );

            printf {$fh} "%-" . $scanData->{_max_seqname} . "s %6d %6d %6d %6d %-11s %-16s %7s %5d %5d %5d %8s %9s %3s %-8s\n",
              $HMMResults->seqName,
              $unit->seqFrom,
              $unit->seqTo,
              $unit->envFrom,
              $unit->envTo,
              $scanData->{_accmap}->{ $unit->name },
              $unit->name,
              "Pfam-B",
              $unit->hmmFrom,
              $unit->hmmTo,
              $scanData->{_model_len}->{ $unit->name },
              $unit->bits,
              $unit->evalue,
              "NA",
              "NA";
        }
        else {

            # Filter results based on thresholds
            if ($ga) {
                next unless ( $unit->sig );
            }
            if ($e_seq) {
                next unless ( $HMMResults->seqs->{ $unit->name }->evalue <= $e_seq and $unit->evalue <= $e_dom );
            }
            if ($b_seq) {
                next unless ( $HMMResults->seqs->{ $unit->name }->bits >= $b_seq and $unit->bits >= $b_dom );
            }

            my $clan = $scanData->{_clanmap}->{ $unit->name } || "No_clan";

            # No newline yet: optional columns may follow on the same line.
            printf {$fh} "%-" . $scanData->{_max_seqname} . "s %6d %6d %6d %6d %-11s %-16s %7s %5d %5d %5d %8s %9s %3d %-8s ",
              $HMMResults->seqName,
              $unit->seqFrom,
              $unit->seqTo,
              $unit->envFrom,
              $unit->envTo,
              $scanData->{_accmap}->{ $unit->name },
              $unit->name,
              $scanData->{_type}->{ $unit->name },
              $unit->hmmFrom,
              $unit->hmmTo,
              $scanData->{_model_len}->{ $unit->name },
              $unit->bits,
              $unit->evalue,
              $unit->sig,
              $clan;

            if ( $unit->{'act_site'} ) {
                local $" = ",";
                print {$fh} "predicted_active_site[@{$unit->{'act_site'}}]";
            }

            if ( $scanData->{_translate} ) {

                # Map the envelope back onto the source sequence using the
                # recorded ORF start: three positions per residue, counted
                # upwards on '+' and downwards on '-'.
                my $strand = '?';
                my $start  = '-';
                my $end    = '-';
                if ( exists( $scanData->{_orf}->{ $HMMResults->seqName } ) ) {
                    my $orf = $scanData->{_orf}->{ $HMMResults->seqName };
                    $strand = $orf->{strand};
                    if ( $strand eq '+' ) {
                        $start = $orf->{start} + ( $unit->envFrom * 3 ) - 3;
                        $end   = $orf->{start} + ( $unit->envTo * 3 ) - 3;
                    }
                    elsif ( $strand eq '-' ) {
                        $start = $orf->{start} - ( $unit->envFrom * 3 ) + 3;
                        $end   = $orf->{start} - ( $unit->envTo * 3 ) + 3;
                    }
                }
                print {$fh} "$strand $start $end";
            }

            print {$fh} "\n";
        }

        if ( $scanData->{_align} ) {
            print {$fh} sprintf( "%-10s %s\n", "#HMM",   $unit->hmmalign->{hmm} );
            print {$fh} sprintf( "%-10s %s\n", "#MATCH", $unit->hmmalign->{match} );
            print {$fh} sprintf( "%-10s %s\n", "#PP",    $unit->hmmalign->{pp} );
            print {$fh} sprintf( "%-10s %s\n", "#SEQ",   $unit->hmmalign->{seq} );
            print {$fh} sprintf( "%-10s %s\n", "#CS",    $unit->hmmalign->{cs} ) if ( $unit->hmmalign->{cs} );
        }

    }

    return;
}
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1185
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1186 1;