0
|
1 # HMMResultsIO.pm
|
|
2 #
|
|
3 # Author: rdf
|
|
4 # Maintainer: $Id: HMMResultsIO.pm,v 1.2 2009-12-01 15:42:20 jt6 Exp $
|
|
5 # Version: $Revision: 1.2 $
|
|
6 # Created: Nov 16, 2008
|
|
7 # Last Modified: $Date: 2009-12-01 15:42:20 $
|
|
8
|
|
9 =head1 NAME
|
|
10
|
|
11 Template - a short description of the class
|
|
12
|
|
13 =cut
|
|
14
|
|
15 package Bio::Pfam::HMM::HMMResultsIO;
|
|
16
|
|
17 =head1 DESCRIPTION
|
|
18
|
|
19 A more detailed description of what this class does and how it does it.
|
|
20
|
|
21 $Id: HMMResultsIO.pm,v 1.2 2009-12-01 15:42:20 jt6 Exp $
|
|
22
|
|
23 =head1 COPYRIGHT
|
|
24
|
|
25 File: HMMResultsIO.pm
|
|
26
|
|
27 Copyright (c) 2007: Genome Research Ltd.
|
|
28
|
|
29 Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk)
|
|
30
|
|
31 This is free software; you can redistribute it and/or
|
|
32 modify it under the terms of the GNU General Public License
|
|
33 as published by the Free Software Foundation; either version 2
|
|
34 of the License, or (at your option) any later version.
|
|
35
|
|
36 This program is distributed in the hope that it will be useful,
|
|
37 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
38 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
39 GNU General Public License for more details.
|
|
40
|
|
41 You should have received a copy of the GNU General Public License
|
|
42 along with this program; if not, write to the Free Software
|
|
43 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
44 or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
|
|
45
|
|
46 =cut
|
|
47
|
|
48 use strict;
|
|
49 use warnings;
|
|
50 use Moose;
|
|
51 use Carp;
|
|
52
|
|
53 #All the things we need to objectfy the search results
|
|
54 use Bio::Pfam::HMM::HMMResults;
|
|
55 use Bio::Pfam::HMM::HMMSequence;
|
|
56 use Bio::Pfam::HMM::HMMUnit;
|
|
57
|
|
58 #-------------------------------------------------------------------------------
|
|
59
|
|
60 =head1 ATTRIBUTES
|
|
61
|
|
62
|
|
63
|
|
64 =cut
|
|
65
|
|
66 has 'align' => (
|
|
67 isa => 'Int',
|
|
68 is => 'rw',
|
|
69 default => 0
|
|
70 );
|
|
71
|
|
72 has 'outfile' => (
|
|
73 isa => 'Str',
|
|
74 is => 'rw',
|
|
75 default => 'OUTPUT'
|
|
76 );
|
|
77
|
|
78 has 'pfamout' => (
|
|
79 isa => 'Str',
|
|
80 is => 'rw',
|
|
81 default => 'PFAMOUT'
|
|
82 );
|
|
83
|
|
84 has 'scores' => (
|
|
85 isa => 'Str',
|
|
86 is => 'rw',
|
|
87 default => 'scores'
|
|
88 );
|
|
89
|
|
90 #-------------------------------------------------------------------------------
|
|
91
|
|
92 =head1 METHODS
|
|
93
|
|
94 =head2 parseHMMER3
|
|
95
|
|
96 Title : parseHMMER
|
|
97 Usage : $hmmResIO->parseHMMSearch( filename )
|
|
98 Function : Parse the output from a HMMER3 search results
|
|
99 Args : Filename containing the search
|
|
100 Returns : A Bio::Pfam::HMM::HMMResults object
|
|
101
|
|
102 =cut
|
|
103
|
|
104 sub parseHMMER3 {
|
|
105 my ( $self, $filename ) = @_;
|
|
106 my $fh;
|
|
107
|
|
108 if(ref($filename) eq 'GLOB'){
|
|
109 $fh = $filename;
|
|
110 }else{
|
|
111 open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
|
|
112 }
|
|
113
|
|
114 # open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
|
|
115 my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
|
|
116 $self->_readHeader( $fh, $hmmRes );
|
|
117 $self->_readSeqHits( $fh, $hmmRes );
|
|
118 $self->_readUnitHits( $fh, $hmmRes );
|
|
119 $self->_readFooter($fh, $hmmRes);
|
|
120 return ($hmmRes);
|
|
121 }
|
|
122
|
|
123
|
|
124
|
|
125 sub parseMultiHMMER3 {
|
|
126 my ( $self, $filename ) = @_;
|
|
127 my $fh;
|
|
128
|
|
129 if(ref($filename) eq 'GLOB'){
|
|
130 $fh = $filename;
|
|
131 }elsif( ref($filename) and $filename->isa('IO::File') ) {
|
|
132 $fh = $filename;
|
|
133 }else{
|
|
134 open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
|
|
135 }
|
|
136
|
|
137 my @hmmResAll;
|
|
138 my $program;
|
|
139 while(!eof($fh)){
|
|
140 my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
|
|
141 my $eof = $self->_readHeader( $fh, $hmmRes );
|
|
142 last if($eof);
|
|
143 push(@hmmResAll, $hmmRes);
|
|
144 if($hmmRes->program) {
|
|
145 $program = $hmmRes->program;
|
|
146 }
|
|
147 else {
|
|
148 $hmmRes->program($program);
|
|
149 }
|
|
150 $self->_readSeqHits( $fh, $hmmRes );
|
|
151 $self->_readUnitHits( $fh, $hmmRes );
|
|
152 $self->_readFooter($fh, $hmmRes);
|
|
153 }
|
|
154 return (\@hmmResAll);
|
|
155 }
|
|
156
|
|
157 sub parseSplitHMMER3 {
|
|
158 my($self, $files ) = @_;
|
|
159
|
|
160 my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
|
|
161
|
|
162 foreach my $filename (@{$files}){
|
|
163 my ($fh);
|
|
164 open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
|
|
165 $self->_readHeader( $fh, $hmmRes );
|
|
166 $self->_readSeqHits( $fh, $hmmRes );
|
|
167 $self->_readUnitHits( $fh, $hmmRes );
|
|
168 $self->_readFooter($fh, $hmmRes);
|
|
169 }
|
|
170
|
|
171 return ( $hmmRes );
|
|
172
|
|
173 }
|
|
174
|
|
175
|
|
176 #-------------------------------------------------------------------------------
|
|
177
|
|
178 =head2 convertHMMSearch
|
|
179
|
|
180 Title : convertHMMSearch
|
|
181 Usage : $hmmResIO->convertHMMSearch('SEARCHFILE')
|
|
182 Function : This wraps up a couple of methods to convert the more complex hmmsearch
|
|
183 : results in to nice clean format that we Pfam-ers are used to.
|
|
184 Args : The filename of the hmmsearch output file
|
|
185 Returns : Nothing
|
|
186
|
|
187 =cut
|
|
188
|
|
189 sub convertHMMSearch {
|
|
190 my ( $self, $filename ) = @_;
|
|
191
|
|
192 unless ($filename) {
|
|
193 confess "No filename passed in to convertHMMSearch\n";
|
|
194 }
|
|
195 unless ( -s $filename ) {
|
|
196 confess "$filename does not exists\n";
|
|
197 }
|
|
198
|
|
199 #Now parse in the raw HMM output and write out the results as a PFAMOUT
|
|
200 my $hmmRes = $self->parseHMMER3($filename);
|
|
201 $self->writePFAMOUT($hmmRes);
|
|
202 return $hmmRes;
|
|
203 }
|
|
204
|
|
205 #-------------------------------------------------------------------------------
|
|
206
|
|
207 =head2 writePFAMOUT
|
|
208
|
|
209 Title : writePFAMOUT
|
|
210 Usage : $hmmResIO->writePFAMOUT( $hmmRes )
|
|
211 Function : Writes a Bio::Pfam::HMM:HMMResults object in to a PFAMOUT file.
|
|
212 Args : A Bio::Pfam::HMM:HMMResults
|
|
213 Returns : Nothing
|
|
214
|
|
215 =cut
|
|
216
|
|
217 sub writePFAMOUT {
|
|
218 my ( $self, $hmmRes ) = @_;
|
|
219
|
|
220 unless ($hmmRes) {
|
|
221 confess "A Bio::Pfam::HMM::HMMResults object was not parsed in\n";
|
|
222 }
|
|
223 unless ( $hmmRes->isa("Bio::Pfam::HMM::HMMResults") ) {
|
|
224 confess("Variable passed in is not a Bio::Pfam::HMM::Results object");
|
|
225 }
|
|
226
|
|
227 my $fh;
|
|
228 open( $fh, ">" . $self->pfamout )
|
|
229 or confess "Could not open " . $self->pfamout . ":[$!]\n";
|
|
230
|
|
231 print $fh <<HEAD;
|
|
232 # ===========
|
|
233 # Pfam output
|
|
234 # ===========
|
|
235 #
|
|
236 # Sequence scores
|
|
237 # ---------------
|
|
238 #
|
|
239 # name description bits evalue n exp bias
|
|
240
|
|
241 HEAD
|
|
242
|
|
243 foreach
|
|
244 my $seq ( sort { $b->bits <=> $a->bits } ( @{ $hmmRes->eachHMMSeq } ) )
|
|
245 {
|
|
246 $_ = $seq->desc;
|
|
247 my ($desc) = /^(.{1,42})/;
|
|
248 $desc = uc($desc);
|
|
249 printf $fh (
|
|
250 "%-15s %-42s %8.1f %9s %3d %5.1f %5.1f\n",
|
|
251 $seq->name,
|
|
252 $desc,
|
|
253 $seq->bits,
|
|
254 $seq->evalue,
|
|
255 scalar( @{ $seq->hmmUnits } ),
|
|
256 defined( $seq->exp ) ? $seq->exp : "-",
|
|
257 defined( $seq->bias ) ? $seq->bias : "-"
|
|
258 );
|
|
259 }
|
|
260
|
|
261 print $fh <<HEAD;
|
|
262 #
|
|
263 # Domain scores
|
|
264 # -------------
|
|
265 #
|
|
266 # name env-st env-en ali-st ali-en hmm-st hmm-en bits evalue hit bias
|
|
267 #
|
|
268
|
|
269 HEAD
|
|
270
|
|
271 foreach my $dom ( sort { $b->bits <=> $a->bits } @{ $hmmRes->units } ) {
|
|
272
|
|
273
|
|
274 printf $fh (
|
|
275 "%-15s %6d %6d %6d %6d %6s %6s %6.1f %9s %6d %6.1f\n",
|
|
276 $dom->name,
|
|
277 $dom->envFrom,
|
|
278 $dom->envTo,
|
|
279 $dom->seqFrom,
|
|
280 $dom->seqTo,
|
|
281 $dom->hmmFrom,
|
|
282 $dom->hmmTo,
|
|
283 $dom->bits,
|
|
284 $dom->evalue,
|
|
285 $dom->domain,
|
|
286 defined( $dom->bias ) ? $dom->bias : "-",
|
|
287
|
|
288 );
|
|
289 }
|
|
290 }
|
|
291
|
|
292 #-------------------------------------------------------------------------------
|
|
293
|
|
294 =head2 parsePFAMOUT
|
|
295
|
|
296 Title : parsePFAMOUT
|
|
297 Usage : $self->parsePFAMOUT($filename)
|
|
298 Function : Reads in a PFAMOUT file. This file contains the minimal amount of information
|
|
299 : require to constrcut a pfam ALIGN file.
|
|
300 Args : A filename. Normally this is filename
|
|
301 Returns : A Bio::Pfam::HMM::HMMResults object
|
|
302
|
|
303 =cut
|
|
304
|
|
305 sub parsePFAMOUT {
|
|
306 my $self = shift;
|
|
307 my $filename = shift;
|
|
308
|
|
309 unless ($filename) {
|
|
310 confess('No filename or filehandle passed to parsePFAMOUT');
|
|
311 }
|
|
312
|
|
313 my $fh;
|
|
314 if ( ref($filename) eq 'GLOB' ) {
|
|
315 $fh = $filename;
|
|
316 }
|
|
317 else {
|
|
318 open( $fh, $filename ) or confess "Could not open $filename:[$!]\n";
|
|
319 }
|
|
320 my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
|
|
321
|
|
322 while (<$fh>) {
|
|
323 /^# Domain scores/ && last;
|
|
324
|
|
325 #if (/^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\d+)\s*$/) {
|
|
326 if (/^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\d+)\s+\S+\s+(\S+)\s*$/) {
|
|
327
|
|
328 $hmmRes->addHMMSeq(
|
|
329 Bio::Pfam::HMM::HMMSequence->new(
|
|
330 {
|
|
331 name => $1,
|
|
332 desc => $2,
|
|
333 bits => $3,
|
|
334 evalue => $4,
|
|
335 numberHits => $5,
|
|
336 bias => $6
|
|
337 }
|
|
338 )
|
|
339 );
|
|
340 }
|
|
341 elsif (/^#|^\s+$/) {
|
|
342 next;
|
|
343 }
|
|
344 else {
|
|
345 warn "Did not parse|$_|\n";
|
|
346 }
|
|
347 }
|
|
348 while (<$fh>) {
|
|
349
|
|
350 #if (/^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s*$/) {
|
|
351 if (
|
|
352 /^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+\S+\s+(\S+)/)
|
|
353 {
|
|
354 $hmmRes->addHMMUnit(
|
|
355 Bio::Pfam::HMM::HMMUnit->new(
|
|
356 {
|
|
357 name => $1,
|
|
358 envFrom => $2,
|
|
359 envTo => $3,
|
|
360 seqFrom => $4,
|
|
361 seqTo => $5,
|
|
362 hmmFrom => $6,
|
|
363 hmmTo => $7,
|
|
364 bits => $8,
|
|
365 evalue => $9,
|
|
366 bias => $10
|
|
367 }
|
|
368 )
|
|
369 );
|
|
370 }
|
|
371 elsif (/^#|^\s+$/) {
|
|
372 next;
|
|
373 }
|
|
374 elsif (/^$/) {
|
|
375 next;
|
|
376 }
|
|
377 else {
|
|
378 warn "Did not parse: |$_|";
|
|
379 }
|
|
380 }
|
|
381 close($fh);
|
|
382 return ($hmmRes);
|
|
383 }
|
|
384
|
|
385 #-------------------------------------------------------------------------------
|
|
386
|
|
387 =head2 _readHeader
|
|
388
|
|
389 Title : _readHeader
|
|
390 Usage : Private method. $self->_readHeader(\*FH, $hmmResults)
|
|
391 Function : Reads the header section from a HMMER3 hmmsearch
|
|
392 Args : The file handle to hmmsearch output, a Bio::Pfam::HMM::HMMResults object
|
|
393 Returns : Nothing
|
|
394
|
|
395 =cut
|
|
396
|
|
397 #Parse the header part of the output first;
|
|
398 sub _readHeader {
|
|
399 my ( $self, $hs, $hmmRes ) = @_;
|
|
400
|
|
401 #Check the $hs is defined and a GLOB
|
|
402
|
|
403 while (<$hs>) {
|
|
404 if (/^Scores for complete/) {
|
|
405 last;
|
|
406 }
|
|
407 elsif (/^# query HMM file:\s+(\S+)/) {
|
|
408 $hmmRes->hmmName($1);
|
|
409 }
|
|
410 elsif (/^# target sequence database:\s+(\S+)/) {
|
|
411 $hmmRes->seqDB($1);
|
|
412 }
|
|
413 elsif (/^output directed to file:\s+(\S+)/) {
|
|
414 $hmmRes->thisFile($1);
|
|
415 }
|
|
416 elsif (/^Query:\s+(\S+)\s+\[M\=(\d+)\]/) {
|
|
417 $hmmRes->seedName($1);
|
|
418 $hmmRes->hmmLength($2);
|
|
419 }elsif(/^Query:\s+(\S+)\s+\[L\=(\d+)\]/) {
|
|
420 $hmmRes->seqName($1);
|
|
421 $hmmRes->seqLength($2);
|
|
422 }elsif (/^sequence E-value threshold: <= (\d+)/) {
|
|
423 $hmmRes->evalueThr($1);
|
|
424 }
|
|
425 elsif (/^# Random generator seed: (\d+)/) {
|
|
426 $hmmRes->randSeedNum($1);
|
|
427 }elsif(/^Description:\s+(.*)/){
|
|
428 $hmmRes->description($1);
|
|
429 }elsif(/^# (phmmer|hmmsearch|hmmscan|jackhmmer)/){
|
|
430 $hmmRes->program($1);
|
|
431 }elsif (/(^#)|(^$)/) {
|
|
432 next;
|
|
433 }elsif(/^Accession/){
|
|
434 next;
|
|
435 } elsif(/^\[ok\]/) {
|
|
436 return(1);
|
|
437 } else {
|
|
438 die "Failed to parse hmmsearch results |$_| in header section\n";
|
|
439 }
|
|
440 }
|
|
441 }
|
|
442
|
|
443 #-------------------------------------------------------------------------------
|
|
444
|
|
445 =head2 _readSeqHits
|
|
446
|
|
447 Title : _readSeqHits
|
|
448 Usage : Private method. $self->_readSeqHits(\*FH, $hmmResults)
|
|
449 Function : Reads the sequence hits from a HMMER3 hmmsearch
|
|
450 Args : The file handle to hmmsearch output, a Bio::Pfam::HMM::HMMResults object
|
|
451 Returns : Nothing
|
|
452
|
|
453 =cut
|
|
454
|
|
455 sub _readSeqHits {
|
|
456 my ( $self, $hs, $hmmRes ) = @_;
|
|
457 while (<$hs>) {
|
|
458
|
|
459 #Match a line like this
|
|
460 # E-value score bias E-value score bias exp N Sequence Description
|
|
461 # ------- ------ ----- ------- ------ ----- ---- -- -------- -----------
|
|
462 # 4e-83 285.8 10.0 5.3e-83 285.5 7.0 1.1 1 Q14SN3.1 Q14SN3_9HEPC Polyprotein (Fragment).
|
|
463 if (/^Domain annotation for each [sequence|model]/) { # This is the format for HMMER3b3
|
|
464 last;
|
|
465 }
|
|
466 elsif (/^Domain and alignment annotation for each [sequence|model]/) { #This is the format for HMMER3b2 - can be removed later
|
|
467 last;
|
|
468 }
|
|
469 elsif (/^\s+(E-value|---)/) {
|
|
470 next;
|
|
471 }
|
|
472 elsif (/^$/) {
|
|
473 next;
|
|
474 }
|
|
475 else {
|
|
476 next if(/No hits detected that satisfy reporting thresholds/);
|
|
477
|
|
478 #Assume that we have a sequence match
|
|
479 my @sMatch = split( /\s+/, $_ );
|
|
480 unless ( scalar(@sMatch) >= 10 ) {
|
|
481 die "Expected at least 10 pieces of data: $_;\n";
|
|
482 }
|
|
483 my $desc;
|
|
484 if ( scalar(@sMatch) >= 11 ) {
|
|
485 $desc = join( " ", @sMatch[ 10 .. $#sMatch ] );
|
|
486 }
|
|
487
|
|
488 $hmmRes->addHMMSeq(
|
|
489 Bio::Pfam::HMM::HMMSequence->new(
|
|
490 {
|
|
491 evalue => $sMatch[1],
|
|
492 bits => $sMatch[2],
|
|
493 bias => $sMatch[3],
|
|
494 exp => $sMatch[7],
|
|
495 numberHits => $sMatch[8],
|
|
496 name => $sMatch[9],
|
|
497 desc => defined($desc) ? $desc : "-",
|
|
498 }
|
|
499 )
|
|
500 );
|
|
501
|
|
502 next;
|
|
503 }
|
|
504 die "Failed to parse $_ in sequence section\n";
|
|
505 }
|
|
506
|
|
507 }
|
|
508
|
|
509 #------------------------------------------------------------------------------
|
|
510
|
|
511 =head2 _readUnitHits
|
|
512
|
|
513 Title : _readUnitHits
|
|
514 Usage : Private method. $self->_readUnitHits(\*FH, $hmmResults)
|
|
515 Function : Reads the unit (domain) hits from a HMMER3 hmmsearch
|
|
516 Args : The file handle to hmmsearch output, a Bio::Pfam::HMM::HMMResults object
|
|
517 Returns : Nothing
|
|
518
|
|
519 =cut
|
|
520
|
|
521 no warnings 'recursion';
|
|
522
|
|
523 sub _readUnitHits {
|
|
524 my ( $self, $hs, $hmmRes ) = @_;
|
|
525
|
|
526 if($hmmRes->eof){
|
|
527 return;
|
|
528 }
|
|
529
|
|
530 #Parse the domain hits section
|
|
531 #>> P37935.1 MAAY4_SCHCO Mating-type protein A-alpha Y4.
|
|
532 # # bit score bias E-value ind Evalue hmm from hmm to ali from ali to env from env to ali-acc
|
|
533 # --- --------- ------- ---------- ---------- -------- -------- -------- -------- -------- -------- -------
|
|
534 # 1 244.0 0.5 9.5e-76 1.7e-70 1 146 [. 1 145 [. 1 146 [. 0.99
|
|
535 #
|
|
536 # Alignments for each domain:
|
|
537 # == domain 1 score: 244.0 bits; conditional E-value: 9.5e-76
|
|
538 # SEED 1 medrlallkaisasakdlvalaasrGaksipspvkttavkfdplptPdldalrtrlkeaklPakaiksalsayekaCarWrsdleeafdktaksvsPanlhllealrirlyteqvekWlvqvlevaerWkaemekqrahiaatmgp 146
|
|
539 # m+++la+l++isa+akd++ala+srGa+++ +p++tt+++fd+l++P+ld++rtrl+ea+lP+kaik++lsaye+aCarW++dleeafd+ta+s+sP+n+++l++lr+rly+eqv+kWl++vl+v+erWkaemekqrahi+atmgp
|
|
540 # P37935.1 1 MAELLACLQSISAHAKDMMALARSRGATGS-RPTPTTLPHFDELLPPNLDFVRTRLQEARLPPKAIKGTLSAYESACARWKHDLEEAFDRTAHSISPHNFQRLAQLRTRLYVEQVQKWLYEVLQVPERWKAEMEKQRAHINATMGP 145
|
|
541 # 899***************************.******************************************************************************************************************8 PP
|
|
542
|
|
543 while (<$hs>) {
|
|
544 if (/^Internal/) {
|
|
545 last;
|
|
546 }
|
|
547 elsif (/\>\>\s+(\S+)/) {
|
|
548 my $seqId = $1;
|
|
549 $self->_readUnitData( $seqId, $hs, $hmmRes );
|
|
550 if($hmmRes->eof){
|
|
551 return;
|
|
552 }
|
|
553 }
|
|
554 }
|
|
555 }
|
|
556
|
|
557 sub _readUnitData {
|
|
558 my ( $self, $id, $hs, $hmmRes ) = @_;
|
|
559
|
|
560 if($hmmRes->eof){
|
|
561 return;
|
|
562 }
|
|
563 my $hmmName = $hmmRes->seedName();
|
|
564
|
|
565 my $seqName = $hmmRes->seqName;
|
|
566
|
|
567 # bit score bias E-value ind Evalue hmm from hmm to ali from ali to env from env to ali-acc
|
|
568 # --- --------- ------- ---------- ---------- -------- -------- -------- -------- -------- -------- -------
|
|
569 # 1 244.0 0.5 9.5e-76 1.7e-70 1 146 [. 1 145 [. 1 146 [. 0.99
|
|
570 #
|
|
571 # Alignments for each domain:
|
|
572
|
|
573 my @units;
|
|
574 my $align = 1;
|
|
575 my $recurse = 0;
|
|
576 my $eof = 0;
|
|
577 my ($nextSeqId);
|
|
578 while (<$hs>) {
|
|
579 if (/^[(\/\/|Internal)]/ ) {
|
|
580 $align = 0;
|
|
581 $recurse = 0;
|
|
582 $eof = 1;
|
|
583 last;
|
|
584 }
|
|
585 elsif (/^\>\>\s+(\S+)/) {
|
|
586 $nextSeqId = $1;
|
|
587 $align = 0;
|
|
588 $recurse = 1;
|
|
589 last;
|
|
590 }
|
|
591 elsif (/^\s+Alignments for each domain:/) {
|
|
592 $align = 1;
|
|
593 $recurse = 0;
|
|
594 last;
|
|
595 }
|
|
596 elsif (/^\s+(#\s+score|---)/){
|
|
597
|
|
598 #Two human readable lines
|
|
599 next;
|
|
600 }
|
|
601 elsif (/^$/) {
|
|
602
|
|
603 #blank line
|
|
604 next;
|
|
605 }
|
|
606 elsif (/^\s+\d+\s+/) {
|
|
607 my @dMatch = split( /\s+/, $_ );
|
|
608 unless ( scalar(@dMatch) == 17 ) {
|
|
609 die "Expected 16 elements of data: $_\n";
|
|
610 }
|
|
611
|
|
612 push(
|
|
613 @units,
|
|
614 Bio::Pfam::HMM::HMMUnit->new(
|
|
615 {
|
|
616 name => $id,
|
|
617 domain => $dMatch[1],
|
|
618 bits => $dMatch[3],
|
|
619 bias => $dMatch[4],
|
|
620 domEvalue => $dMatch[5],
|
|
621 evalue => $dMatch[6],
|
|
622 hmmFrom => $dMatch[7],
|
|
623 hmmTo => $dMatch[8],
|
|
624 seqFrom => $dMatch[10],
|
|
625 seqTo => $dMatch[11],
|
|
626 envFrom => $dMatch[13],
|
|
627 envTo => $dMatch[14],
|
|
628 aliAcc => $dMatch[16]
|
|
629 }
|
|
630 )
|
|
631 );
|
|
632
|
|
633 next;
|
|
634 }
|
|
635 elsif(/^\s+\[No individual domains/) {
|
|
636 $align=0;
|
|
637 next;
|
|
638 }
|
|
639 else {
|
|
640 confess("Did not parse line: $_");
|
|
641 }
|
|
642 }
|
|
643
|
|
644 # == domain 1 score: 244.0 bits; conditional E-value: 9.5e-76
|
|
645 # SEED 1 medrlallkaisasakdlvalaasrGaksipspvkttavkfdplptPdldalrtrlkeaklPakaiksalsayekaCarWrsdleeafdktaksvsPanlhllealrirlyteqvekWlvqvlevaerWkaemekqrahiaatmgp 146
|
|
646 # m+++la+l++isa+akd++ala+srGa+++ +p++tt+++fd+l++P+ld++rtrl+ea+lP+kaik++lsaye+aCarW++dleeafd+ta+s+sP+n+++l++lr+rly+eqv+kWl++vl+v+erWkaemekqrahi+atmgp
|
|
647 # P37935.1 1 MAELLACLQSISAHAKDMMALARSRGATGS-RPTPTTLPHFDELLPPNLDFVRTRLQEARLPPKAIKGTLSAYESACARWKHDLEEAFDRTAHSISPHNFQRLAQLRTRLYVEQVQKWLYEVLQVPERWKAEMEKQRAHINATMGP 145
|
|
648 # 899***************************.******************************************************************************************************************8 PP
|
|
649 #
|
|
650 # OR....
|
|
651 #
|
|
652 # == domain 1 score: 27.6 bits; conditional E-value: 7.4e-10
|
|
653 # PF00018 17 LsfkkGdvitvleksee.eWwkaelkdg.keGlvPsnYvep 55
|
|
654 # L++++Gd+++++++++e++Ww++++++++++G++P+n+v+p
|
|
655 # P15498.4 617 LRLNPGDIVELTKAEAEqNWWEGRNTSTnEIGWFPCNRVKP 657
|
|
656 # 7899**********9999*******************9987 PP
|
|
657
|
|
658
|
|
659 if ($align) {
|
|
660 my ($pattern1, $pattern2);
|
|
661
|
|
662 if($hmmName and $hmmRes->program eq 'hmmsearch'){
|
|
663 $pattern1 = qr/^\s+$hmmName\s+\d+\s+(\S+)\s+\d+/;
|
|
664 $id =~ s/(\W)/\\$1/g; # escape any non-word character
|
|
665 # $id =~ s/\|/\\|/g; #Escape '|', '[' and ']' characters
|
|
666 # $id =~ s/\[/\\[/g;
|
|
667 # $id =~ s/\]/\\]/g;
|
|
668 $pattern2 = qr/^\s+$id\s+\d+\s+(\S+)\s+\d+/;
|
|
669 }elsif($seqName and $hmmRes->program eq 'hmmscan'){
|
|
670 my $tmpSeqName = $seqName;
|
|
671 $tmpSeqName =~ s/(\W)/\\$1/g; # escape any non-word character
|
|
672 # $tmpSeqName =~ s/\|/\\|/g; #Escape '|', '[' and ']' characters
|
|
673 # $tmpSeqName =~ s/\[/\\[/g;
|
|
674 # $tmpSeqName =~ s/\]/\\]/g;
|
|
675 $pattern1 = qr/^\s+$id\s+\d+\s+(\S+)\s+\d+/;
|
|
676 $pattern2 = qr/^\s+$tmpSeqName\s+\d+\s+(\S+)\s+\d+/;
|
|
677 }elsif($seqName and ($hmmRes->program eq 'phmmer' or $hmmRes->program eq 'jackhmmer') ){
|
|
678 $seqName =~ s/(\W)/\\$1/g; # escape any non-word character
|
|
679 # $seqName =~ s/\|/\|/g; #Escape '|', '[' and ']' characters
|
|
680 # $seqName =~ s/\[/\\[/g;
|
|
681 # $seqName =~ s/\]/\\]/g;
|
|
682 $pattern1 = qr/^\s+$seqName\s+\d+\s+(\S+)\s+\d+/;
|
|
683 $pattern2 = qr/^\s+$id\s+\d+\s+(\S+)\s+\d+/;
|
|
684 }
|
|
685
|
|
686
|
|
687 $recurse = 0;
|
|
688 my $matchNo;
|
|
689 my $hmmlen = 0;
|
|
690 while (<$hs>) {
|
|
691 if (/$pattern1/) {
|
|
692 $units[ $matchNo - 1 ]->hmmalign->{hmm} .= $1;
|
|
693 $hmmlen = length($1);
|
|
694 }
|
|
695 elsif (/$pattern2/) {
|
|
696 $units[ $matchNo - 1 ]->hmmalign->{seq} .= $1;
|
|
697 }
|
|
698 elsif (/^\s+([x\.]+)\s+RF$/) {
|
|
699 my $rf = $1;
|
|
700 $units[ $matchNo - 1 ]->hmmalign->{rf} .= $rf;
|
|
701 }
|
|
702 elsif (/^\s+([0-9\*\.]+)\s+PP$/) {
|
|
703 my $pp = $1;
|
|
704 $units[ $matchNo - 1 ]->hmmalign->{pp} .= $pp;
|
|
705 }elsif (/^\s+(\S+)\s+CS$/) {
|
|
706 my $cs = $1;
|
|
707 $units[ $matchNo - 1 ]->hmmalign->{cs} .= $cs;
|
|
708 }elsif (/^\s+==\s+domain\s+(\d+)/) {
|
|
709 $matchNo = $1;
|
|
710 }
|
|
711 elsif (/^\s+(.*)\s+$/) {
|
|
712 # $1 is *not* the match - this fails if there are prepended
|
|
713 # or appended spaces
|
|
714 # $units[ $matchNo - 1 ]->hmmalign->{match} .= $1;
|
|
715 # Let's get a right substring based on the HMM length
|
|
716 chomp;
|
|
717 my $m1 = substr($_,-$hmmlen);
|
|
718 $units[ $matchNo - 1 ]->hmmalign->{match} .= $m1;
|
|
719 }elsif (/^$/) {
|
|
720 next;
|
|
721 }
|
|
722 elsif (/^[(\/\/|Internal)]/) {
|
|
723 $align = 0;
|
|
724 $recurse = 0;
|
|
725 $eof = 1;
|
|
726 last;
|
|
727 }
|
|
728 elsif (/^\>\>\s+(\S+)/) {
|
|
729 $nextSeqId = $1;
|
|
730 $recurse = 1;
|
|
731 last;
|
|
732 }
|
|
733
|
|
734 else {
|
|
735 confess("Did not parse |$_| in units");
|
|
736 }
|
|
737 }
|
|
738 }
|
|
739
|
|
740 foreach my $u (@units) {
|
|
741 $hmmRes->addHMMUnit($u);
|
|
742 }
|
|
743
|
|
744 $hmmRes->eof($eof);
|
|
745
|
|
746 if ($recurse and $nextSeqId) {
|
|
747 $self->_readUnitData( $nextSeqId, $hs, $hmmRes );
|
|
748 }
|
|
749 return;
|
|
750 }
|
|
751 use warnings 'recursion';
|
|
752
|
|
753 #-------------------------------------------------------------------------------
|
|
754
|
|
755 =head2 parseHMMER2
|
|
756
|
|
757 Title : parseHMMER2
|
|
758 Usage : $self->parseHMMER2(\*FH )
|
|
759 Function : This is a minimal parser for reading in the output of HMMER2 hmmsearch
|
|
760 Args : The file handle to hmmsearch output
|
|
761 Returns : A Bio::Pfam::HMM::HMMResults object
|
|
762
|
|
763 =cut
|
|
764
|
|
765 sub parseHMMER2 {
|
|
766 my $self = shift;
|
|
767 my $file = shift;
|
|
768
|
|
769 my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
|
|
770
|
|
771 my %seqh;
|
|
772 my $count = 0;
|
|
773
|
|
774 while (<$file>) {
|
|
775 /^Scores for complete sequences/ && last;
|
|
776 }
|
|
777
|
|
778 while (<$file>) {
|
|
779 /^Parsed for domains/ && last;
|
|
780 if ( my ( $id, $de, $sc, $ev, $hits ) =
|
|
781 /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\d+)\s*$/ )
|
|
782 {
|
|
783 $hmmRes->addHMMSeq(
|
|
784 Bio::Pfam::HMM::HMMSequence->new(
|
|
785 {
|
|
786 bits => $sc,
|
|
787 evalue => $ev,
|
|
788 name => $id,
|
|
789 desc => $de,
|
|
790 numberHits => $hits
|
|
791 }
|
|
792 )
|
|
793 );
|
|
794
|
|
795 $seqh{$id} = $sc;
|
|
796 }
|
|
797 }
|
|
798
|
|
799 while (<$file>) {
|
|
800 /^Histogram of all scores/ && last;
|
|
801 if ( my ( $id, $sqfrom, $sqto, $hmmf, $hmmt, $sc, $ev ) =
|
|
802 /^(\S+)\s+\S+\s+(\d+)\s+(\d+).+?(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s*$/ )
|
|
803 {
|
|
804 $hmmRes->addHMMUnit(
|
|
805 Bio::Pfam::HMM::HMMUnit->new(
|
|
806 {
|
|
807 name => $id,
|
|
808 seqFrom => $sqfrom,
|
|
809 seqTo => $sqto,
|
|
810 hmmFrom => $hmmf,
|
|
811 hmmTo => $hmmt,
|
|
812 bits => $sc,
|
|
813 evalue => $ev
|
|
814 }
|
|
815 )
|
|
816 );
|
|
817
|
|
818 }
|
|
819 }
|
|
820
|
|
821 return $hmmRes;
|
|
822 }
|
|
823
|
|
824 #-------------------------------------------------------------------------------
|
|
825
|
|
826 =head2 parseHMMER1
|
|
827
|
|
828 Title : parseHMMER1
|
|
829 Usage : $self->parseHMMER1(\*FH )
|
|
830 Function : This is a minimal parser for reading in the output of HMMER1 hmmsearch.
|
|
831 : There are a few hacks to get round some of them requirements
|
|
832 Args : The file handle to hmmsearch output
|
|
833 Returns : A Bio::Pfam::HMM::HMMResults object
|
|
834
|
|
835 =cut
|
|
836
|
|
837 sub parseHMMER1 {
|
|
838 my $self = shift;
|
|
839 my $file = shift;
|
|
840
|
|
841 my $hmmRes = Bio::Pfam::HMM::HMMResults->new;
|
|
842
|
|
843 my %seqh;
|
|
844 my $count = 0;
|
|
845
|
|
846 while (<$file>) {
|
|
847 if ( my ( $bits, $s, $e, $id, $de ) =
|
|
848 /^(-?\d+\.?\d*)\s+\(bits\)\s+f:\s+(\d+)\s+t:\s+(\d+)\s+Target:\s+(\S+)\s+(.*)/
|
|
849 )
|
|
850 {
|
|
851 if ( $id =~ /(\S+)\/(\d+)-(\d+)/ ) {
|
|
852 $id = $1;
|
|
853 $s = $2 + $s - 1;
|
|
854 $e = $2 + $e - 1;
|
|
855 }
|
|
856
|
|
857 if ( !$hmmRes->seqs->{$id} ) {
|
|
858 $hmmRes->addHMMSeq(
|
|
859 Bio::Pfam::HMM::HMMSequence->new(
|
|
860 {
|
|
861 bits => $bits,
|
|
862 evalue => 1,
|
|
863 name => $id,
|
|
864 desc => $de,
|
|
865 numberHits => 1
|
|
866 }
|
|
867 )
|
|
868 );
|
|
869 }
|
|
870 $hmmRes->addHMMUnit(
|
|
871 Bio::Pfam::HMM::HMMUnit->new(
|
|
872 {
|
|
873 name => $id,
|
|
874 seqFrom => $s,
|
|
875 seqTo => $e,
|
|
876 hmmFrom => "1",
|
|
877 hmmTo => "1",
|
|
878 bits => $bits,
|
|
879 evalue => "1"
|
|
880 }
|
|
881 )
|
|
882 );
|
|
883 if ( $bits > $hmmRes->seqs->{$id}->bits ) {
|
|
884 $hmmRes->seqs->{$id}->bits($bits);
|
|
885 }
|
|
886 }
|
|
887 }
|
|
888 return $hmmRes;
|
|
889 }
|
|
890
|
|
891 #-------------------------------------------------------------------------------
|
|
892
|
|
893 =head2 writeScoresFile
|
|
894
|
|
895 Title : writeScoresFile
|
|
896 Usage : $hmmResIO->writeScoresFile( $hmmRes)
|
|
897 Function : Writes a scores file for a Bio::Pfam::HMM::HMMResults object.
|
|
898 Args : Bio::Pfam::HMM::HMMResults
|
|
899 Returns : Nothing
|
|
900
|
|
901 =cut
|
|
902
|
|
903 sub writeScoresFile {
|
|
904 my ( $self, $hmmRes ) = @_;
|
|
905
|
|
906 unless ($hmmRes) {
|
|
907 confess "A Bio::Pfam::HMM::HMMResults object was not parsed in\n";
|
|
908 }
|
|
909 unless ( $hmmRes->isa("Bio::Pfam::HMM::HMMResults") ) {
|
|
910 confess("Variable passed in is not a Bio::Pfam::HMM::Results object");
|
|
911 }
|
|
912
|
|
913 my $fh;
|
|
914 open( $fh, ">" . $self->scores )
|
|
915 or confess "Could not open " . $self->scores . ":[$!]\n";
|
|
916
|
|
917 my ( $lowSeq, $lowDom, $highSeq, $highDom );
|
|
918 $lowSeq = $lowDom = 999999.99;
|
|
919 $highSeq = $highDom = -999999.99;
|
|
920 unless ( defined $hmmRes->domThr and defined $hmmRes->seqThr ) {
|
|
921 warn "No threshold set, setting to 25.0 bits\n";
|
|
922 $hmmRes->domThr("25.0");
|
|
923 $hmmRes->seqThr("25.0");
|
|
924 }
|
|
925
|
|
926 my @sigUnits;
|
|
927
|
|
928 foreach my $seqId ( keys %{ $hmmRes->seqs } ) {
|
|
929
|
|
930 #Does this sequence score above or equal to the sequence threshold?
|
|
931 if ( $hmmRes->seqs->{$seqId}->bits >= $hmmRes->seqThr ) {
|
|
932
|
|
933 #Is this the lowest sequence thresh
|
|
934 if ( $hmmRes->seqs->{$seqId}->bits < $lowSeq ) {
|
|
935 $lowSeq = $hmmRes->seqs->{$seqId}->bits;
|
|
936 }
|
|
937
|
|
938 #For each of the regions found on the sequence, look to see if the match is great
|
|
939 #than the domain threshold. If it is, is it lower than we we have seen previously
|
|
940 foreach my $unit ( @{ $hmmRes->seqs->{$seqId}->hmmUnits } ) {
|
|
941 if ( $unit->bits >= $hmmRes->domThr ) {
|
|
942 push( @sigUnits, $unit );
|
|
943 if ( $unit->bits < $lowDom ) {
|
|
944 $lowDom = $unit->bits();
|
|
945 }
|
|
946 }
|
|
947 elsif ( $unit->bits > $highDom ) {
|
|
948 $highDom = $unit->bits;
|
|
949 }
|
|
950 }
|
|
951 }
|
|
952 else {
|
|
953
|
|
954 #Is this the highest sequence thres below the cut-off
|
|
955 if ( $hmmRes->seqs->{$seqId}->bits > $highSeq ) {
|
|
956 $highSeq = $hmmRes->seqs->{$seqId}->bits;
|
|
957 }
|
|
958
|
|
959 #For each of the regions found on the sequence, look to see if the match is great
|
|
960 #than the domain threshold. If it is, is it lower than we we have seen previously
|
|
961 foreach my $unit ( @{ $hmmRes->seqs->{$seqId}->hmmUnits } ) {
|
|
962 if ( $unit->bits < $hmmRes->domThr && $unit->bits > $highDom ) {
|
|
963 $highDom = $unit->bits;
|
|
964 }
|
|
965 }
|
|
966 }
|
|
967 }
|
|
968
|
|
969 $hmmRes->domTC($lowDom);
|
|
970 $hmmRes->seqTC($lowSeq);
|
|
971 $hmmRes->domNC($highDom);
|
|
972 $hmmRes->seqNC($highSeq);
|
|
973
|
|
974 #Print the domains to the scores file
|
|
975 foreach my $u ( sort { $b->bits <=> $a->bits } @sigUnits ) {
|
|
976 print $fh
|
|
977 sprintf( "%.1f %s/%s-%s %s-%s %s\n", $u->bits, $u->name, $u->envFrom, $u->envTo, $u->seqFrom, $u->seqTo, $u->evalue );
|
|
978 }
|
|
979 close($fh);
|
|
980
|
|
981 }
|
|
982
|
|
983 #-------------------------------------------------------------------------------
|
|
984
|
|
985 #TODO - write _readAlign
|
|
986
|
|
987 =head2 _readAlign
|
|
988
|
|
989 Title :
|
|
990 Usage :
|
|
991 Function :
|
|
992 Args :
|
|
993 Returns :
|
|
994
|
|
995 =cut
|
|
996
|
|
997 sub _readAlign {
|
|
998 my ( $self, $fh, $hmmRes ) = @_;
|
|
999
|
|
1000 }
|
|
1001
|
|
1002 #Parse the alignment section
|
|
1003 #if($pp){
|
|
1004
|
|
1005 #}else{
|
|
1006 # while(<HS>){
|
|
1007 # last if(/^\/\//)
|
|
1008 # }
|
|
1009 #}
|
|
1010
|
|
1011
|
|
1012
|
|
1013 sub _readFooter {
|
|
1014 my($self, $fh, $hmmRes ) = @_;
|
|
1015
|
|
1016 # We are going to parse something like this!
|
|
1017
|
|
1018 # Internal pipeline statistics summary:
|
|
1019 #-------------------------------------
|
|
1020 #Query sequence(s): 1 (360 residues)
|
|
1021 #Target model(s): 7 (836 nodes)
|
|
1022 #Passed MSV filter: 2 (0.285714); expected 0.1 (0.02)
|
|
1023 #Passed Vit filter: 1 (0.142857); expected 0.0 (0.001)
|
|
1024 #Passed Fwd filter: 1 (0.142857); expected 0.0 (1e-05)
|
|
1025 #Initial search space (Z): 7 [actual number of targets]
|
|
1026 #Domain search space (domZ): 1 [number of targets reported over threshold]
|
|
1027 ## CPU time: 0.00u 0.00s 00:00:00.00 Elapsed: 00:00:00
|
|
1028 ## Mc/sec: inf
|
|
1029 #//
|
|
1030
|
|
1031 while(<$fh>){
|
|
1032 if(/\/\//){
|
|
1033 last;
|
|
1034 }
|
|
1035 }
|
|
1036 }
|
|
1037
|
|
1038
|
|
1039 #Parse the internal summary section
|
|
1040 #Internal statistics summary:
|
|
1041 #----------------------------
|
|
1042 #Query HMM(s): 1 (0 nodes)
|
|
1043 #Target sequences: 5323441 (0 residues)
|
|
1044 #Passed MSV filter: 116519 (-37389918065567040729448769671768824784852036328367855636063687997915136.000; expected 19991592792512146725679052970637918208.000)
|
|
1045 #Passed Vit filter: 7579 (-0.0000; expected -35982214160587876085407389642471051723332987952235753317595472501307733302049608744822636544.0000)
|
|
1046 #Passed Fwd filter: 1687 (8.3e+165; expected -7.5e-266)
|
|
1047 #Mc/sec: 828.85
|
|
1048 # CPU time: 115.36u 4.45s 00:01:59.81 Elapsed: 00:03:01
|
|
1049
|
|
1050 #sub writeHMMSearch {
|
|
1051 # my ( $self, $hmmRes ) = @_;
|
|
1052 # my $fh;
|
|
1053 # open($fh, ">".$self->outfile."\n");
|
|
1054 #
|
|
1055 # $self->_writeHeader($fh, $hmmRes);
|
|
1056 # $self->_writeSeqHits( $fh, $hmmRes);
|
|
1057 # $self->_writeDomHits( $fh, $hmmRes);
|
|
1058 # $self->_writeAlign( $fh, $hmmRes) if($self->align);
|
|
1059 # $self->_writeInternalSummary( $fh, $hmmRes);
|
|
1060 #}
|
|
1061 #sub mergeHMMSearch {
|
|
1062 # my ( $self, $filenames ) = @_;
|
|
1063 #}
|
|
1064
|
|
1065
|
|
1066 sub write_ascii_out {
|
|
1067
|
|
1068 my ($self, $HMMResults, $fh, $scanData, $e_seq, $e_dom, $b_seq, $b_dom) = @_;
|
|
1069
|
|
1070
|
|
1071 $scanData->{_max_seqname} = 20 unless($scanData->{_max_seqname} or $scanData->{_max_seqname} < 1);
|
|
1072
|
|
1073 my $ga;
|
|
1074
|
|
1075 if($e_seq or $e_dom) {
|
|
1076 $e_seq = $e_dom unless($e_seq);
|
|
1077 $e_dom = "10" unless($e_dom);
|
|
1078 }
|
|
1079 elsif($b_seq or $b_dom) {
|
|
1080 $b_seq = $b_dom unless($b_seq);
|
|
1081 $b_dom = "0" unless($b_dom);
|
|
1082 }
|
|
1083 else {
|
|
1084 $ga = 1;
|
|
1085 }
|
|
1086
|
|
1087
|
|
1088 foreach my $unit ( sort { $a->seqFrom <=> $b->seqFrom } @{ $HMMResults->units } ) {
|
|
1089
|
|
1090 if($unit->name =~ /Pfam\-B/) {
|
|
1091
|
|
1092 next unless($HMMResults->seqs->{$unit->name}->evalue <= "0.001" and $unit->evalue <= "0.001");
|
|
1093
|
|
1094
|
|
1095 printf $fh "%-".$scanData->{_max_seqname}."s %6d %6d %6d %6d %-11s %-16s %7s %5d %5d %5d %8s %9s %3s %-8s\n",
|
|
1096 $HMMResults->seqName,
|
|
1097 $unit->seqFrom,
|
|
1098 $unit->seqTo,
|
|
1099 $unit->envFrom,
|
|
1100 $unit->envTo,
|
|
1101 $scanData->{_accmap}->{ $unit->name },
|
|
1102 $unit->name,
|
|
1103 "Pfam-B",
|
|
1104 $unit->hmmFrom,
|
|
1105 $unit->hmmTo,
|
|
1106 $scanData->{_model_len}->{ $unit->name },
|
|
1107 $unit->bits,
|
|
1108 $unit->evalue,
|
|
1109 "NA",
|
|
1110 "NA";
|
|
1111
|
|
1112
|
|
1113 }
|
|
1114 else {
|
|
1115
|
|
1116 #Filter results based on thresholds
|
|
1117 if($ga) {
|
|
1118 next unless($unit->sig);
|
|
1119 }
|
|
1120 if($e_seq) {
|
|
1121 next unless($HMMResults->seqs->{$unit->name}->evalue <= $e_seq and $unit->evalue <= $e_dom);
|
|
1122 }
|
|
1123 if($b_seq) {
|
|
1124
|
|
1125 next unless($HMMResults->seqs->{$unit->name}->bits >= $b_seq and $unit->bits >= $b_dom);
|
|
1126 }
|
|
1127
|
|
1128 my $clan = $scanData->{_clanmap}->{ $unit->name } || "No_clan";
|
|
1129
|
|
1130
|
|
1131 printf $fh "%-".$scanData->{_max_seqname}."s %6d %6d %6d %6d %-11s %-16s %7s %5d %5d %5d %8s %9s %3d %-8s ",
|
|
1132 $HMMResults->seqName,
|
|
1133 $unit->seqFrom,
|
|
1134 $unit->seqTo,
|
|
1135 $unit->envFrom,
|
|
1136 $unit->envTo,
|
|
1137 $scanData->{_accmap}->{ $unit->name },
|
|
1138 $unit->name,
|
|
1139 $scanData->{_type}->{ $unit->name },
|
|
1140 $unit->hmmFrom,
|
|
1141 $unit->hmmTo,
|
|
1142 $scanData->{_model_len}->{ $unit->name },
|
|
1143 $unit->bits,
|
|
1144 $unit->evalue,
|
|
1145 $unit->sig,
|
|
1146 $clan;
|
|
1147
|
|
1148
|
|
1149 if($unit->{'act_site'}) {
|
|
1150 local $" = ",";
|
|
1151 print $fh "predicted_active_site[@{$unit->{'act_site'}}]";
|
|
1152 }
|
|
1153
|
|
1154 if($scanData->{_translate}){
|
|
1155 my $strand = '?';
|
|
1156 my $start = '-';
|
|
1157 my $end = '-';
|
|
1158 if(exists($scanData->{_orf}->{$HMMResults->seqName})){
|
|
1159 $strand = $scanData->{_orf}->{$HMMResults->seqName}->{strand};
|
|
1160 if($strand eq '+'){
|
|
1161 $start = $scanData->{_orf}->{$HMMResults->seqName}->{start} + ($unit->envFrom * 3) - 3;
|
|
1162 $end = $scanData->{_orf}->{$HMMResults->seqName}->{start} + ($unit->envTo * 3) - 3;
|
|
1163 }elsif($strand eq '-'){
|
|
1164 $start = $scanData->{_orf}->{$HMMResults->seqName}->{start} - ($unit->envFrom * 3) + 3;
|
|
1165 $end = $scanData->{_orf}->{$HMMResults->seqName}->{start} - ($unit->envTo * 3) + 3;
|
|
1166 }
|
|
1167 }
|
|
1168 print $fh "$strand $start $end";
|
|
1169 }
|
|
1170
|
|
1171 print $fh "\n";
|
|
1172 }
|
|
1173
|
|
1174 if($scanData->{_align}){
|
|
1175 print $fh sprintf( "%-10s %s\n", "#HMM", $unit->hmmalign->{hmm} );
|
|
1176 print $fh sprintf( "%-10s %s\n", "#MATCH", $unit->hmmalign->{match} );
|
|
1177 print $fh sprintf( "%-10s %s\n", "#PP", $unit->hmmalign->{pp});
|
|
1178 print $fh sprintf( "%-10s %s\n", "#SEQ", $unit->hmmalign->{seq});
|
|
1179 print $fh sprintf( "%-10s %s\n", "#CS", $unit->hmmalign->{cs}) if($unit->hmmalign->{cs});
|
|
1180 }
|
|
1181
|
|
1182 }
|
|
1183
|
|
1184 }
|
|
1185
|
|
1186 1;
|