annotate pfamScan/Bio/Pfam/HMM/HMMIO.pm @ 0:68a3648c7d91 draft default tip

Uploaded
author matteoc
date Thu, 22 Dec 2016 04:45:31 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
1 # HMM.pm
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
2 #
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
3 # Author: finnr
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
4 # Maintainer: $Id: HMMIO.pm,v 1.3 2010-01-12 17:00:26 jm14 Exp $
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
5 # Version: $Revision: 1.3 $
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
6 # Created: Nov 24, 2008
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
7 # Last Modified: $Date: 2010-01-12 17:00:26 $
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
8 =head1 NAME
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
9
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
10 Template - a short description of the class
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
11
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
12 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
13
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
14 package Bio::Pfam::HMM::HMMIO;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
15
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
16 =head1 DESCRIPTION
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
17
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
18 A more detailed description of what this class does and how it does it.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
19
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
20 $Id: HMMIO.pm,v 1.3 2010-01-12 17:00:26 jm14 Exp $
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
21
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
22 =head1 COPYRIGHT
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
23
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
24 File: HMM.pm
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
25
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
26 Copyright (c) 2007: Genome Research Ltd.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
27
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
28 Authors: Rob Finn (rdf@sanger.ac.uk), John Tate (jt6@sanger.ac.uk)
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
29
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
30 This is free software; you can redistribute it and/or
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
31 modify it under the terms of the GNU General Public License
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
32 as published by the Free Software Foundation; either version 2
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
33 of the License, or (at your option) any later version.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
34
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
35 This program is distributed in the hope that it will be useful,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
36 but WITHOUT ANY WARRANTY; without even the implied warranty of
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
37 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
38 GNU General Public License for more details.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
39
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
40 You should have received a copy of the GNU General Public License
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
41 along with this program; if not, write to the Free Software
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
42 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
43 or see the on-line version at http://www.gnu.org/copyleft/gpl.txt
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
44
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
45 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
46
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
47 use strict;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
48 use warnings;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
49
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
50 use Moose;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
51 use Moose::Util::TypeConstraints;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
52 use Carp;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
53 use Bio::Pfam::HMM::HMM;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
54
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
55 #-------------------------------------------------------------------------------
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
56
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
57 =head1 METHODS
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
58
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
59 =cut
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
60 sub readHMM {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
61 my ($this, $hmm) = @_;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
62
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
63 unless($hmm){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
64 confess("No HMM passed in!");
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
65 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
66 chomp($hmm);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
67
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
68 my @input;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
69 if(ref($hmm) eq 'GLOB'){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
70 @input = <$hmm>;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
71 }elsif($hmm !~ /\n/ and -e $hmm and -s $hmm){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
72 #Assume that we have a filename and try and open it;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
73 open(HMM, $hmm) || confess("Could not open $hmm:[$!]");
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
74 @input = <HMM>;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
75 }else{
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
76 @input = split(/\n/, $hmm);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
77 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
78
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
79
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
80
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
81 #Parse the header section!
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
82 #HMMER3/f [3.1b1 | May 2013]
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
83 #NAME SEED
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
84 #ACC PF000001.1
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
85 #DESC A description
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
86 #LENG 55
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
87 #ALPH amino
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
88 #RF no
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
89 #MM no
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
90 #CONS yes
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
91 #CS no
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
92 #MAP yes
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
93 #DATE Fri Nov 21 09:58:16 2008
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
94 #COM [1] /Users/finnr/Work/Software/hmmer-3.0.20081101/bin/hmmbuild -o hmmbuild.log HMM SEED
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
95 #NSEQ 279
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
96 #EFFN 4.966292
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
97 #STATS LOCAL MSV -11.4716 0.69948
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
98 #STATS LOCAL VITERBI -12.3713 0.69948
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
99 #STATS LOCAL FORWARD -5.5807 0.69948
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
100
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
101 #To add GA, TC, NC, CKSUM, DESC
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
102 my($objHash);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
103 my $i =0;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
104 foreach ( @input ){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
105 if(my ($version) = $_ =~ /(HMMER3.*)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
106 $objHash->{version} = $version;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
107 }elsif(my ($acc) = $_ =~ /^ACC\s+(PF\d+\.\d+)$/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
108 $objHash->{accession} = $acc;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
109 }elsif(/NAME\s+(\S+)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
110 $objHash->{name} = $1 ;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
111 }elsif(/DESC\s+(.*)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
112 $objHash->{description} = $1 ;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
113 }elsif(my ($length) = $_ =~ /^LENG\s+(\d+)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
114 $objHash->{length} = $length;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
115 }elsif( my ($alpha) = $_ =~ /^ALPH\s+(\S+)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
116 $objHash->{alpha} = $alpha;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
117 }elsif( my ($rf) = $_ =~ /^RF\s+(no|yes)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
118 $objHash->{rf} = ($rf eq "no") ? 0 : 1;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
119 }elsif( my ($mm) = $_ =~ /^MM\s+(no|yes)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
120 $objHash->{mm} = ($mm eq "no") ? 0 : 1;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
121 }elsif( my ($cons) = $_ =~ /^CONS\s+(no|yes)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
122 $objHash->{cons} = ($cons eq "no") ? 0 : 1;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
123 }elsif(my ($cs) = $_ =~ /^CS\s+(no|yes)/ ){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
124 $objHash->{cs} = ($cs eq "no") ? 0 : 1;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
125 }elsif(my ($map) = $_ =~ /^MAP\s+(no|yes)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
126 $objHash->{map} = ($map eq "no") ? 0 : 1;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
127 }elsif(my ($date) = $_ =~ /^DATE\s+(.*)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
128 $objHash->{date} = $date;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
129 }elsif(my ($sm) = $_ =~ /^SM\s+(.*)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
130 $objHash->{searchMethod} = $sm;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
131
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
132 }elsif(my ($options, $hmmName, $alignName) = $input[$i] =~ /^BM.*hmmbuild(.*)? (\S+) (\S+)$/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
133 $objHash->{buildLine} = { cmd => 'hmmbuild',
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
134 options => $options,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
135 name => $hmmName,
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
136 align => $alignName } ;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
137 }elsif(my($noSeqs) = $_ =~ /^NSEQ\s+(\d+)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
138 $objHash->{nSeq} = $noSeqs;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
139 }elsif( my($effn) = $_ =~ /^EFFN\s+(\d+\.\d+)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
140 #EFFN 4.966292
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
141 $objHash->{effn} = $effn ;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
142 }elsif( my ( $cksum ) = $_ =~ /^CKSUM (\d+)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
143 $objHash->{cksum} = $cksum ;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
144 }elsif(/GA\s+(\S+)\s+(\S+)\;/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
145 $objHash->{seqGA} = $1;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
146 $objHash->{domGA} = $2;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
147 }elsif(/TC\s+(\S+)\s+(\S+)\;/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
148 $objHash->{seqTC} = $1;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
149 $objHash->{domTC} = $2;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
150 }elsif(/NC\s+(\S+)\s+(\S+)\;/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
151 $objHash->{seqNC} = $1;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
152 $objHash->{domNC} = $2;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
153 }elsif( my ($msv_mu, $msv_lambda ) = $_ =~ /^STATS LOCAL MSV\s+(\S+)\s+(0\.\d+)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
154 $objHash->{msvStats} = { mu => $msv_mu, lambda => $msv_lambda};
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
155 }elsif( my ($viterbi_mu, $viterbi_lambda ) = $_ =~ /^STATS LOCAL VITERBI\s+(\S+)\s+(0\.\d+)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
156 $objHash->{viterbiStats} = { mu => $viterbi_mu, lambda => $viterbi_lambda };
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
157 }elsif( my ($forward_tau, $forward_lambda ) = $_ =~ /^STATS LOCAL FORWARD\s+(\S+)\s+(0\.\d+)/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
158 $objHash->{forwardStats} = {tau => $forward_tau, lambda => $forward_lambda};
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
159 }elsif( $_ =~ /^HMM\s+A/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
160 last;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
161 }else{
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
162 confess("Got a bad HMM header line:$input[$i]\n");
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
163 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
164 $i++;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
165 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
166
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
167 my $hmmObj = Bio::Pfam::HMM::HMM->new($objHash);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
168
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
169
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
170 #The next two lines are stand lines
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
171 #HMM A C D E F G H I K L M N P Q R S T V W Y
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
172 # m->m m->i m->d i->m i->i d->m d->d
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
173 $i++;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
174
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
175
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
176 #Add the comp line
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
177 for( my $line = 0; $line <=2; $line++){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
178 $i++;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
179 my @l = split(/\s+/, $input[$i]);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
180 my @c;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
181 if($line == 0 ){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
182 @c = @l[2..21];
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
183 }elsif( $line == 1){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
184 @c = @l[1..20];
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
185 }elsif($line == 2){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
186 @c = @l[1..7];
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
187 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
188 $hmmObj->compLines->[$line] = \@c;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
189 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
190
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
191
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
192
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
193 for(my $pos = 0; $pos < $hmmObj->length; $pos++){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
194 #There are three lines per position - match emission line, insert emission line, state transition line
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
195 for( my $line = 0; $line <=2; $line++){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
196 $i++;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
197 my @l = split(/\s+/, $input[$i]);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
198 my @e;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
199 if($line == 0 ){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
200 @e = @l[2..21];
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
201 if($hmmObj->map){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
202 $hmmObj->mapPos->[$pos] = $l[22];
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
203 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
204 }elsif( $line == 1){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
205 @e = @l[1..20];
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
206 }elsif($line == 2){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
207 @e = @l[1..7];
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
208 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
209 $hmmObj->emissionLines->[$pos]->[$line] = \@e;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
210 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
211 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
212
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
213 if($input[$i++] =~ /^\/\/$/){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
214 confess("Expected file terminator //, but got $input[$i]\n");
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
215 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
216
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
217 #No veryifiy that we have COMP line and the the number of emissionlines is equivalent to length
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
218 unless(scalar( @{ $hmmObj->emissionLines } ) == $hmmObj->length){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
219 confess("Number of emssionLines does not match the length of the model, got ".scalar( @{ $hmmObj->emissionLines} ).
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
220 " expected ".$hmmObj->length);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
221 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
222
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
223 unless($hmmObj->compLines){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
224 confess("No compLine set on HMM");
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
225 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
226
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
227 if($hmmObj->map){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
228 unless(scalar(@{$hmmObj->mapPos}) == $hmmObj->length ){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
229 confess("HMM object had map set, but the number of map positions does not match the length of the HMM");
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
230 };
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
231 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
232 return $hmmObj;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
233 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
234
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
235
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
236
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
237 sub writeHMM {
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
238 my ($this, $hmm, $hmmObj) = @_;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
239
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
240 unless($hmm){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
241 confess("No HMM out file passed in!");
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
242 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
243
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
244 unless(ref($hmm) eq 'GLOB'){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
245 my $hmmFile = $hmm;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
246 $hmm = undef;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
247 #Assume that we have a filename and try and open it;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
248 open($hmm, ">$hmmFile") || confess("Could not open $hmmFile:[$!]");
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
249 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
250
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
251 print $hmm $hmmObj->version."\n";
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
252 printf $hmm ("%-5s %s\n", "NAME", $hmmObj->name);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
253 printf $hmm ("%-5s %s\n", "ACC", $hmmObj->accession) if($hmmObj->accession);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
254 printf $hmm ("%-5s %s\n", "DESC", $hmmObj->description) if($hmmObj->description);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
255 printf $hmm ("%-5s %d\n", "LENG", $hmmObj->length);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
256 printf $hmm ("%-5s %s\n", "ALPH", $hmmObj->alpha);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
257 printf $hmm ("%-5s %s\n", "RF", ($hmmObj->rf ? "yes" : "no"));
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
258 printf $hmm ("%-5s %s\n", "MM", ($hmmObj->mm ? "yes" : "no"));
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
259 printf $hmm ("%-5s %s\n", "CONS", ($hmmObj->cons ? "yes" : "no"));
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
260 printf $hmm ("%-5s %s\n", "CS", ($hmmObj->cs ? "yes" : "no"));
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
261 printf $hmm ("%-5s %s\n", "MAP", ($hmmObj->map ? "yes" : "no"));
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
262 printf $hmm ("%-5s %s\n", "DATE", $hmmObj->date);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
263 printf $hmm ("%-5s %d\n", "NSEQ", $hmmObj->nSeq);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
264 printf $hmm ("%-5s %f\n", "EFFN", $hmmObj->effn);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
265 printf $hmm ("%-5s %d\n", "CKSUM", $hmmObj->cksum);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
266 printf $hmm ("%-5s %.2f %.2f;\n", "GA", $hmmObj->seqGA, $hmmObj->domGA) if(defined($hmmObj->seqGA));
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
267 printf $hmm ("%-5s %.2f %.2f;\n", "TC", $hmmObj->seqTC, $hmmObj->domTC) if(defined($hmmObj->seqTC));
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
268 printf $hmm ("%-5s %.2f %.2f;\n", "NC", $hmmObj->seqNC, $hmmObj->domNC) if(defined($hmmObj->seqNC));
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
269
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
270 printf $hmm ("%-5s %s %-9s %.4f %.5f\n", "STATS", "LOCAL", "MSV", $hmmObj->msvStats->{mu}, $hmmObj->msvStats->{lambda});
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
271 printf $hmm ("%-5s %s %-9s %.4f %.5f\n", "STATS", "LOCAL", "VITERBI", $hmmObj->viterbiStats->{mu}, $hmmObj->viterbiStats->{lambda});
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
272 printf $hmm ("%-5s %s %-9s %.4f %.5f\n", "STATS", "LOCAL", "FORWARD", $hmmObj->forwardStats->{tau}, $hmmObj->forwardStats->{lambda});
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
273
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
274 print $hmm <<EOF;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
275 HMM A C D E F G H I K L M N P Q R S T V W Y
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
276 m->m m->i m->d i->m i->i d->m d->d
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
277 EOF
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
278
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
279 printf $hmm ("%7s ", "COMPO");
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
280 foreach my $s (@{$hmmObj->compLines->[0]}){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
281 printf $hmm (" %7s", $s);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
282 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
283 print $hmm "\n";
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
284
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
285 print $hmm (" " x 8);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
286 foreach my $s (@{$hmmObj->compLines->[1]}){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
287 printf $hmm (" %7s", $s);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
288 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
289 print $hmm "\n";
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
290
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
291 print $hmm (" " x 8);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
292 foreach my $s (@{$hmmObj->compLines->[2]}){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
293 printf $hmm (" %7s", $s);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
294 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
295 print $hmm "\n";
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
296
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
297
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
298 my $pos = 1;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
299 foreach my $el (@{ $hmmObj->emissionLines }){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
300 printf $hmm ("%7s ", $pos);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
301 foreach my $s (@{$el->[0]}){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
302 printf $hmm (" %7s", $s);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
303 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
304 if($hmmObj->map){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
305 printf $hmm ("%7s - -\n", $hmmObj->mapPos->[$pos - 1]);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
306 }else{
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
307 print $hmm "\n";
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
308 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
309 print $hmm (" " x 8);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
310 foreach my $s (@{$el->[1]}){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
311 printf $hmm (" %7s", $s);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
312 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
313 print $hmm "\n";
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
314 print $hmm (" " x 8);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
315 foreach my $s (@{$el->[2]}){
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
316 printf $hmm (" %7s", $s);
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
317 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
318 print $hmm "\n";
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
319 $pos++;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
320 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
321
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
322
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
323 print $hmm "//\n";
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
324 }
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
325
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
326
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
327
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
328 1;
68a3648c7d91 Uploaded
matteoc
parents:
diff changeset
329