annotate clustalomega/clustal-omega-1.0.2/src/hhalign/hhhmm-C.h @ 1:bc707542e5de

Uploaded
author clustalomega
date Thu, 21 Jul 2011 13:35:08 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
3 /*********************************************************************
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
4 * Clustal Omega - Multiple sequence alignment
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
5 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
6 * Copyright (C) 2010 University College Dublin
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
7 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
8 * Clustal-Omega is free software; you can redistribute it and/or
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
9 * modify it under the terms of the GNU General Public License as
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
10 * published by the Free Software Foundation; either version 2 of the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
11 * License, or (at your option) any later version.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
12 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
13 * This file is part of Clustal-Omega.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
14 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
15 ********************************************************************/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
16
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
17 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
18 * RCS $Id: hhhmm-C.h 224 2011-03-23 12:13:33Z fabian $
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
19 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
20
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
21
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
22 // hhhmm.C
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
23
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
24 #ifndef MAIN
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
25 #define MAIN
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
26 #include <iostream> // cin, cout, cerr
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
27 #include <fstream> // ofstream, ifstream
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
28 #include <stdio.h> // printf
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
29 using std::cout;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
30 using std::cerr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
31 using std::endl;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
32 using std::ios;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
33 using std::ifstream;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
34 using std::ofstream;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
35 #include <stdlib.h> // exit
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
36 #include <string> // strcmp, strstr
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
37 #include <math.h> // sqrt, pow
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
38 #include <limits.h> // INT_MIN
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
39 #include <float.h> // FLT_MIN
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
40 #include <time.h> // clock
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
41 #include <ctype.h> // islower, isdigit etc
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
42 #include "util-C.h" // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
43 #include "list.h" // list data structure
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
44 #include "hash.h" // hash data structure
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
45 #include "hhdecl-C.h"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
46 #include "hhutil-C.h" // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
47 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
48
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
49 // #ifndef WNLIB
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
50 // #define WNLIB
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
51 // #include "wnconj.h" // Will Naylor's wnlib for optimization in C
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
52 // #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
53
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
54 //////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
55 //// Class HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
56 //////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
57
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
58 //////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
59 // Object constructor
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
60 //////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
61 HMM::HMM(int maxseqdis, int maxres)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
62 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
63 sname = new char*[maxseqdis]; // names of stored sequences
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
64 for (int i = 0; i < maxseqdis; i++){ sname[i] = NULL;}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
65 seq = new char*[maxseqdis]; // residues of stored sequences (first at pos 1!)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
66 for (int i = 0; i < maxseqdis; i++){ seq[i] = NULL;}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
67 Neff_M = new float[maxres]; // Neff_M[i] = diversity of subalignment of seqs that have residue in col i
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
68 Neff_I = new float[maxres]; // Neff_I[i] = diversity of subalignment of seqs that have insert in col i
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
69 Neff_D = new float[maxres]; // Neff_D[i] = diversity of subalignment of seqs that have delete in col i
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
70 longname = new char[DESCLEN]; // Full name of first sequence of original alignment (NAME field)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
71 ss_dssp = new char[maxres]; // secondary structure determined by dssp 0:- 1:H 2:E 3:C 4:S 5:T 6:G 7:B
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
72 sa_dssp = new char[maxres]; // solvent accessibility state determined by dssp 0:- 1:A (absolutely buried) 2:B 3:C 4:D 5:E (exposed)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
73 ss_pred = new char[maxres]; // predicted secondary structure 0:- 1:H 2:E 3:C
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
74 ss_conf = new char[maxres]; // confidence value of prediction 0:- 1:0 ... 10:9
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
75 Xcons = NULL; // create only when needed: consensus sequence in internal representation (A=0 R=1 N=2 D=3 ...)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
76 l = new int[maxres]; // l[i] = pos. of j'th match state in aligment
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
77 /* FS introduced sentinel, NULL terminates loop in destructor, FS, r221->222 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
78 f = new float*[maxres+1]; f[maxres] = NULL; // f[i][a] = prob of finding amino acid a in column i WITHOUT pseudocounts
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
79 g = new float*[maxres+1]; g[maxres] = NULL; // f[i][a] = prob of finding amino acid a in column i WITH pseudocounts
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
80 p = new float*[maxres+1]; p[maxres] = NULL; // p[i][a] = prob of finding amino acid a in column i WITH OPTIMUM pseudocounts
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
81 tr = new float*[maxres+1]; tr[maxres] = NULL; // log2 of transition probabilities M2M M2I M2D I2M I2I D2M D2D M2M_GAPOPEN GAPOPEN GAPEXTD
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
82 // tr_lin = new float*[maxres]; // linear transition probabilities M2M M2I M2D I2M I2I D2M D2D M2M_GAPOPEN GAPOPEN GAPEXTD
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
83 for (int i=0; i<maxres; i++) {f[i]=new(float[NAA+3]);}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
84 for (int i=0; i<maxres; i++) {g[i]=new(float[NAA]);}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
85 for (int i=0; i<maxres; i++) {p[i]=new(float[NAA]);}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
86 for (int i=0; i<maxres; i++) {tr[i]=new(float[NTRANS]);}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
87 // for (int i=0; i<maxres; i++) tr_lin[i]=new(float[NTRANS]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
88 L=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
89 Neff_HMM=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
90 n_display=N_in=N_filtered=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
91 nss_dssp=nsa_dssp=nss_pred=nss_conf=nfirst=ncons=-1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
92 // lamda_hash.New(37,0.0); // Set size and NULL element for hash
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
93 // mu_hash.New(37,0.0); // Set size and NULL element for hash
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
94 lamda=0.0; mu=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
95 name[0]=longname[0]=fam[0]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
96 trans_lin=0; // transition probs in log space
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
97 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
98
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
99
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
100 //////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
101 // Object destructor
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
102 //////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
103 HMM::~HMM()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
104 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
105 //Delete name and seq matrices
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
106 if (NULL != sname){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
107 for (int k=0; (k < n_display) && (NULL != sname[k]); k++){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
108 delete[] sname[k]; sname[k] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
109 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
110 delete[] sname; sname = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
111 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
112 if (NULL != seq){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
113 for (int k=0; (k < n_display) && (NULL != seq[k]); k++){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
114 delete[] seq[k]; seq[k] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
115 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
116 delete[] seq; seq = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
117 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
118 delete[] Neff_M; Neff_M = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
119 delete[] Neff_D; Neff_D = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
120 delete[] Neff_I; Neff_I = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
121 delete[] longname; longname = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
122 delete[] ss_dssp; ss_dssp = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
123 delete[] sa_dssp; sa_dssp = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
124 delete[] ss_pred; ss_pred = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
125 delete[] ss_conf; ss_conf = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
126 delete[] Xcons; Xcons = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
127 delete[] l; l = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
128 for (int i=0; i</*MAXRES*/par.maxResLen; i++){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
129 if (f[i]){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
130 delete[] f[i]; f[i] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
131 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
132 else break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
133 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
134 for (int i=0; i</*MAXRES*/par.maxResLen; i++){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
135 if (g[i]){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
136 delete[] g[i]; g[i] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
137 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
138 else break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
139 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
140 for (int i=0; i</*MAXRES*/par.maxResLen; i++){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
141 if (p[i]){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
142 delete[] p[i]; p[i] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
143 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
144 else break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
145 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
146 for (int i=0; i</*MAXRES*/par.maxResLen; i++){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
147 if (tr[i]){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
148 delete[] tr[i]; tr[i] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
149 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
150 else break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
151 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
152 // for (int i=0; i</*MAXRES*/par.maxResLen; i++) if (tr_lin[i]) delete[] tr_lin[i]; else break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
153 delete[] f; f = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
154 delete[] g; g = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
155 delete[] p; p = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
156 delete[] tr; tr = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
157 // delete[] tr_lin;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
158 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
159
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
160 //////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
161 // Deep-copy constructor
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
162 //////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
163 HMM& HMM::operator=(HMM& q)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
164 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
165 L=q.L;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
166 for (int i=0; i<=L+1; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
167 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
168 for (int a=0; a<NAA; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
169 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
170 f[i][a]=q.f[i][a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
171 g[i][a]=q.g[i][a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
172 p[i][a]=q.p[i][a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
173 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
174 for (int a=0; a<NTRANS; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
175 tr[i][a]=q.tr[i][a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
176 ss_dssp[i]=q.ss_dssp[i];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
177 sa_dssp[i]=q.sa_dssp[i];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
178 ss_pred[i]=q.ss_pred[i];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
179 ss_conf[i]=q.ss_conf[i];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
180 l[i]=q.l[i];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
181 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
182 if (q.Xcons)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
183 for (int i=0; i<=L+1; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
184 Xcons[i] =q.Xcons[i];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
185
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
186 n_display=q.n_display;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
187 for (int k=0; k<n_display; k++) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
188 sname[k]=new(char[strlen(q.sname[k])+1]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
189 if (!sname[k]) MemoryError("array of names for sequences to display");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
190 strcpy(sname[k],q.sname[k]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
191 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
192 for (int k=0; k<n_display; k++) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
193 seq[k]=new(char[strlen(q.seq[k])+1]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
194 if (!seq[k]) MemoryError("array of names for sequences to display");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
195 strcpy(seq[k],q.seq[k]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
196 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
197 ncons=q.ncons;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
198 nfirst=q.nfirst;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
199 nss_dssp=q.nss_dssp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
200 nsa_dssp=q.nsa_dssp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
201 nss_pred=q.nss_pred;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
202 nss_conf=q.nss_conf;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
203
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
204 for (int i=0; i<=L+1; ++i) Neff_M[i]=q.Neff_M[i];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
205 for (int i=0; i<=L+1; ++i) Neff_I[i]=q.Neff_I[i];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
206 for (int i=0; i<=L+1; ++i) Neff_D[i]=q.Neff_D[i];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
207 Neff_HMM=q.Neff_HMM;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
208
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
209 strcpy(longname,q.longname);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
210 strcpy(name,q.name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
211 strcpy(fam,q.fam);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
212 strcpy(sfam,q.sfam);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
213 strcpy(fold,q.fold);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
214 strcpy(cl,q.cl);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
215 strcpy(file,q.file);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
216
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
217 lamda=q.lamda;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
218 mu=q.mu;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
219
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
220 for (int a=0; a<NAA; ++a) pav[a]=q.pav[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
221 N_in=q.N_in;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
222 N_filtered=q.N_filtered;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
223 trans_lin=q.trans_lin;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
224 return (HMM&) (*this);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
225 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
226
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
227
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
228 ///////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
229 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
230 * @brief Read an HMM from an HHsearch .hhm file; return 0 at end of file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
231 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
232 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
233 HMM::Read(FILE* dbf, char* path)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
234 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
235 char line[LINELEN]=""; // input line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
236 char str3[8]="",str4[8]=""; // first 3 and 4 letters of input line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
237 char* ptr; // pointer for string manipulation
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
238 int i=0; // index for match state (first=1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
239 int a; // amino acid index
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
240 static int warn=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
241
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
242 trans_lin=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
243 L=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
244 Neff_HMM=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
245 n_display=N_in=N_filtered=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
246 nss_dssp=nsa_dssp=nss_pred=nss_conf=nfirst=ncons=-1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
247 lamda=mu=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
248 trans_lin=0; // transition probs in log space
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
249 name[0]=longname[0]=fam[0]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
250 //If at the end of while-loop L is still 0 then we have reached end of db file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
251
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
252 //Do not delete name and seq vectors because their adresses are transferred to hitlist as part of a hit!!
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
253
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
254 while (fgetline(line,LINELEN-1,dbf) && !(line[0]=='/' && line[1]=='/'))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
255 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
256
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
257 if (strscn(line)==NULL) continue; // skip lines that contain only white space
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
258 substr(str3,line,0,2); // copy the first three characters into str3
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
259 substr(str4,line,0,3); // copy the first four characters into str4
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
260
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
261 if (!strncmp("HH",line,2)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
262
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
263 if (!strcmp("NAME",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
264 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
265 ptr=strscn(line+4); //advance to first non-white-space character
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
266 if (ptr)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
267 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
268 strncpy(longname,ptr,DESCLEN-1); //copy full name to longname
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
269 longname[DESCLEN-1]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
270 strncpy(name,ptr,NAMELEN-1); //copy longname to name...
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
271 strcut(name); //...cut after first word...
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
272 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
273 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
274 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
275 strcpy(longname,"undefined");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
276 strcpy(name,"undefined");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
277 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
278 if (v>=4) cout<<"Reading in HMM "<<name<<":\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
279 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
280
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
281 else if (!strcmp("FAM",str3))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
282 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
283 ptr=strscn(line+3); //advance to first non-white-space character
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
284 if (ptr) strncpy(fam,ptr,IDLEN-1); else strcpy(fam,""); //copy family name to basename
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
285 ScopID(cl,fold,sfam,fam); //get scop classification from basename (e.g. a.1.2.3.4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
286 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
287
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
288 else if (!strcmp("FILE",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
289 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
290 if (path) strncpy(file,path,NAMELEN-1); else *file='\0'; // copy path to file variable
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
291 ptr=strscn(line+4); //advance to first non-white-space character
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
292 if (ptr)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
293 strncat(file,ptr,NAMELEN-1-strlen(file)); // append file name read from file to path
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
294 else strcat(file,"*");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
295 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
296
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
297 else if (!strcmp("LENG",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
298 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
299 ptr=line+4;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
300 L=strint(ptr); //read next integer (number of match states)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
301 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
302 else if (!strcmp("FILT",str4) || !strcmp("NSEQ",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
303 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
304 ptr=line+4;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
305 N_filtered=strint(ptr); //read next integer: number of sequences after filtering
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
306 N_in=strint(ptr); //read next integer: number of sequences in alignment
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
307 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
308
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
309 else if (!strcmp("NEFF",str4) || !strcmp("NAA",str3)) sscanf(line+6,"%f",&Neff_HMM);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
310
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
311 else if (!strcmp("EVD",str3))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
312 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
313 // char key[IDLEN];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
314 sscanf(line+6,"%f %f",&lamda,&mu);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
315 // sscanf(line+22,"%s",key);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
316 // lamda_hash.Add(key,lamda);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
317 // mu_hash.Add(key,mu);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
318 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
319
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
320 else if (!strcmp("DESC",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
321 else if (!strcmp("COM",str3)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
322 else if (!strcmp("DATE",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
323
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
324 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
325 // Read template sequences that should get displayed in output alignments
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
326 else if (!strcmp("SEQ",str3))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
327 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
328 //char cur_seq[MAXCOL]=""; //Sequence currently read in
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
329 char *cur_seq = new(char[par.maxColCnt]); //Sequence currently read in
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
330 int k; // sequence index; start with -1; after reading name of n'th sequence-> k=n
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
331 int h; // index for character in input line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
332 int l=1; // index of character in sequence seq[k]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
333 int i=1; // index of match states in ss_dssp[i] and ss_pred[i] sequence
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
334 int n_seq=0; // number of sequences to be displayed EXCLUDING ss sequences
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
335 cur_seq[0]='-'; // overwrite '\0' character at beginning to be able to do strcpy(*,cur_seq)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
336 k=-1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
337 while (fgetline(line,LINELEN-1,dbf) && line[0]!='#')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
338 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
339 if (v>=4) cout<<"Read from file:"<<line<<"\n"; //DEBUG
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
340 if (line[0]=='>') //line contains sequence name
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
341 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
342 if (k>=MAXSEQDIS-1) //maximum number of allowable sequences exceeded
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
343 {while (fgetline(line,LINELEN-1,dbf) && line[0]!='#'); break;}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
344 k++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
345 if (!strncmp(line,">ss_dssp",8)) nss_dssp=k;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
346 else if (!strncmp(line,">sa_dssp",8)) nsa_dssp=k;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
347 else if (!strncmp(line,">ss_pred",8)) nss_pred=k;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
348 else if (!strncmp(line,">ss_conf",8)) nss_conf=k;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
349 else if (!strncmp(line,">Cons-",6) || !strncmp(line,">Consensus",10)) ncons=k;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
350 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
351 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
352 if (nfirst==-1) nfirst=k;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
353 if (n_seq>=par.nseqdis)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
354 {while (fgetline(line,LINELEN-1,dbf) && line[0]!='#'); k--; break;}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
355 n_seq++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
356 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
357
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
358 //If this is not the first sequence then store residues of previous sequence
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
359 if (k>0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
360 seq[k-1]=new(char[strlen(cur_seq)+1]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
361 if (!seq[k-1]) MemoryError("array of sequences to display");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
362 strcpy(seq[k-1],cur_seq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
363 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
364
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
365 // store sequence name
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
366 strcut(line+1); //find next white-space character and overwrite it with end-of-string character
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
367 sname[k] = new (char[strlen(line+1)+1]); //+1 for terminating '\0'
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
368 if (!sname[k]) MemoryError("array of names for sequences to display");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
369 strcpy(sname[k],line+1); //store sequence name in **name
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
370 l=1; i=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
371 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
372 else //line contains sequence residues
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
373 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
374 if (k==-1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
375 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
376 cerr<<endl<<"WARNING: Ignoring following line while reading HMM"<<name<<":\n\'"<<line<<"\'\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
377 continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
378 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
379
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
380 h=0; //counts characters in current line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
381
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
382 // Check whether all characters are correct; store into cur_seq
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
383 if (k==nss_dssp) // lines with dssp secondary structure states (. - H E C S T G B)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
384 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
385 while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
386 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
387 if (ss2i(line[h])>=0 && line[h]!='.')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
388 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
389 char c=ss2ss(line[h]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
390 cur_seq[l]=c;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
391 if (c!='.' && !(c>='a' && c<='z')) ss_dssp[i++]=ss2i(c);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
392 l++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
393 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
394 else if (v && ss2i(line[h])==-2)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
395 cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
396 h++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
397 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
398 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
399 if (k==nsa_dssp) // lines with dssp secondary solvent accessibility (- A B C D E)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
400 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
401 while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
402 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
403 if (sa2i(line[h])>=0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
404 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
405 char c=line[h];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
406 cur_seq[l]=c;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
407 if (c!='.' && !(c>='a' && c<='z')) sa_dssp[i++]=sa2i(c);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
408 l++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
409 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
410 else if (v && sa2i(line[h])==-2)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
411 cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
412 h++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
413 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
414 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
415 else if (k==nss_pred) // lines with predicted secondary structure (. - H E C)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
416 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
417 while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
418 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
419 if (ss2i(line[h])>=0 && ss2i(line[h])<=3 && line[h]!='.')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
420 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
421 char c=ss2ss(line[h]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
422 cur_seq[l]=c;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
423 if (c!='.' && !(c>='a' && c<='z')) ss_pred[i++]=ss2i(c);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
424 l++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
425 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
426 else if (v && ss2i(line[h])==-2)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
427 cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
428 h++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
429 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
430 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
431 else if (k==nss_conf) // lines with confidence values should contain only 0-9, '-', or '.'
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
432 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
433 while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
434 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
435 if (line[h]=='-' || (line[h]>='0' && line[h]<='9'))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
436 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
437 cur_seq[l]=line[h];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
438 ss_conf[l]=cf2i(line[h]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
439 l++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
440 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
441 else if (v && cf2i(line[h])==-2)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
442 cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
443 h++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
444 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
445 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
446 else // normal line containing residues
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
447 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
448 while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
449 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
450 if (aa2i(line[h])>=0 && line[h]!='.') // ignore '.' and white-space characters ' ', \t and \n (aa2i()==-1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
451 {cur_seq[l]=line[h]; l++;}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
452 else if (aa2i(line[h])==-2 && v)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
453 cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
454 h++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
455 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
456 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
457 cur_seq[l]='\0'; //Ensure that cur_seq ends with a '\0' character
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
458
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
459 } //end else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
460 } //while(getline)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
461 //If this is not the first sequence some residues have already been read in
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
462 if (k>=0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
463 seq[k]=new(char[strlen(cur_seq)+1]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
464 if (!seq[k]) MemoryError("array of sequences to display");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
465 strcpy(seq[k],cur_seq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
466 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
467 n_display=k+1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
468
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
469 // DEBUG
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
470 if (v>=4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
471 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
472 printf("nss_dssp=%i nsa_dssp=%i nss_pred=%i nss_conf=%i nfirst=%i\n",nss_dssp,nsa_dssp,nss_pred,nss_conf,nfirst);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
473 for (k=0; k<n_display; k++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
474 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
475 int j;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
476 cout<<">"<<sname[k]<<"(k="<<k<<")\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
477 if (k==nss_dssp) {for (j=1; j<=L; j++) cout<<char(i2ss(ss_dssp[j]));}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
478 else if (k==nsa_dssp) {for (j=1; j<=L; j++) cout<<char(i2sa(sa_dssp[j]));}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
479 else if (k==nss_pred) {for (j=1; j<=L; j++) cout<<char(i2ss(ss_pred[j]));}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
480 else if (k==nss_conf) {for (j=1; j<=L; j++) cout<<int(ss_conf[j]-1);}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
481 else {for (j=1; j<=L; j++) cout<<seq[k][j];}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
482 cout<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
483 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
484 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
485
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
486 } //end if("SEQ")
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
487
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
488 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
489 // Read average amino acid frequencies for HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
490 else if (!strcmp("FREQ",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
491 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
492 fprintf(stderr,"Error: hhm file has obsolete format.\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
493 fprintf(stderr,"Please use hhmake version > 1.1 to generate hhm files.\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
494 exit(1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
495 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
496
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
497 else if (!strcmp("AVER",str4)) {} // AVER line scrapped
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
498 else if (!strcmp("NULL",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
499 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
500 ptr=line+4;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
501 for (a=0; a<20 && ptr; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
502 //s2[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
503 pb[s2a[a]] = (float) fpow2(float(-strinta(ptr))/HMMSCALE);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
504 if (!ptr) return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
505 if (v>=4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
506 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
507 printf("\nNULL ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
508 for (a=0; a<20; ++a) printf("%5.1f ",100.*pb[s2a[a]]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
509 printf("\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
510 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
511 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
512
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
513 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
514 // Read transition probabilities from start state
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
515 else if (!strcmp("HMM",str3))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
516 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
517 fgetline(line,LINELEN-1,dbf); // Skip line with amino acid labels
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
518 fgetline(line,LINELEN-1,dbf); // Skip line with transition labels
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
519 ptr=line;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
520 for (a=0; a<=D2D && ptr; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
521 tr[0][a] = float(-strinta(ptr))/HMMSCALE; //store transition probabilites as log2 values
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
522 // strinta returns next integer in string and puts ptr to first char
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
523 // after the integer. Returns -99999 if '*' is found.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
524 // ptr is set to 0 if no integer is found after ptr.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
525 Neff_M[0] = float(strinta(ptr))/HMMSCALE; // Read eff. number of sequences with M->? transition
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
526 Neff_I[0] = float(strinta(ptr))/HMMSCALE; // Read eff. number of sequences with I->? transition
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
527 Neff_D[0] = float(strinta(ptr))/HMMSCALE; // Read eff. number of sequences with D->? transition
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
528 if (!ptr) return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
529
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
530 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
531 // Read columns of HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
532 int next_i=0; // index of next column
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
533 while (fgetline(line,LINELEN-2,dbf) && !(line[0]=='/' && line[1]=='/') && line[0]!='#')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
534 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
535 if (strscn(line)==NULL) continue; // skip lines that contain only white space
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
536
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
537 // Read in AA probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
538 ptr=line+1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
539 int prev_i = next_i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
540 next_i = strint(ptr); ++i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
541 if (v && next_i!=prev_i+1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
542 if (++warn<=5)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
543 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
544 cerr<<endl<<"WARNING: in HMM "<<name<<" state "<<prev_i<<" is followed by state "<<next_i<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
545 if (warn==5) cerr<<endl<<"WARNING: further warnings while reading HMMs will be suppressed.\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
546 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
547 if (i>L)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
548 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
549 cerr<<endl<<"WARNING: in HMM "<<name<<" there are more columns than the stated length "<<L<<". Skipping HMM\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
550 return 2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
551 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
552 if (i>=/*MAXRES*/par.maxResLen-2)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
553 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
554 fgetline(line,LINELEN-1,dbf); // Skip line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
555 continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
556 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
557
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
558 for (a=0; a<20 && ptr; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
559 // f[i][s2a[a]] = (float)pow(2.,float(-strinta(ptr))/HMMSCALE);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
560 f[i][s2a[a]] = fpow2(float(-strinta(ptr))/HMMSCALE); // speed-up ~5 s for 10000 SCOP domains
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
561
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
562 //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
563 l[i]=strint(ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
564 if (!ptr) return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
565 if (v>=4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
566 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
567 printf("%s",line);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
568 printf("%6i ",i);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
569 for (a=0; a<20; ++a) printf("%5.1f ",100*f[i][s2a[a]]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
570 printf("%5i",l[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
571 printf("\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
572 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
573
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
574 // Read transition probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
575 fgetline(line,LINELEN-1,dbf); // Skip line with amino acid labels
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
576 if (line[0]!=' ' && line[0]!='\t') return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
577 ptr=line;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
578 for (a=0; a<=D2D && ptr; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
579 tr[i][a] = float(-strinta(ptr))/HMMSCALE; //store transition prob's as log2-values
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
580 Neff_M[i] = float(strinta(ptr))/HMMSCALE; // Read eff. number of sequences with M->? transition
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
581 Neff_I[i] = float(strinta(ptr))/HMMSCALE; // Read eff. number of sequences with I->? transition
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
582 Neff_D[i] = float(strinta(ptr))/HMMSCALE; // Read eff. number of sequences with D->? transition
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
583 if (!ptr) return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
584 if (v>=4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
585 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
586 printf(" ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
587 for (a=0; a<=D2D; ++a) printf("%5.1f ",100*fpow2(tr[i][a]));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
588 printf("%5.1f %5.1f %5.1f \n",Neff_M[i],Neff_I[i],Neff_D[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
589 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
590 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
591 if (line[0]=='/' && line[1]=='/') break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
592 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
593 else if (v) cerr<<endl<<"WARNING: Ignoring line\n\'"<<line<<"\'\nin HMM "<<name<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
594
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
595 } //while(getline)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
596
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
597 if (L==0) return 0; //End of db file -> stop reading in
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
598
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
599 // Set coefficients of EVD (= 0.0 if not calibrated for these parameters)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
600 // lamda = lamda_hash.Show(par.Key());
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
601 // mu = mu_hash.Show(par.Key());
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
602 if (lamda && v>=3) printf("HMM %s is already calibrated: lamda=%-5.3f, mu=%-5.2f\n",name,lamda,mu);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
603
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
604 if (v && i!=L) cerr<<endl<<"Warning: in HMM "<<name<<" there are only "<<i<<" columns while the stated length is "<<L<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
605 if (v && i>/*MAXRES*/par.maxResLen-2) {i=/*MAXRES*/par.maxResLen-2; cerr<<endl<<"WARNING: maximum number "<</*MAXRES*/par.maxResLen-2<<" of residues exceeded while reading HMM "<<name<<"\n";}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
606 if (v && !i) cerr<<endl<<"WARNING: HMM "<<name<<" contains no match states. Check the alignment that gave rise to this HMM.\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
607 if (v>=2) cout<<"Read in HMM "<<name<<" with "<<L<<" match states and effective number of sequences = "<<Neff_HMM<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
608 L = i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
609
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
610 // Set emission probabilities of zero'th (begin) state and L+1st (end) state to background probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
611 for (a=0; a<20; ++a) f[0][a]=f[L+1][a]=pb[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
612 Neff_M[L+1]=1.0f;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
613 Neff_I[L+1]=Neff_D[L+1]=0.0f;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
614
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
615 return 1; //return status: ok
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
616
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
617 } /* this is the end of HMM::Read() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
618
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
619
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
620 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
621 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
622 * @brief Read an HMM from a HMMer .hmm file; return 0 at end of file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
623 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
624 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
625 HMM::ReadHMMer(FILE* dbf, char* filestr)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
626 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
627 char line[LINELEN]=""; // input line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
628 char desc[DESCLEN]=""; // description of family
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
629 char str4[5]=""; // first 4 letters of input line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
630 char* ptr; // pointer for string manipulation
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
631 int i=0; // index for match state (first=1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
632 int a; // amino acid index
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
633 char dssp=0; // 1 if a consensus SS has been found in the transition prob lines
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
634 char annot=0; // 1 if at least one annotation character in insert lines is ne '-' or ' '
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
635 int k=0; // index for seq[k]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
636 static char ignore_hmmer_cal = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
637 char* annotchr; // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
638 //annotchr = new char[MAXRES]; // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
639 annotchr = new char[par.maxResLen]; // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
640 static int warn=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
641 int iAlpha = 20; /* size of alphabet, default is protein = 20 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
642 double dAlphaInv = 1.00 / (double)(iAlpha); /* weight of AA */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
643
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
644 trans_lin=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
645 L=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
646 Neff_HMM=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
647 n_display=N_in=N_filtered=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
648 nss_dssp=nsa_dssp=nss_pred=nss_conf=nfirst=ncons=-1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
649 lamda=mu=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
650 trans_lin=0; // transition probs in log space
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
651 name[0]=longname[0]=desc[0]=fam[0]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
652 //If at the end of while-loop L is still 0 then we have reached end of db file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
653
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
654 // Do not delete name and seq vectors because their adresses are transferred to hitlist as part of a hit!!
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
655
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
656 while (fgetline(line,LINELEN-1,dbf) && !(line[0]=='/' && line[1]=='/'))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
657 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
658
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
659 if (strscn(line)==NULL) continue; // skip lines that contain only white space
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
660 if (!strncmp("HMMER",line,5)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
661
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
662 substr(str4,line,0,3); // copy the first four characters into str4
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
663
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
664 if (!strcmp("NAME",str4) && name[0]=='\0')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
665 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
666 ptr=strscn(line+4); // advance to first non-white-space character
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
667 strncpy(name,ptr,NAMELEN-1); // copy full name to name
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
668 strcut(name); // ...cut after first word...
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
669 if (v>=4) cout<<"Reading in HMM "<<name<<":\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
670 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
671
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
672 else if (!strcmp("ACC ",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
673 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
674 ptr=strscn(line+4); // advance to first non-white-space character
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
675 strncpy(longname,ptr,DESCLEN-1); // copy Accession id to longname...
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
676 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
677
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
678 else if (!strcmp("DESC",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
679 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
680 ptr=strscn(line+4); // advance to first non-white-space character
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
681 if (ptr)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
682 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
683 strncpy(desc,ptr,DESCLEN-1); // copy description to name...
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
684 desc[DESCLEN-1]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
685 strcut(ptr); // ...cut after first word...
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
686 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
687 if (!ptr || ptr[1]!='.' || strchr(ptr+3,'.')==NULL) strcpy(fam,""); else strcpy(fam,ptr); // could not find two '.' in name?
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
688 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
689
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
690 else if (!strcmp("LENG",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
691 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
692 ptr=line+4;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
693 L=strint(ptr); //read next integer (number of match states)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
694 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
695
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
696 else if (!strcmp("ALPH",str4)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
697
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
698 ptr=strscn(line+4);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
699
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
700 if (0 == strcmp(ptr, "Amino")){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
701 iAlpha = 20;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
702 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
703 else if (0 == strcmp(ptr, "Nucleic")){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
704 iAlpha = 4;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
705 printf("%s:%s:%d: WARNING: HMM reading does not work for DNA/RNA\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
706 __FUNCTION__, __FILE__, __LINE__);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
707 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
708 else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
709 return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
710 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
711 dAlphaInv = 1.00 / (double)(iAlpha);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
712 //continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
713 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
714 else if (!strcmp("RF ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
715 else if (!strcmp("CS ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
716 else if (!strcmp("MAP ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
717 else if (!strcmp("COM ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
718 else if (!strcmp("NSEQ",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
719 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
720 ptr=line+4;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
721 N_in=N_filtered=strint(ptr); //read next integer: number of sequences after filtering
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
722 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
723
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
724 else if (!strcmp("DATE",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
725 else if (!strncmp("CKSUM ",line,5)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
726 else if (!strcmp("GA ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
727 else if (!strcmp("TC ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
728 else if (!strcmp("NC ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
729
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
730 else if (!strncmp("SADSS",line,5))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
731 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
732 if (nsa_dssp<0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
733 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
734 nsa_dssp=k++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
735 seq[nsa_dssp] = new(char[/*MAXRES*/par.maxResLen+2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
736 sname[nsa_dssp] = new(char[15]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
737 strcpy(seq[nsa_dssp]," ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
738 strcpy(sname[nsa_dssp],"sa_dssp");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
739
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
740 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
741 ptr=strscn(line+5);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
742 if (ptr)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
743 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
744 strcut(ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
745 if (strlen(seq[nsa_dssp])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
746 printf("\nWARNING: HMM %s has SADSS records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
747 else strcat(seq[nsa_dssp],ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
748 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
749 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
750
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
751 else if (!strncmp("SSPRD",line,5))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
752 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
753 if (nss_pred<0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
754 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
755 nss_pred=k++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
756 seq[nss_pred] = new(char[/*MAXRES*/par.maxResLen+2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
757 sname[nss_pred] = new(char[15]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
758 strcpy(seq[nss_pred]," ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
759 strcpy(sname[nss_pred],"ss_pred");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
760
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
761 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
762 ptr=strscn(line+5);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
763 if (ptr)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
764 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
765 strcut(ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
766 if (strlen(seq[nss_pred])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
767 printf("\nWARNING: HMM %s has SSPRD records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
768 else strcat(seq[nss_pred],ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
769 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
770 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
771
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
772 else if (!strncmp("SSCON",line,5))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
773 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
774 if (nss_conf<0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
775 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
776 nss_conf=k++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
777 seq[nss_conf] = new(char[/*MAXRES*/par.maxResLen+2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
778 sname[nss_conf] = new(char[15]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
779 strcpy(seq[nss_conf]," ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
780 strcpy(sname[nss_conf],"ss_conf");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
781 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
782 ptr=strscn(line+5);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
783 if (ptr)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
784 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
785 strcut(ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
786 if (strlen(seq[nss_conf])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
787 printf("\nWARNING: HMM %s has SSPRD records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
788 else strcat(seq[nss_conf],ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
789 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
790 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
791
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
792 else if (!strncmp("SSCIT",line,5)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
793 else if (!strcmp("XT ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
794 else if (!strcmp("NULT",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
795
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
796 else if (!strcmp("NULE",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
797 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
798 ptr=line+4;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
799 for (a=0; (a < iAlpha) && ptr; ++a){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
800 /* FIXME: FS introduced alphabet size (was '20')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
801 and dAlphaInv (was '0.05' = 1/20) */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
802 //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
803 pb[s2a[a]] = (float) dAlphaInv * fpow2(float(strinta(ptr,-99999))/HMMSCALE); /* dAlphaInv */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
804 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
805 for (a = iAlpha; a < 20; a++){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
806 pb[s2a[a]] = 0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
807 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
808 if (!ptr) return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
809 if (v>=4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
810 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
811 printf("\nNULL ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
812 for (a=0; a<iAlpha; ++a) { /* FIXME: FS introduced iAlpha, was '20' */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
813 printf("%5.1f ",100.*pb[s2a[a]]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
814 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
815 printf("\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
816 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
817 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
818
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
819 else if (!strcmp("EVD ",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
820 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
821 char* ptr=line+4;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
822 ptr = strscn(ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
823 sscanf(ptr,"%f",&lamda);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
824 ptr = strscn(ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
825 sscanf(ptr,"%f",&mu);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
826 if (lamda<0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
827 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
828 if (v>=2 && ignore_hmmer_cal==0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
829 cerr<<endl<<"Warning: some HMMs have been calibrated with HMMER's 'hmmcalibrate'. These calibrations will be ignored\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
830 ignore_hmmer_cal=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
831 mu = lamda = 0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
832 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
833 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
834
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
835 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
836 // Read transition probabilities from start state
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
837 else if (!strncmp("HMM",line,3))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
838 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
839 fgetline(line,LINELEN-1,dbf); // Skip line with amino acid labels
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
840 fgetline(line,LINELEN-1,dbf); // Skip line with transition labels
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
841 ptr=line;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
842 for (a=0; a<=M2D && ptr; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
843 tr[0][a] = float(strinta(ptr,-99999))/HMMSCALE; //store transition probabilites as log2 values
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
844 // strinta returns next integer in string and puts ptr to first char
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
845 // after the integer. Returns -99999 if '*' is found.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
846 // ptr is set to 0 if no integer is found after ptr.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
847 tr[0][I2M] = tr[0][D2M] = 0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
848 tr[0][I2I] = tr[0][D2D] = -99999.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
849 if (!ptr) return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
850 if (v>=4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
851 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
852 printf(" ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
853 for (a=0; a<=D2D && ptr; ++a) printf("%5.1f ",100*fpow2(tr[i][a]));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
854 printf("\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
855 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
856
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
857 // Prepare to store DSSP states (if there are none, delete afterwards)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
858 nss_dssp=k++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
859 seq[nss_dssp] = new(char[/*MAXRES*/par.maxResLen+2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
860 sname[nss_dssp] = new(char[15]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
861 strcpy(sname[nss_dssp],"ss_dssp");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
862
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
863 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
864 // Read columns of HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
865 int next_i=0; // index of next column
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
866 while (fgetline(line,LINELEN-1,dbf) && !(line[0]=='/' && line[1]=='/') && line[0]!='#')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
867 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
868 if (strscn(line)==NULL) continue; // skip lines that contain only white space
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
869
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
870 // Read in AA probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
871 ptr=line;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
872 int prev_i = next_i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
873 next_i = strint(ptr); ++i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
874 if (v && next_i!=prev_i+1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
875 if (++warn<5)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
876 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
877 cerr<<endl<<"WARNING: in HMM "<<name<<" state "<<prev_i<<" is followed by state "<<next_i<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
878 if (warn==5) cerr<<endl<<"WARNING: further warnings while reading HMMs will be suppressed.\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
879 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
880 if (i>L)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
881 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
882 cerr<<endl<<"Error: in HMM "<<name<<" there are more columns than the stated length "<<L<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
883 return 2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
884 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
885 if (i>L && v)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
886 cerr<<endl<<"WARNING: in HMM "<<name<<" there are more columns than the stated length "<<L<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
887 if (i>=/*MAXRES*/par.maxResLen-2)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
888 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
889 fgetline(line,LINELEN-1,dbf); // Skip two lines
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
890 fgetline(line,LINELEN-1,dbf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
891 continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
892 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
893
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
894 for (a=0; (a<iAlpha) && ptr; ++a){ /* FIXME: FS introduced iAlpha, was '20' */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
895 f[i][s2a[a]] = (float) pb[s2a[a]]*fpow2(float(strinta(ptr,-99999))/HMMSCALE);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
896 //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
897 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
898 for (a = iAlpha; a < 20; a++){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
899 f[i][s2a[a]] = 0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
900 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
901 if (!ptr) return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
902 if (v>=4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
903 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
904 printf("%6i ",i);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
905 for (a=0; a<iAlpha; ++a) { /* FIXME: FS introduced iAlpha, was '20' */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
906 printf("%5.1f ",100*f[i][s2a[a]]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
907 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
908 printf("\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
909 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
910
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
911 // Read insert emission line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
912 fgetline(line,LINELEN-1,dbf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
913 ptr = strscn(line);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
914 if (!ptr) return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
915 annotchr[i]=uprchr(*ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
916 if (*ptr!='-' && *ptr!=' ') annot=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
917
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
918 // Read annotation character and seven transition probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
919 fgetline(line,LINELEN-1,dbf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
920 ptr = strscn(line);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
921 switch (*ptr)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
922 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
923 case 'H':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
924 ss_dssp[i]=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
925 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
926 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
927 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
928 case 'E':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
929 ss_dssp[i]=2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
930 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
931 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
932 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
933 case 'C':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
934 ss_dssp[i]=3;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
935 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
936 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
937 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
938 case 'S':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
939 ss_dssp[i]=4;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
940 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
941 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
942 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
943 case 'T':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
944 ss_dssp[i]=5;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
945 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
946 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
947 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
948 case 'G':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
949 ss_dssp[i]=6;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
950 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
951 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
952 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
953 case 'B':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
954 ss_dssp[i]=7;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
955 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
956 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
957 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
958 case 'I':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
959 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
960 case '~':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
961 ss_dssp[i]=3;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
962 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
963 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
964 case '-':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
965 default:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
966 ss_dssp[i]=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
967 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
968 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
969
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
970 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
971
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
972 ptr+=2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
973 for (a=0; a<=D2D && ptr; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
974 tr[i][a] = float(strinta(ptr,-99999))/HMMSCALE; //store transition prob's as log2-values
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
975 if (!ptr) return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
976 if (v>=4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
977 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
978 printf(" ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
979 for (a=0; a<=D2D; ++a) printf("%5.1f ",100*fpow2(tr[i][a]));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
980 printf("\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
981 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
982 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
983
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
984 if (line[0]=='/' && line[1]=='/') break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
985
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
986 } /* strncmp("HMM") */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
987
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
988 } //while(getline)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
989
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
990 if (L==0) return 0; //End of db file -> stop reading in
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
991
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
992 // Set coefficients of EVD (= 0.0 if not calibrated for these parameters)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
993 // lamda = lamda_hash.Show(par.Key());
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
994 // mu = mu_hash.Show(par.Key());
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
995 if (lamda && v>=2) printf("HMM %s is already calibrated: lamda=%-5.3f, mu=%-5.2f\n",name,lamda,mu);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
996
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
997 if (v && i!=L) cerr<<endl<<"Warning: in HMM "<<name<<" there are only "<<i<<" columns while the stated length is "<<L<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
998 if (v && i>=/*MAXRES*/par.maxResLen-2) {i=/*MAXRES*/par.maxResLen-2; cerr<<endl<<"WARNING: maximum number "<</*MAXRES*/par.maxResLen-2<<" of residues exceeded while reading HMM "<<name<<"\n";}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
999 if (v && !i) cerr<<endl<<"WARNING: HMM "<<name<<" contains no match states. Check the alignment that gave rise to this HMM.\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1000 L = i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1001
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1002 if (strlen(longname)>0) strcat(longname," ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1003 strncat(longname,name,DESCLEN-strlen(longname)-1); // longname = ACC NAME DESC
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1004 if (strlen(name)>0) strcat(longname," ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1005 strncat(longname,desc,DESCLEN-strlen(longname)-1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1006 longname[DESCLEN-1]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1007 ScopID(cl,fold,sfam,fam);// get scop classification from basename (e.g. a.1.2.3.4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1008 RemoveExtension(file,filestr); // copy name of dbfile without extension into 'file'
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1009
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1010 // Secondary structure
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1011 if (!dssp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1012 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1013 // remove dssp sequence
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1014 // memory that had been allocated in case ss_dssp was given needs to be freed
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1015 delete[] seq[nss_dssp]; seq[nss_dssp] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1016 // memory that had been allocated in case ss_dssp was given needs to be freed
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1017 delete[] sname[nss_dssp]; sname[nss_dssp] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1018 nss_dssp=-1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1019 k--;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1020 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1021 if (nss_pred>=0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1022 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1023 for (i=1; i<=L; ++i) ss_pred[i] = ss2i(seq[nss_pred][i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1024 if (nss_conf>=0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1025 for (i=1; i<=L; ++i) ss_conf[i] = cf2i(seq[nss_conf][i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1026 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1027 for (i=1; i<=L; ++i) ss_conf[i] = 5;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1028 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1029
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1030 // Copy query (first sequence) and consensus residues?
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1031 if (par.showcons)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1032 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1033 sname[k]=new(char[10]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1034 strcpy(sname[k],"Consensus");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1035 sname[k+1]=new(char[strlen(longname)+1]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1036 strcpy(sname[k+1],longname);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1037 seq[k]=new(char[L+2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1038 seq[k][0]=' ';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1039 seq[k][L+1]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1040 seq[k+1]=new(char[L+2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1041 seq[k+1][0]=' ';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1042 seq[k+1][L+1]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1043 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1044 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1045 float pmax=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1046 int amax=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1047 for (a=0; a<NAA; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1048 if (f[i][a]>pmax) {amax=a; pmax=f[i][a];}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1049 if (pmax>0.6) seq[k][i]=i2aa(amax);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1050 else if (pmax>0.4) seq[k][i]=lwrchr(i2aa(amax));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1051 else seq[k][i]='x';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1052 seq[k+1][i]=i2aa(amax);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1053 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1054 ncons=k++; // nfirst is set later!
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1055 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1056 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1057 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1058 sname[k]=new(char[strlen(longname)+1]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1059 /* FIXME valgrind says bytes get lost here during hmm iteration --
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1060 fixed in HMM::ClobberGlobal(), I (FS) think */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1061 strcpy(sname[k],longname);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1062 seq[k]=new(char[L+2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1063 seq[k][0]=' ';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1064 seq[k][L+1]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1065 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1066
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1067 if (annot) // read in some annotation characters?
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1068 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1069 annotchr[0]=' ';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1070 annotchr[L+1]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1071 strcpy(seq[k],annotchr); // overwrite the consensus sequence with the annotation characters
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1072 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1073 else if (!par.showcons) // we have not yet calculated the consensus, but we need it now as query (first sequence)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1074 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1075 /* FIXME: FS set ncons=k
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1076 don't understand why it is not set but seem to need it */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1077 ncons = k;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1078 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1079 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1080 float pmax=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1081 int amax=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1082 for (a=0; a<NAA; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1083 if (f[i][a]>pmax) {amax=a; pmax=f[i][a];}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1084 seq[k][i]=i2aa(amax);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1085 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1086 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1087 // printf("%i query name=%s seq=%s\n",n,sname[n],seq[n]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1088 nfirst=k++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1089
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1090 n_display=k;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1091
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1092 // Calculate overall Neff_HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1093 Neff_HMM=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1094 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1095 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1096 float S=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1097 for (a=0; a<iAlpha; ++a) { /* FIXME: FS introduced iAlpha, was '20' */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1098 if (f[i][a]>1E-10) S-=f[i][a]*fast_log2(f[i][a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1099 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1100 Neff_HMM+=(float) fpow2(S);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1101 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1102 Neff_HMM/=L;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1103 for (i=0; i<=L; ++i) Neff_M[i] = Neff_I[i] = Neff_D[i] = 10.0; // to add only little additional pseudocounts!
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1104 if (v>=2)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1105 cout<<"Read in HMM "<<name<<" with "<<L<<" match states and effective number of sequences = "<<Neff_HMM<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1106
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1107 // Set emission probabilities of zero'th (begin) state and L+1st (end) state to background probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1108 for (a=0; a<iAlpha; ++a) { /* FIXME: FS introduced iAlpha, was '20' */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1109 f[0][a]=f[L+1][a]=pb[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1110 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1111 delete[] annotchr; annotchr = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1112
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1113 return 1; //return status: ok
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1114
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1115 } /* this is the end of HMM::ReadHMMer() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1116
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1117
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1118
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1119 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1120 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1121 * @brief Read an HMM from a HMMER3 .hmm file; return 0 at end of file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1122 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1123 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1124 HMM::ReadHMMer3(FILE* dbf, char* filestr)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1125 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1126 char line[LINELEN]=""; // input line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1127 char desc[DESCLEN]=""; // description of family
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1128 char str4[5]=""; // first 4 letters of input line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1129 char* ptr; // pointer for string manipulation
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1130 int i=0; // index for match state (first=1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1131 int a; // amino acid index
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1132 char dssp=0; // 1 if a consensus SS has been found in the transition prob lines
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1133 char annot=0; // 1 if at least one annotation character in insert lines is ne '-' or ' '
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1134 int k=0; // index for seq[k]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1135 char* annotchr; // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1136 //annotchr = new char[MAXRES]; // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1137 annotchr = new char[par.maxResLen]; // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1138 static int warn=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1139 int iAlpha = 20; /* size of alphabet, default is protein = 20 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1140 double dAlphaInv = 1.00 / (double)(iAlpha); /* weight of AA */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1141
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1142 trans_lin=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1143 L=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1144 Neff_HMM=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1145 n_seqs=n_display=N_in=N_filtered=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1146 nss_dssp=nsa_dssp=nss_pred=nss_conf=nfirst=ncons=-1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1147 lamda=mu=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1148 trans_lin=0; // transition probs in log space
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1149 name[0]=longname[0]=desc[0]=fam[0]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1150 //If at the end of while-loop L is still 0 then we have reached end of db file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1151
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1152 // Do not delete name and seq vectors because their adresses are transferred to hitlist as part of a hit!!
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1153
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1154
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1155 while (fgetline(line,LINELEN-1,dbf) && !(line[0]=='/' && line[1]=='/'))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1156 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1157
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1158 if (strscn(line)==NULL) continue; // skip lines that contain only white space
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1159 if (!strncmp("HMMER",line,5)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1160
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1161 substr(str4,line,0,3); // copy the first four characters into str4
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1162
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1163 if (!strcmp("NAME",str4) && name[0]=='\0')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1164 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1165 ptr=strscn(line+4); // advance to first non-white-space character
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1166 strncpy(name,ptr,NAMELEN-1); // copy full name to name
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1167 strcut(name); // ...cut after first word...
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1168 if (v>=4) cout<<"Reading in HMM "<<name<<":\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1169 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1170
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1171 else if (!strcmp("ACC ",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1172 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1173 ptr=strscn(line+4); // advance to first non-white-space character
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1174 strncpy(longname,ptr,DESCLEN-1); // copy Accession id to longname...
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1175 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1176
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1177 else if (!strcmp("DESC",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1178 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1179 ptr=strscn(line+4); // advance to first non-white-space character
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1180 if (ptr)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1181 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1182 strncpy(desc,ptr,DESCLEN-1); // copy description to name...
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1183 desc[DESCLEN-1]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1184 strcut(ptr); // ...cut after first word...
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1185 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1186 if (!ptr || ptr[1]!='.' || strchr(ptr+3,'.')==NULL) strcpy(fam,""); else strcpy(fam,ptr); // could not find two '.' in name?
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1187 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1188
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1189 else if (!strcmp("LENG",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1190 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1191 ptr=line+4;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1192 L=strint(ptr); //read next integer (number of match states)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1193 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1194
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1195 else if (!strcmp("ALPH",str4)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1196
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1197 ptr=strscn(line+4);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1198
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1199 if (0 == strcmp(ptr, "amino")){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1200 iAlpha = 20;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1201 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1202 else if (0 == strcmp(ptr, "Nucleic")){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1203 iAlpha = 4;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1204 printf("%s:%s:%d: WARNING: HMM reading does not work for DNA/RNA\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1205 __FUNCTION__, __FILE__, __LINE__);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1206 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1207 else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1208 return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1209 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1210 dAlphaInv = 1.00 / (double)(iAlpha);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1211 //continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1212 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1213 else if (!strcmp("RF ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1214 else if (!strcmp("CS ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1215 else if (!strcmp("MAP ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1216 else if (!strcmp("COM ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1217 else if (!strcmp("NSEQ",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1218 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1219 ptr=line+4;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1220 N_in=N_filtered=strint(ptr); //read next integer: number of sequences after filtering
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1221 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1222
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1223 else if (!strcmp("DATE",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1224 else if (!strncmp("CKSUM ",line,5)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1225 else if (!strcmp("GA ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1226 else if (!strcmp("TC ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1227 else if (!strcmp("NC ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1228
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1229 //////////////////////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1230 // Still needed???
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1231
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1232 else if (!strncmp("SADSS",line,5))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1233 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1234 if (nsa_dssp<0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1235 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1236 nsa_dssp=k++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1237 seq[nsa_dssp] = new(char[/*MAXRES*/par.maxResLen+2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1238 sname[nsa_dssp] = new(char[15]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1239 strcpy(seq[nsa_dssp]," ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1240 strcpy(sname[nsa_dssp],"sa_dssp");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1241
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1242 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1243 ptr=strscn(line+5);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1244 if (ptr)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1245 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1246 strcut(ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1247 if (strlen(seq[nsa_dssp])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1248 printf("\nWARNING: HMM %s has SADSS records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1249 else strcat(seq[nsa_dssp],ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1250 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1251 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1252
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1253 else if (!strncmp("SSPRD",line,5))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1254 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1255 if (nss_pred<0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1256 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1257 nss_pred=k++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1258 seq[nss_pred] = new(char[/*MAXRES*/par.maxResLen+2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1259 sname[nss_pred] = new(char[15]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1260 strcpy(seq[nss_pred]," ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1261 strcpy(sname[nss_pred],"ss_pred");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1262
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1263 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1264 ptr=strscn(line+5);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1265 if (ptr)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1266 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1267 strcut(ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1268 if (strlen(seq[nss_pred])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1269 printf("\nWARNING: HMM %s has SSPRD records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1270 else strcat(seq[nss_pred],ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1271 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1272 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1273
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1274 else if (!strncmp("SSCON",line,5))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1275 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1276 if (nss_conf<0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1277 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1278 nss_conf=k++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1279 seq[nss_conf] = new(char[/*MAXRES*/par.maxResLen+2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1280 sname[nss_conf] = new(char[15]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1281 strcpy(seq[nss_conf]," ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1282 strcpy(sname[nss_conf],"ss_conf");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1283 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1284 ptr=strscn(line+5);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1285 if (ptr)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1286 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1287 strcut(ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1288 if (strlen(seq[nss_conf])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1289 printf("\nWARNING: HMM %s has SSPRD records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1290 else strcat(seq[nss_conf],ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1291 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1292 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1293
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1294 else if (!strncmp("SSCIT",line,5)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1295 else if (!strcmp("XT ",str4)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1296 //////////////////////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1297 else if (!strncmp("STATS LOCAL",line,11)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1298
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1299 else if (!strcmp("EFFN",str4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1300 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1301 ptr=line+4;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1302 float effn = strflt(ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1303 // Calculate Neff_HMM by using f(x) = ax^0.1 + bx^0.5 + cx + d (fitted with scop25 dataset)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1304 Neff_HMM = -1.403534 * pow(effn, 0.1) + 4.428118 * pow(effn, 0.5) - 0.2885410 * effn - 1.108568;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1305 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1306
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1307 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1308 // Read transition probabilities from start state
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1309 else if (!strncmp("HMM",line,3))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1310 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1311 fgetline(line,LINELEN-1,dbf); // Skip line with amino acid labels
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1312 fgetline(line,LINELEN-1,dbf); // Skip line with transition labels
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1313 ptr=strscn(line);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1314
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1315 if (!strncmp("COMPO",ptr,5))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1316 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1317 ptr=ptr+5;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1318 for (a=0; a<20 && ptr; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1319 //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1320 pb[s2a[a]] = (float) exp(-1.0*strflta(ptr,99999));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1321 if (!ptr) return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1322 if (v>=4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1323 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1324 printf("\nNULL ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1325 for (a=0; a<20; ++a) printf("%6.3g ",100.*pb[s2a[a]]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1326 printf("\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1327 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1328 fgetline(line,LINELEN-1,dbf); // Read next line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1329 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1330
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1331 fgetline(line,LINELEN-1,dbf); // Skip line with 0-states insert probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1332
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1333 ptr = strscn(line);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1334 for (a=0; a<=D2D && ptr; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1335 tr[0][a] = log2((float) exp(-1.0*strflta(ptr,99999))); //store transition probabilites as log2 values
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1336 // strinta returns next integer in string and puts ptr to first char
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1337 // after the integer. Returns -99999 if '*' is found.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1338 // ptr is set to 0 if no integer is found after ptr.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1339 if (!ptr) return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1340 if (v>=4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1341 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1342 printf(" ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1343 for (a=0; a<=D2D && ptr; ++a) printf("%6.3g ",100*fpow2(tr[i][a]));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1344 printf("\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1345 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1346
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1347 // Prepare to store DSSP states (if there are none, delete afterwards)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1348 nss_dssp=k++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1349 seq[nss_dssp] = new(char[/*MAXRES*/par.maxResLen+2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1350 sname[nss_dssp] = new(char[15]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1351 strcpy(sname[nss_dssp],"ss_dssp");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1352
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1353 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1354 // Read columns of HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1355 int next_i=0; // index of next column
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1356 while (fgetline(line,LINELEN-1,dbf) && !(line[0]=='/' && line[1]=='/') && line[0]!='#')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1357 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1358 if (strscn(line)==NULL) continue; // skip lines that contain only white space
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1359
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1360 // Read in AA probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1361 ptr=line;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1362 int prev_i = next_i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1363 next_i = strint(ptr); ++i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1364 if (v && next_i!=prev_i+1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1365 if (++warn<5)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1366 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1367 cerr<<endl<<"WARNING: in HMM "<<name<<" state "<<prev_i<<" is followed by state "<<next_i<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1368 if (warn==5) cerr<<endl<<"WARNING: further warnings while reading HMMs will be suppressed.\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1369 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1370 if (i>L)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1371 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1372 cerr<<endl<<"Error: in HMM "<<name<<" there are more columns than the stated length "<<L<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1373 return 2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1374 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1375 if (i>L && v)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1376 cerr<<endl<<"WARNING: in HMM "<<name<<" there are more columns than the stated length "<<L<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1377 if (i>=/*MAXRES*/par.maxResLen-2)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1378 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1379 fgetline(line,LINELEN-1,dbf); // Skip two lines
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1380 fgetline(line,LINELEN-1,dbf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1381 continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1382 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1383
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1384 for (a=0; a<iAlpha && ptr; ++a){ /* FIXME: FS introduced iAlpha, was '20' */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1385 f[i][s2a[a]] = (float) exp(-1.0*strflta(ptr,99999));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1386 //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1387 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1388 for (a = iAlpha; a < 20; a++){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1389 f[i][s2a[a]] = 0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1390 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1391 if (!ptr) return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1392 if (v>=4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1393 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1394 printf("%6i ",i);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1395 for (a=0; a<iAlpha; ++a) printf("%6.3g ",100*f[i][s2a[a]]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1396 printf("\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1397 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1398
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1399 // Ignore MAP annotation
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1400 ptr = strscn(line); //find next word
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1401 ptr = strscn_ws(line); // ignore word
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1402
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1403 // Read RF and CS annotation
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1404 ptr = strscn(line);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1405 if (!ptr) return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1406 annotchr[i]=uprchr(*ptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1407 if (*ptr!='-' && *ptr!=' ') annot=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1408
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1409 ptr = strscn(line);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1410 switch (*ptr)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1411 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1412 case 'H':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1413 ss_dssp[i]=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1414 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1415 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1416 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1417 case 'E':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1418 ss_dssp[i]=2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1419 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1420 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1421 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1422 case 'C':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1423 ss_dssp[i]=3;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1424 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1425 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1426 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1427 case 'S':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1428 ss_dssp[i]=4;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1429 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1430 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1431 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1432 case 'T':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1433 ss_dssp[i]=5;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1434 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1435 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1436 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1437 case 'G':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1438 ss_dssp[i]=6;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1439 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1440 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1441 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1442 case 'B':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1443 ss_dssp[i]=7;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1444 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1445 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1446 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1447 case 'I':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1448 dssp=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1449 case '~':
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1450 ss_dssp[i]=3;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1451 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1452 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1453 case '-': // no SS available from any template
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1454 case '.': // no clear consensus SS structure
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1455 case 'X': // no clear consensus SS structure
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1456 ss_dssp[i]=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1457 seq[nss_dssp][i]='-';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1458 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1459 default:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1460 ss_dssp[i]=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1461 seq[nss_dssp][i]=*ptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1462 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1463 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1464
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1465 // Read insert emission line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1466 fgetline(line,LINELEN-1,dbf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1467
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1468 // Read seven transition probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1469 fgetline(line,LINELEN-1,dbf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1470
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1471 ptr+=2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1472 for (a=0; a<=D2D && ptr; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1473 tr[i][a] = log2((float) exp(-1.0*strflta(ptr,99999))); //store transition prob's as log2-values
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1474 if (!ptr) return Warning(dbf,line,name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1475 if (v>=4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1476 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1477 printf(" ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1478 for (a=0; a<=D2D; ++a) printf("%6.3g ",100*fpow2(tr[i][a]));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1479 printf("\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1480 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1481 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1482
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1483 if (line[0]=='/' && line[1]=='/') break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1484
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1485 } /* strncmp("HMM") */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1486
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1487 } //while(getline)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1488
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1489 if (L==0) return 0; //End of db file -> stop reading in
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1490
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1491 if (v && i!=L) cerr<<endl<<"Warning: in HMM "<<name<<" there are only "<<i<<" columns while the stated length is "<<L<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1492 if (v && i>=/*MAXRES*/par.maxResLen-2) {i=/*MAXRES*/par.maxResLen-2; cerr<<endl<<"WARNING: maximum number "<</*MAXRES*/par.maxResLen-2<<" of residues exceeded while reading HMM "<<name<<"\n";}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1493 if (v && !i) cerr<<endl<<"WARNING: HMM "<<name<<" contains no match states. Check the alignment that gave rise to this HMM.\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1494 L = i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1495
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1496 if (strlen(longname)>0) strcat(longname," ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1497 strncat(longname,name,DESCLEN-strlen(longname)-1); // longname = ACC NAME DESC
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1498 if (strlen(name)>0) strcat(longname," ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1499 strncat(longname,desc,DESCLEN-strlen(longname)-1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1500 longname[DESCLEN-1]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1501 ScopID(cl,fold,sfam,fam);// get scop classification from basename (e.g. a.1.2.3.4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1502 RemoveExtension(file,filestr); // copy name of dbfile without extension into 'file'
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1503
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1504 // Secondary structure
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1505 if (!dssp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1506 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1507 // remove dssp sequence
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1508 delete[] seq[nss_dssp]; // memory that had been allocated in case ss_dssp was given needs to be freed
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1509 delete[] sname[nss_dssp]; // memory that had been allocated in case ss_dssp was given needs to be freed
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1510 nss_dssp=-1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1511 k--;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1512 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1513 else { seq[nss_dssp][0]='-'; seq[nss_dssp][L+1]='\0'; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1514
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1515 if (nss_pred>=0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1516 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1517 for (i=1; i<=L; ++i) ss_pred[i] = ss2i(seq[nss_pred][i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1518 if (nss_conf>=0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1519 for (i=1; i<=L; ++i) ss_conf[i] = cf2i(seq[nss_conf][i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1520 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1521 for (i=1; i<=L; ++i) ss_conf[i] = 5;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1522 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1523
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1524 // Copy query (first sequence) and consensus residues?
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1525 if (par.showcons)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1526 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1527 sname[k]=new(char[10]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1528 strcpy(sname[k],"Consensus");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1529 sname[k+1]=new(char[strlen(longname)+1]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1530 strcpy(sname[k+1],longname);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1531 seq[k]=new(char[L+2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1532 seq[k][0]=' ';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1533 seq[k][L+1]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1534 seq[k+1]=new(char[L+2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1535 seq[k+1][0]=' ';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1536 seq[k+1][L+1]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1537 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1538 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1539 float pmax=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1540 int amax=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1541 for (a=0; a<NAA; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1542 if (f[i][a]>pmax) {amax=a; pmax=f[i][a];}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1543 if (pmax>0.6) seq[k][i]=i2aa(amax);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1544 else if (pmax>0.4) seq[k][i]=lwrchr(i2aa(amax));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1545 else seq[k][i]='x';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1546 seq[k+1][i]=i2aa(amax);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1547 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1548 ncons=k++; // nfirst is set later!
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1549 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1550 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1551 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1552 sname[k]=new(char[strlen(longname)+1]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1553 strcpy(sname[k],longname);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1554 seq[k]=new(char[L+2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1555 seq[k][0]=' ';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1556 seq[k][L+1]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1557 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1558
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1559 if (annot) // read in some annotation characters?
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1560 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1561 annotchr[0]=' ';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1562 annotchr[L+1]='\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1563 strcpy(seq[k],annotchr); // overwrite the consensus sequence with the annotation characters
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1564 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1565 else if (!par.showcons) // we have not yet calculated the consensus, but we need it now as query (first sequence)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1566 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1567 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1568 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1569 float pmax=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1570 int amax=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1571 for (a=0; a<NAA; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1572 if (f[i][a]>pmax) {amax=a; pmax=f[i][a];}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1573 seq[k][i]=i2aa(amax);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1574 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1575 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1576 // printf("%i query name=%s seq=%s\n",n,sname[n],seq[n]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1577 nfirst=k++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1578
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1579 n_display=k;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1580 n_seqs=k;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1581
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1582 // If no effektive number of sequences is given, calculate Neff_HMM by given profile
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1583 if (Neff_HMM == 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1584 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1585 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1586 float S=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1587 for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1588 if (f[i][a]>1E-10) S-=f[i][a]*fast_log2(f[i][a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1589 Neff_HMM+=(float) fpow2(S);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1590 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1591 Neff_HMM/=L;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1592 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1593
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1594 for (i=0; i<=L; ++i) Neff_M[i] = Neff_I[i] = Neff_D[i] = 10.0; // to add only little additional pseudocounts!
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1595 Neff_M[L+1]=1.0f;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1596 Neff_I[L+1]=Neff_D[L+1]=0.0f;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1597
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1598 if (v>=2)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1599 cout<<"Read in HMM "<<name<<" with "<<L<<" match states and effective number of sequences = "<<Neff_HMM<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1600
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1601 ///////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1602
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1603 // Set emission probabilities of zero'th (begin) state and L+1st (end) state to background probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1604 for (a=0; a<20; ++a) f[0][a]=f[L+1][a]=pb[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1605 delete[] annotchr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1606
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1607 has_pseudocounts=true;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1608
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1609 return 1; //return status: ok
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1610
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1611
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1612 } /* this is the end of HMM::ReadHMMer3() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1613
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1614
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1615 //////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1616 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1617 * @brief Add transition pseudocounts to HMM (and calculate lin-space transition probs)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1618 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1619 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1620 HMM::AddTransitionPseudocounts(float gapd, float gape, float gapf, float gapg, float gaph, float gapi, float gapb)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1621 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1622 int i; //position in alignment
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1623 float sum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1624 float pM2M, pM2I, pM2D, pI2I, pI2M, pD2D, pD2M;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1625 float p0,p1,p2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1626 if (par.gapb<=0) return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1627 if (trans_lin==1) {fprintf(stderr,"Error: Adding transition pseudocounts to linear representation of %s not allowed. Please report this error to the HHsearch developers.\n",name); exit(6);}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1628 if (trans_lin==2) {fprintf(stderr,"Error: Adding transition pseudocounts twice is %s not allowed. Please report this error to the HHsearch developers.\n",name); exit(6);}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1629 trans_lin=2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1630
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1631 // Calculate pseudocount transition probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1632 pM2D=pM2I=gapd*0.0286; //a-priori probability for inserts and deletions
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1633 pM2M=1-pM2D-pM2I;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1634 // gape=0 -> pI2I=0 gape=1 -> pI2I=0.75 gape=inf -> pI2I=1.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1635 pI2I=1.0*gape/(gape-1+1.0/0.75);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1636 pI2M=1-pI2I;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1637 // gape=0 -> pD2D=0 gape=1 -> pD2D=0.75 gape=inf -> pD2D=1.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1638 pD2D=1.0*gape/(gape-1+1.0/0.75);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1639 pD2M=1-pD2D;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1640
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1641 for (i=0; i<=L; ++i) //for all columns in HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1642 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1643 // Transitions from M state
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1644 p0 = (Neff_M[i]-1)*fpow2(tr[i][M2M]) + gapb*pM2M;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1645 p1 = (Neff_M[i]-1)*fpow2(tr[i][M2D]) + gapb*pM2D;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1646 p2 = (Neff_M[i]-1)*fpow2(tr[i][M2I]) + gapb*pM2I;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1647 if (i==0) p1=p2=0; //from M(0) no transition to D(1) and I(0) possible
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1648 if (i==L) p1=p2=0; //from M(L) no transition to D(L+1) and I(L+1) possible
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1649 sum = p0+p1+p2+FLT_MIN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1650
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1651 // p0 = p0/sum ;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1652 // p1 = pow(p1/sum,gapf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1653 // p2 = pow(p2/sum,gapg);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1654 // sum = p0+p1+p2+FLT_MIN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1655 // tr[i][M2M] = fast_log2(p0/sum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1656 // tr[i][M2D] = fast_log2(p1/sum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1657 // tr[i][M2I] = fast_log2(p2/sum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1658
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1659 tr[i][M2M] = fast_log2(p0/sum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1660 tr[i][M2D] = fast_log2(p1/sum)*gapf;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1661 tr[i][M2I] = fast_log2(p2/sum)*gapg;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1662
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1663 // Transitions from I state
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1664 p0 = Neff_I[i]*fpow2(tr[i][I2M]) + gapb*pI2M;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1665 p1 = Neff_I[i]*fpow2(tr[i][I2I]) + gapb*pI2I;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1666 sum = p0+p1+FLT_MIN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1667
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1668 // p0 = pow(p0/sum,gapg);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1669 // p1 = pow(p1/sum,gapi);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1670 // sum = p0+p1+FLT_MIN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1671 // tr[i][I2M] = fast_log2(p0/sum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1672 // tr[i][I2I] = fast_log2(p1/sum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1673
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1674 tr[i][I2M] = fast_log2(p0/sum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1675 tr[i][I2I] = fast_log2(p1/sum)*gapi;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1676
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1677 // Transitions from D state
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1678 p0 = Neff_D[i]*fpow2(tr[i][D2M]) + gapb*pD2M;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1679 p1 = Neff_D[i]*fpow2(tr[i][D2D]) + gapb*pD2D;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1680 if (i==L) p1=0; //from D(L) no transition to D(L+1) possible
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1681 sum = p0+p1+FLT_MIN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1682
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1683 // p0 = pow(p0/sum,gapf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1684 // p1 = pow(p1/sum,gaph);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1685 // sum = p0+p1+FLT_MIN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1686 // tr[i][D2M] = fast_log2(p0/sum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1687 // tr[i][D2D] = fast_log2(p1/sum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1688
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1689 tr[i][D2M] = fast_log2(p0/sum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1690 tr[i][D2D] = fast_log2(p1/sum)*gaph;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1691
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1692 // SS-dependent gap penalties
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1693 tr[i][M2M_GAPOPEN]=tr[i][M2M];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1694 tr[i][GAPOPEN]=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1695 tr[i][GAPEXTD]=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1696 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1697
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1698 if (v>=4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1699 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1700 printf("\nPseudocount transition probabilities:\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1701 printf("pM2M=%4.1f%%, pM2I=%4.1f%%, pM2D=%4.1f%%, ",100*pM2M,100*pM2I,100*pM2D);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1702 printf("pI2M=%4.1f%%, pI2I=%4.1f%%, ",100*pI2M,100*pI2I);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1703 printf("pD2M=%4.1f%%, pD2D=%4.1f%% ",100*pD2M,100*pD2D);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1704 printf("tau = %4.1f%%\n\n",100.*gapb/(Neff_HMM-1+gapb));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1705 printf("Listing transition probabilities WITH pseudocounts:\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1706 printf(" i dssp pred sacc M->M M->I M->D I->M I->I D->M D->D\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1707
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1708 for (i=1; i<=L; ++i) //for all columns in HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1709 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1710 printf("%4i %1c %1c %1c %6.3f %6.3f %6.3f ",i,i2ss(ss_dssp[i]),i2ss(ss_pred[i]),i2sa(sa_dssp[i]),fpow2(tr[i][M2M]),fpow2(tr[i][M2I]),fpow2(tr[i][M2D]));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1711 printf("%6.3f %6.3f ",fpow2(tr[i][I2M]),fpow2(tr[i][I2I]));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1712 printf("%6.3f %6.3f ",fpow2(tr[i][D2M]),fpow2(tr[i][D2D]));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1713 printf("%1i %2i %1i\n",ss_pred[i],ss_conf[i],ss_dssp[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1714 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1715 printf("\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1716 printf("nss_dssp=%i nss_pred=%i\n",nss_dssp,nss_pred);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1717 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1718 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1719 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1720
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1721
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1722 //////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1723 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1724 * @brief Use secondary structure-dependent gap penalties
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1725 * on top of the HMM transition penalties
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1726 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1727 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1728 HMM::UseSecStrucDependentGapPenalties()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1729 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1730 int i; // column in HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1731 int ii;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1732 //unsigned char iis[MAXRES]; // inside-integer array
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1733 unsigned char iis[par.maxResLen]; // inside-integer array
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1734 float d; // Additional penalty for opening gap whithin SS element
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1735 float e; // Additional penalty for extending gap whithin SS element
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1736
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1737 // Determine inside-integers:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1738 // CCSTCCCHHHHHHHHHHHCCCCCEEEEECCSBGGGCCCCEECC
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1739 // 0000000123444432100000012210000000000001000
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1740 ii=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1741 for (i=0; i<=L; ++i) // forward run
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1742 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1743 if (ss_dssp[i]==1 || ss_dssp[i]==2) {ii+=(ii<par.ssgapi);} else ii=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1744 iis[i]=ii;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1745 } for (i=0; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1746 ii=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1747 iis[0]=iis[L]=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1748 for (i=L; i>=0; i--) // backward run
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1749 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1750 if (ss_dssp[i]==1 || ss_dssp[i]==2) {ii+=(ii<par.ssgapi);} else ii=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1751 iis[i-1]=imin(ii,iis[i-1]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1752 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1753
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1754 // Add SS-dependent gap penalties to HMM transition penalties
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1755 for (i=0; i<=L; ++i) //for all columns in HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1756 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1757 d=-iis[i]*par.ssgapd;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1758 e=-iis[i]*par.ssgape;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1759 tr[i][GAPOPEN]=d;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1760 tr[i][GAPEXTD]=e;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1761 tr[i][M2M_GAPOPEN]+=d;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1762 tr[i][M2I]+=d;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1763 tr[i][I2M]+=d;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1764 tr[i][I2I]+=e;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1765 tr[i][M2D]+=d;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1766 tr[i][D2M]+=d;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1767 tr[i][D2D]+=e;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1768 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1769
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1770 if (v>=3)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1771 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1772 printf("Col SS II\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1773 for (i=0; i<=L; ++i) printf("%3i %c %2i\n",i,i2ss(ss_dssp[i]),iis[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1774 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1775 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1776 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1777
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1778 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1779 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1780 * @brief Generate an amino acid frequency matrix g[][] with full pseudocount admixture (tau=1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1781 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1782 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1783 HMM::PreparePseudocounts()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1784 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1785 for (int i=0; i<=L+1; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1786 for (int a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1787 g[i][a] = // produces fast code
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1788 R[a][0]*f[i][0] +R[a][1]*f[i][1] +R[a][2]*f[i][2] +R[a][3]*f[i][3] +R[a][4]*f[i][4]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1789 +R[a][5]*f[i][5] +R[a][6]*f[i][6] +R[a][7]*f[i][7] +R[a][8]*f[i][8] +R[a][9]*f[i][9]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1790 +R[a][10]*f[i][10]+R[a][11]*f[i][11]+R[a][12]*f[i][12]+R[a][13]*f[i][13]+R[a][14]*f[i][14]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1791 +R[a][15]*f[i][15]+R[a][16]*f[i][16]+R[a][17]*f[i][17]+R[a][18]*f[i][18]+R[a][19]*f[i][19];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1792 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1793
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1794 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1795 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1796 * @brief Add amino acid pseudocounts to HMM and calculate average protein aa probabilities pav[a]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1797 * Pseudocounts: t.p[i][a] = (1-tau)*f[i][a] + tau*g[i][a]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1798 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1799 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1800 HMM::AddAminoAcidPseudocounts(char pcm, float pca, float pcb, float pcc)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1801 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1802 int i; //position in HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1803 int a; //amino acid (0..19)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1804 float sum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1805 float tau; //tau = pseudocount admixture
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1806
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1807 for (a=0; a<20; ++a) pav[a]=pb[a]*100.0f/Neff_HMM; // initialize vector of average aa freqs with pseudocounts
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1808
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1809 // Calculate amino acid frequencies p[i][a] = (1-tau(i))*f[i][a] + tau(i)*g[i][a]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1810 switch (pcm)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1811 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1812 case 0: //no pseudocounts whatsoever: tau=0
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1813 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1814 for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1815 pav[a] += ( p[i][a]=f[i][a] );
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1816 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1817 case 1: //constant pseudocounts (for optimization): tau = pca
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1818 tau = pca;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1819 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1820 for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1821 pav[a] += ( p[i][a] = (1.-tau)*f[i][a] + tau * g[i][a] );
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1822 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1823 case 2: //divergence-dependent pseudocounts
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1824 case 4: //divergence-dependent pseudocounts and rate matrix rescaling
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1825 if (par.pcc==1.0f)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1826 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1827 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1828 tau = fmin(1.0, pca/(1. + Neff_M[i]/pcb ) );
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1829 for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1830 pav[a] += ( p[i][a] = (1.-tau)*f[i][a] + tau * g[i][a] );
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1831 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1832 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1833 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1834 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1835 tau = fmin(1.0, pca/(1. + pow((Neff_M[i])/pcb,pcc)));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1836 for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1837 pav[a] += ( p[i][a] = (1.-tau)*f[i][a] + tau * g[i][a] );
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1838 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1839 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1840 case 3: // constant-divergence pseudocounts
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1841 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1842 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1843 float x = Neff_M[i]/pcb;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1844 pca = 0.793 + 0.048*(pcb-10.0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1845 tau = fmax(0.0, pca*(1-x + pcc*x*(1-x)) );
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1846 for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1847 pav[a] += ( p[i][a] = (1.-tau)*f[i][a] + tau * g[i][a] );
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1848 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1849 if (v>=2) { printf("Divergence before / after addition of amino acid pseudocounts: %5.2f / %5.2f\n",Neff_HMM, CalcNeff()); }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1850 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1851 } //end switch (pcm)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1852
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1853
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1854 // Normalize vector of average aa frequencies pav[a]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1855 NormalizeTo1(pav,NAA);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1856
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1857 for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1858 p[0][a] = p[L+1][a] = pav[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1859
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1860 // DEBUGGING output
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1861 if (v>=3)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1862 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1863 switch (pcm)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1864 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1865 case 0:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1866 cout<<"No pseudocounts added (-pcm 0)\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1867 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1868 case 1:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1869 cout<<"Adding constant AA pseudocount admixture of "<<pca<<" to HMM "<<name<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1870 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1871 case 2:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1872 cout<<"Adding divergence-dependent AA pseudocounts (-pcm 2) with admixture of "
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1873 <<pca/(1.+pow((Neff_HMM-1.)/pcb,pcc))<<" to HMM "<<name<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1874 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1875 } //end switch (pcm)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1876 cout<<"\nAverage amino acid frequencies WITH pseudocounts in HMM: \nProf: ";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1877 for (a=0; a<20; ++a) printf("%4.1f ",100*pav[a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1878 cout<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1879 if (v>=4)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1880 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1881 cout<<"\nAmino acid frequencies WITHOUT pseudocounts:\n A R N D C Q E G H I L K M F P S T W Y V\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1882 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1883 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1884 printf("%3i: ",i);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1885 sum=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1886 for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1887 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1888 sum+=f[i][a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1889 printf("%4.1f ",100*f[i][a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1890 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1891 printf(" sum=%5.3f\n",sum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1892 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1893 cout<<"\nAmino acid frequencies WITH pseudocounts:\n A R N D C Q E G H I L K M F P S T W Y V\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1894 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1895 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1896 printf("%3i: ",i);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1897 sum=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1898 for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1899 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1900 sum+=p[i][a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1901 printf("%4.1f ",100*p[i][a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1902 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1903 printf(" sum=%5.3f\n",sum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1904 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1905 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1906 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1907 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1908 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1909
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1910
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1911 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1912 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1913 * @brief Factor Null model into HMM t
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1914 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1915 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1916 HMM::IncludeNullModelInHMM(HMM& q, HMM& t)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1917 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1918
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1919 int i,j; //query and template match state indices
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1920 int a; //amino acid index
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1921
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1922 switch (par.columnscore)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1923 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1924 default:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1925 case 0: // Null model with background prob. from database
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1926 for (a=0; a<20; ++a) pnul[a]=pb[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1927 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1928
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1929 case 1: // Null model with background prob. equal average from query and template
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1930 for (a=0; a<20; ++a) pnul[a]=0.5*(q.pav[a]+t.pav[a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1931 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1932
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1933 case 2: // Null model with background prob. from template protein
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1934 for (a=0; a<20; ++a) pnul[a]=t.pav[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1935 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1936
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1937 case 3: // Null model with background prob. from query protein
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1938 for (a=0; a<20; ++a) pnul[a]=q.pav[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1939 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1940
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1941 case 4: // Null model with background prob. equal average from query and template
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1942 for (a=0; a<20; ++a) pnul[a]=sqrt(q.pav[a]*t.pav[a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1943 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1944
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1945 case 10: // Separated column scoring for Stochastic Backtracing (STILL USED??)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1946 for (i=0; i<=q.L+1; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1947 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1948 float sum = 0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1949 for (a=0; a<20; ++a) sum += pb[a]*q.p[i][a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1950 sum = 1.0/sqrt(sum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1951 for (a=0; a<20; ++a) q.p[i][a]*=sum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1952 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1953 for (j=0; j<=t.L+1; j++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1954 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1955 float sum = 0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1956 for (a=0; a<20; ++a) sum += pb[a]*t.p[j][a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1957 sum = 1.0/sqrt(sum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1958 for (a=0; a<20; ++a) t.p[j][a]*=sum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1959 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1960 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1961
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1962 case 11: // log co-emission probability (no null model)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1963 for (a=0; a<20; ++a) pnul[a]=0.05;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1964 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1965
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1966 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1967
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1968 // !!!!! ATTENTION!!!!!!! after this t.p is not the same as after adding pseudocounts !!!
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1969 //Introduce amino acid weights into template (for all but SOP scores)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1970 if (par.columnscore!=10)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1971 for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1972 for (j=0; j<=t.L+1; j++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1973 t.p[j][a]/=pnul[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1974
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1975 if (v>=5)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1976 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1977 cout<<"\nAverage amino acid frequencies\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1978 cout<<" A R N D C Q E G H I L K M F P S T W Y V\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1979 cout<<"Q: ";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1980 for (a=0; a<20; ++a) printf("%4.1f ",100*q.pav[a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1981 cout<<"\nT: ";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1982 for (a=0; a<20; ++a) printf("%4.1f ",100*t.pav[a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1983 cout<<"\nNull: ";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1984 for (a=0; a<20; ++a) printf("%4.1f ",100*pnul[a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1985 cout<<"\npb: ";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1986 for (a=0; a<20; ++a) printf("%4.1f ",100*pb[a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1987 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1988
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1989
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1990 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1991 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1992
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1993
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1994 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1995 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1996 * @brief Write HMM to output file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1997 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1998 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1999 HMM::WriteToFile(char* outfile)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2000 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2001 const int SEQLEN=100; // number of residues per line for sequences to be displayed
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2002 int i,a;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2003
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2004 if (trans_lin) {fprintf(stderr,"Error: Writing transition pseudocounts in linear representation not allowed. Please report this error to the HHsearch developers.\n"); exit(6);}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2005
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2006 FILE *outf=NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2007 if (strcmp(outfile,"stdout"))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2008 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2009 if (par.append) outf=fopen(outfile,"a"); else outf=fopen(outfile,"w");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2010 if (!outf) OpenFileError(outfile);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2011 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2012 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2013 outf = stdout;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2014 if (v>=2) cout<<"Writing HMM to "<<outfile<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2015
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2016 // fprintf(outf,"HHsearch HHM format 1.5\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2017 fprintf(outf,"HHsearch 1.5\n"); // format specification
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2018 fprintf(outf,"NAME %s\n",longname); // name of first sequence
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2019 fprintf(outf,"FAM %s\n",fam); // family name
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2020 char file_nopath[NAMELEN];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2021 RemovePath(file_nopath,file);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2022 fprintf(outf,"FILE %s\n",file_nopath); // base name of alignment file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2023
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2024 // Print command line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2025 fprintf(outf,"COM ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2026 for (int i=0; i<par.argc; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2027 if (strlen(par.argv[i])<=100)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2028 fprintf(outf,"%s ",par.argv[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2029 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2030 fprintf(outf,"<%i characters> ",(int)strlen(par.argv[i]));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2031 fprintf(outf,"\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2032
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2033 // print out date stamp
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2034 time_t* tp=new(time_t);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2035 *tp=time(NULL);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2036 fprintf(outf,"DATE %s",ctime(tp));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2037 delete tp; tp = NULL; /* really? FS */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2038
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2039 // Print out some statistics of alignment
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2040 fprintf(outf,"LENG %i match states, %i columns in multiple alignment\n",L,l[L]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2041 fprintf(outf,"FILT %i out of %i sequences passed filter (-id %i -cov %i -qid %i -qsc %.2f -diff %i)\n",N_filtered,N_in,par.max_seqid,par.coverage,par.qid,par.qsc,par.Ndiff);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2042 fprintf(outf,"NEFF %-4.1f\n",Neff_HMM);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2043
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2044 // Print selected sequences from alignment (including secondary structure and confidence values, if known)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2045 fprintf(outf,"SEQ\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2046 for (int n=0; n<n_display; n++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2047 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2048 fprintf(outf,">%s\n",sname[n]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2049 //first sequence character starts at 1; 0 not used.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2050 for(unsigned int j=0; j<strlen(seq[n]+1); j+=SEQLEN) fprintf(outf,"%-.*s\n",SEQLEN,seq[n]+1+j);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2051 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2052 fprintf(outf,"#\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2053
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2054 // print null model background probabilities from substitution matrix
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2055 fprintf(outf,"NULL ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2056 for (a=0; a<20; ++a) fout(outf,-iround(fast_log2(pb[s2a[a]])*HMMSCALE ));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2057 fprintf(outf,"\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2058
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2059 // print table header line with amino acids
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2060 fprintf(outf,"HMM ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2061 for (a=0; a<20; ++a) fprintf(outf,"%1c\t",i2aa(s2a[a]));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2062 fprintf(outf,"\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2063
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2064 // print table header line with state transitions
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2065 fprintf(outf," M->M\tM->I\tM->D\tI->M\tI->I\tD->M\tD->D\tNeff\tNeff_I\tNeff_D\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2066
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2067 // print out transition probabilities from begin state (virtual match state)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2068 fprintf(outf," ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2069 for (a=0; a<=D2D; ++a) fout(outf,-iround(tr[0][a]*HMMSCALE));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2070 fout(outf,iround(Neff_M[0]*HMMSCALE));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2071 fout(outf,iround(Neff_I[0]*HMMSCALE));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2072 fout(outf,iround(Neff_D[0]*HMMSCALE));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2073 fprintf(outf,"\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2074
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2075 // Start loop for printing HMM columns
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2076 int h=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2077 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2078 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2079
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2080 while(islower(seq[nfirst][h]) && seq[nfirst][h]) h++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2081 fprintf(outf,"%1c %-4i ",seq[nfirst][h++],i);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2082
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2083 // Print emission probabilities for match state
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2084 for (a=0; a<20; ++a) fout(outf,-iround(fast_log2(p[i][s2a[a]])*HMMSCALE ));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2085 fprintf(outf,"%-i",l[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2086 fprintf(outf,"\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2087
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2088 // Print transition probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2089 fprintf(outf," ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2090 for (a=0; a<=D2D; ++a) fout(outf,-iround(tr[i][a]*HMMSCALE));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2091 fout(outf,iround(Neff_M[i]*HMMSCALE));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2092 fout(outf,iround(Neff_I[i]*HMMSCALE));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2093 fout(outf,iround(Neff_D[i]*HMMSCALE));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2094 fprintf(outf,"\n\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2095 } // end for(i)-loop for printing HMM columns
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2096
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2097 fprintf(outf,"//\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2098 fclose(outf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2099 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2100
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2101 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2102 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2103 * @brief Write HMM to output file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2104 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2105 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2106 HMM::InsertCalibration(char* infile)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2107 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2108 char* line = new(char[LINELEN]); // input line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2109 char** lines = new(char*[3*L+100000]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2110 int nline=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2111 int l;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2112 char done=0; // inserted new 'EVD mu sigma' line?
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2113
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2114 // Read from infile all lines and insert the EVD line with lamda and mu coefficients
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2115 ifstream inf;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2116 inf.open(infile, ios::in);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2117 if (!inf) OpenFileError(infile);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2118 if (v>=2) cout<<"Recording calibration coefficients in "<<infile<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2119
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2120 while (inf.getline(line,LINELEN) && !(line[0]=='/' && line[1]=='/') && nline<2*/*MAXRES*/par.maxResLen)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2121 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2122
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2123 // Found an EVD lamda mu line? -> remove
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2124 while (!done && !strncmp(line,"EVD ",3) && !(line[0]=='/' && line[1]=='/') && nline<2*/*MAXRES*/par.maxResLen)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2125 inf.getline(line,LINELEN);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2126 if ((line[0]=='/' && line[1]=='/') || nline>=2*/*MAXRES*/par.maxResLen)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2127 {fprintf(stderr,"Error: wrong format in %s. Expecting hhm format\n",infile); exit(1);}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2128
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2129 // Found the SEQ line? -> insert calibration before this line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2130 if (!done && (!strncmp("SEQ",line,3) || !strncmp("HMM",line,3)) && (isspace(line[3]) || line[3]=='\0'))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2131 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2132 done=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2133 lines[nline]=new(char[128]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2134 if (!lines[nline]) MemoryError("space to read in HHM file for calibration");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2135 sprintf(lines[nline],"EVD %-7.4f %-7.4f",lamda,mu);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2136 nline++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2137 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2138 lines[nline]=new(char[strlen(line)+1]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2139 if (!lines[nline]) MemoryError("space to read in HHM file for calibration");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2140 strcpy (lines[nline],line);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2141 nline++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2142 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2143 inf.close();
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2144
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2145 // Write to infile all lines
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2146 FILE* infout=fopen(infile,"w");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2147 if (!infout) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2148 cerr<<endl<<"WARNING in "<<program_name<<": no calibration coefficients written to "<<infile<<":\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2149 cerr<<"Could not open file for writing.\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2150 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2151 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2152 for (l=0; l<nline; l++) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2153 fprintf(infout,"%s\n",lines[l]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2154 delete[] lines[l]; lines[l] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2155 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2156 fprintf(infout,"//\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2157 fclose(infout);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2158 delete[] line; line = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2159 delete[] lines; lines = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2160 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2161 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2162
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2163 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2164 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2165 * @brief Write HMM to output file in HMMER format
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2166 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2167 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2168 HMM::WriteToFileHMMER(char* outfile)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2169 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2170 const int INTSCALE=1000; //scaling factor in HMMER files
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2171 const float pBD=0.50;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2172 const int LOG2pBD=iround(fast_log2(pBD)*INTSCALE);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2173 const int LOG2pBM=iround(fast_log2(1-pBD)*INTSCALE);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2174 const float pJB=1.0/350;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2175 const int LOG2pJB=iround(fast_log2(pJB)*INTSCALE);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2176 const int LOG2pJJ=iround(fast_log2(1-pJB)*INTSCALE);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2177 const float pEJ=0.5;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2178 const int LOG2pEJ=iround(fast_log2(pEJ)*INTSCALE);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2179 const int LOG2pEC=iround(fast_log2(1-pEJ)*INTSCALE);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2180 char c;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2181 int i,a;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2182
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2183 if (trans_lin) {fprintf(stderr,"Error: Writing transition pseudocounts in linear representation not allowed. Please report this error to the HHsearch developers.\n"); exit(6);}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2184
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2185 FILE *outf=NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2186 if (strcmp(outfile,"stdout"))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2187 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2188 if (par.append) outf=fopen(outfile,"a"); else outf=fopen(outfile,"w");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2189 if (!outf) OpenFileError(outfile);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2190 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2191 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2192 outf = stdout;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2193 if (v>=2) cout<<"Writing HMM to "<<outfile<<"\n";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2194
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2195 fprintf(outf,"HMMER2.0 [hhmake %s]\n",VERSION_AND_DATE);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2196 fprintf(outf,"NAME %s\n",file); // base name of alignment file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2197 fprintf(outf,"DESC %s\n",longname);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2198 fprintf(outf,"LENG %i\n",L);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2199 fprintf(outf,"ALPH Amino\n"); // amino acid seuqences (not DNA)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2200 fprintf(outf,"RF yes\n"); // reference annotation flag
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2201 fprintf(outf,"CS yes\n"); // consensus structure annotation flag
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2202 fprintf(outf,"MAP yes\n"); // write MA column number after each line of aa probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2203
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2204 fprintf(outf,"COM "); // print out command line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2205 for (i=0; i<=par.argc-1; ++i) fprintf(outf,"%s ",par.argv[i]); fprintf(outf,"\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2206
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2207 fprintf(outf,"NSEQ %i\n",N_filtered); // print number of sequences after filtering
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2208
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2209 // Date stamp
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2210 time_t* tp=new(time_t);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2211 *tp=time(NULL);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2212 fprintf(outf,"DATE %s",ctime(tp));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2213 delete tp; tp = NULL; /* really? FS */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2214
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2215 // Print out secondary structure
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2216 if (nss_dssp>=0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2217 fprintf(outf,"SSDSS %s\n",seq[nss_dssp]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2218 if (nsa_dssp>=0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2219 fprintf(outf,"SADSS %s\n",seq[nsa_dssp]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2220 if (nss_pred>=0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2221 fprintf(outf,"SSPRD %s\n",seq[nss_pred]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2222 if (nss_conf>=0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2223 fprintf(outf,"SSCNF %s\n",seq[nss_conf]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2224
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2225
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2226 // Special Plan7 transitions that control repeated detection of profile HMM within sequence
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2227 fprintf(outf,"XT %6i %6i %6i %6i %6i %6i %6i %6i\n",LOG2pJB,LOG2pJJ,LOG2pEC,LOG2pEJ,LOG2pJB,LOG2pJJ,LOG2pJB,LOG2pJJ);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2228 fprintf(outf,"NULT -4 -8455\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2229
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2230
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2231 // Null model background probabilities from substitution matrix
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2232 fprintf(outf,"NULE ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2233 for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2234 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2235 float lg2=fast_log2(pb[s2a[a]]*20.0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2236 if (lg2<-99.999) fprintf(outf," *"); else fprintf(outf," %6i",iround(lg2*INTSCALE));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2237 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2238 fprintf(outf,"\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2239
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2240 // Table header line with amino acids
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2241 fprintf(outf,"HMM ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2242 for (a=0; a<20; ++a) fprintf(outf," %1c ",i2aa(s2a[a]));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2243 fprintf(outf,"\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2244
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2245 // Table header line with state transitions
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2246 fprintf(outf," m->m m->i m->d i->m i->i d->m d->d b->m m->e\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2247
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2248 // Transition probabilities from begin state
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2249 fprintf(outf," %6i * %6i\n",LOG2pBM,LOG2pBD);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2250
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2251 // Start loop for printing HMM columns
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2252 int h=1, hss=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2253 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2254 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2255
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2256 // Emission probabilities for match state
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2257 fprintf(outf," %5i",i);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2258 for (a=0; a<20; ++a) fprintf(outf," %6i",imax(-9999,iround(fast_log2(p[i][s2a[a]]/pb[s2a[a]])*INTSCALE)));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2259 fprintf(outf," %5i",l[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2260 fprintf(outf,"\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2261
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2262 // Emission probabilities (relative to null model) for insert state
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2263 while(islower(seq[nfirst][h]) && seq[nfirst][h]) h++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2264 if (i==L)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2265 fprintf(outf," %1c * * * * * * * * * * * * * * * * * * * *\n",seq[nfirst][h++]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2266 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2267 fprintf(outf," %1c 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",seq[nfirst][h++]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2268
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2269 // Transition probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2270 if (nss_dssp>=0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2271 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2272 while(islower(seq[nss_dssp][hss]) && seq[nss_dssp][hss]) hss++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2273 c=seq[nss_dssp][hss++];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2274 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2275 else c=' ';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2276 fprintf(outf," %1c",c);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2277 if (i==1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2278 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2279 for (a=0; a<=D2D; ++a) fprintf(outf," %6i",imax(-9999,iround(tr[i][a]*INTSCALE)));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2280 fprintf(outf," %6i *\n",LOG2pBM);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2281 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2282 else if (i==L)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2283 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2284 for (a=0; a<=D2D; ++a) fprintf(outf," *");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2285 fprintf(outf," * 0\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2286 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2287 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2288 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2289 for (a=0; a<=D2D; ++a) fprintf(outf," %6i",imax(-9999,iround(tr[i][a]*INTSCALE)));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2290 fprintf(outf," * *\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2291 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2292 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2293
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2294 fprintf(outf,"//\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2295 fclose(outf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2296 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2297
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2298
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2299 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2300 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2301 * @brief Transform log to lin transition probs
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2302 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2303 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2304 HMM::Log2LinTransitionProbs(float beta)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2305 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2306 if (trans_lin==1) return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2307 trans_lin=1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2308 for (int i=0; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2309 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2310 for (int a=0; a<NTRANS; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2311 tr[i][a] = fpow2(beta*tr[i][a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2312 /* FIXME valgrind says: "Conditional jump or move depends on
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2313 * uninitialised value(s)" when using hmm iteration
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2314 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2315 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2316 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2317
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2318
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2319 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2320 * @brief Set query columns in His-tags etc to Null model distribution
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2321 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2322 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2323 HMM::NeutralizeTags()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2324 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2325 char* qseq = seq[nfirst];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2326 char* pt;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2327 int a,i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2328
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2329 if (NULL == qseq){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2330 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2331 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2332
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2333 // Neutralize His tag
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2334 if ( (pt=strstr(qseq,"HHHHH")) )
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2335 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2336 int i0 = pt-qseq+1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2337 if (v>=2) printf("Neutralized His-tag at position %i\n",i0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2338 for (i=imax(i0-5,1); i<i0; ++i) // neutralize leading 5 columns
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2339 for (a=0; a<NAA; ++a) p[i][a]=pb[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2340 for (; (*pt)!='H'; ++i,++pt) // neutralize His columns
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2341 for (a=0; a<NAA; ++a) p[i][a]=pb[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2342 i0=i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2343 for (; i<imin(i0+5,L+1); ++i) // neutralize trailing 5 columns
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2344 for (a=0; a<NAA; ++a) p[i][a]=pb[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2345 if (v>=3) printf("start:%i end:%i\n",imax(i0-5,1),i-1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2346 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2347
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2348 // Neutralize C-myc tag
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2349 if ( (pt=strstr(qseq,"EQKLISEEDL")) )
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2350 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2351 if (v>=2) printf("Neutralized C-myc-tag at position %i\n",int(pt-qseq)+1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2352 for (i=pt-qseq+1; i<=pt-qseq+10; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2353 for (a=0; a<NAA; ++a) p[i][a]=pb[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2354 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2355 // Neutralize FLAG tag
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2356 if ( (pt=strstr(qseq,"DYKDDDDK")) )
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2357 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2358 if (v>=2) printf("Neutralized FLAG-tag at position %i\n",int(pt-qseq)+1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2359 for (i=pt-qseq+1; i<=pt-qseq+8; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2360 for (a=0; a<NAA; ++a) p[i][a]=pb[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2361 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2362 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2363
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2364
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2365
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2366 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2367 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2368 * @brief Calculate effective number of sequences using profiles INCLUDING pseudocounts
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2369 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2370 float
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2371 HMM::CalcNeff()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2372 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2373 float Neff=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2374 for (int i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2375 for (int a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2376 if (p[i][a]>1E-10) Neff-=p[i][a]*fast_log2(p[i][a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2377 return fpow2(Neff/L);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2378 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2379
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2380
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2381 /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2382 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2383 * @brief Calculate consensus of HMM (needed to merge HMMs later)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2384 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2385 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2386 HMM::CalculateConsensus()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2387 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2388 int i; // position in query
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2389 int a; // amino acid
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2390 if (!Xcons) Xcons = new char[/*MAXRES*/par.maxResLen+2];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2391 for (i=1; i<=L; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2392 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2393 float max=f[i][0]-pb[0];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2394 for (a=1; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2395 if (f[i][a]-pb[a]>max) Xcons[i]=a;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2396 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2397 Xcons[0]=Xcons[L+1]=ENDGAP;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2398 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2399
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2400 // /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2401 // // Store linear transition probabilities
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2402 // /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2403 // void HMM::StoreLinearTransitionProbs()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2404 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2405 // int i; // position in query
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2406 // for (i=0; i<=L+1; ++i) if (!tr_lin[i]) tr_lin[i] = new(float[NTRANS]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2407 // for (i=0; i<=L+1; ++i)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2408 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2409 // tr_lin[i][M2M] = fpow2(tr[i][M2M]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2410 // tr_lin[i][M2I] = fpow2(tr[i][M2I]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2411 // tr_lin[i][M2D] = fpow2(tr[i][M2D]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2412 // tr_lin[i][D2M] = fpow2(tr[i][M2D]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2413 // tr_lin[i][D2D] = fpow2(tr[i][D2D]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2414 // tr_lin[i][I2M] = fpow2(tr[i][I2M]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2415 // tr_lin[i][I2I] = fpow2(tr[i][I2I]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2416 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2417 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2418
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2419
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2420 // #define Weff(Neff) (1.0+par.neffa*(Neff-1.0)+(par.neffb-4.0*par.neffa)/16.0*(Neff-1.0)*(Neff-1.0))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2421
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2422 // /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2423 // // Initialize f[i][a] with query HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2424 // /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2425 // void HMM::MergeQueryHMM(HMM& q, float wk[])
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2426 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2427 // int i; // position in query
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2428 // int a; // amino acid
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2429 // float Weff_M, Weff_D, Weff_I;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2430 // for (i=1; i<=L; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2431 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2432 // Weff_M = Weff(q.Neff_M[i]-1.0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2433 // Weff_D = Weff(q.Neff_D[i]-1.0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2434 // Weff_I = Weff(q.Neff_I[i]-1.0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2435 // for (a=0; a<20; a++) f[i][a] = q.f[i][a]*wk[i]*Weff_M;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2436 // tr_lin[i][M2M] = q.tr_lin[i][M2M]*wk[i]*Weff_M;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2437 // tr_lin[i][M2I] = q.tr_lin[i][M2I]*wk[i]*Weff_M;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2438 // tr_lin[i][M2D] = q.tr_lin[i][M2D]*wk[i]*Weff_M;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2439 // tr_lin[i][D2M] = q.tr_lin[i][D2M]*wk[i]*Weff_D;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2440 // tr_lin[i][D2D] = q.tr_lin[i][D2D]*wk[i]*Weff_D;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2441 // tr_lin[i][I2M] = q.tr_lin[i][I2M]*wk[i]*Weff_I;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2442 // tr_lin[i][I2I] = q.tr_lin[i][I2I]*wk[i]*Weff_I;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2443 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2444 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2445
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2446
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2447
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2448 // /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2449 // // Normalize probabilities in total merged super-HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2450 // /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2451 // void HMM::NormalizeHMMandTransitionsLin2Log()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2452 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2453 // int i; // position in query
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2454 // int a; // amino acid
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2455 // for (i=0; i<=L+1; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2456 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2457 // float sum=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2458 // for (a=0; a<20; a++) sum += f[i][a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2459 // for (a=0; a<20; a++) f[i][a]/=sum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2460 // sum = tr_lin[i][M2M] + tr_lin[i][M2I] + tr_lin[i][M2D];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2461 // tr_lin[i][M2M] /= sum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2462 // tr_lin[i][M2I] /= sum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2463 // tr_lin[i][M2D] /= sum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2464 // tr[i][M2M] = fast_log2(tr_lin[i][M2M]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2465 // tr[i][M2I] = fast_log2(tr_lin[i][M2I]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2466 // tr[i][M2D] = fast_log2(tr_lin[i][M2D]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2467 // sum = tr_lin[i][D2M] + tr_lin[i][D2D];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2468 // tr_lin[i][D2M] /= sum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2469 // tr_lin[i][D2D] /= sum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2470 // tr[i][D2M] = fast_log2(tr_lin[i][D2M]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2471 // tr[i][D2D] = fast_log2(tr_lin[i][D2D]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2472 // sum = tr_lin[i][I2M] + tr_lin[i][I2I];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2473 // tr_lin[i][I2M] /= sum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2474 // tr_lin[i][I2I] /= sum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2475 // tr[i][I2M] = fast_log2(tr_lin[i][I2M]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2476 // tr[i][I2I] = fast_log2(tr_lin[i][I2I]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2477 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2478 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2479
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2480
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2481 // UNCOMMENT TO ACTIVATE COMPOSITIONALLY BIASED PSEUDOCOUNTS BY RESCALING THE RATE MATRIX
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2482
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2483 // /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2484 // //// Function to minimize
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2485 // /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2486 // double RescaleMatrixFunc(double x[])
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2487 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2488 // double sum=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2489 // for (int a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2490 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2491 // double za=0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2492 // for (int b=0; b<20; ++b) za+=P[a][b]*x[b];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2493 // sum += (x[a]*za-qav[a])*(x[a]*za-qav[a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2494 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2495 // return sum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2496 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2497
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2498 // /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2499 // //// Gradient of function to minimize
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2500 // /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2501 // void RescaleMatrixFuncGrad(double grad[], double x[])
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2502 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2503 // double z[20] = {0.0};
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2504 // double w[20];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2505 // double tmp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2506 // for (int a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2507 // for (int b=0; b<20; ++b) z[a] += P[a][b]*x[b];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2508
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2509 // for (int a=0; a<20; ++a) w[a] = x[a]*z[a]-qav[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2510 // for (int a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2511 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2512 // tmp = w[a]*z[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2513 // for (int b=0; b<20; ++b) tmp += P[a][b]*x[b]*w[b];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2514 // grad[a] = 2.0*tmp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2515 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2516 // return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2517 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2518
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2519
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2520 // /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2521 // //// Rescale a substitution matrix to biased aa frequencies in global vector qav[a]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2522 // /////////////////////////////////////////////////////////////////////////////////////
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2523 // void HMM::RescaleMatrix()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2524 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2525 // int a,b;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2526 // int code;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2527 // double x[21]; // scaling factor
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2528 // double val_min;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2529 // const int len=20;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2530 // const int max_iterations=50;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2531
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2532 // if (v>=2) printf("Adjusting rate matrix to query amino acid composition ...\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2533
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2534 // // Put amino acid frequencies into global array (needed to call WNLIB's conjugate gradient method)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2535 // for (a=0; a<20; ++a) qav[a] = pav[a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2536
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2537 // // Initialize scaling factors x[a]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2538 // for (a=0; a<20; ++a) x[a]=pow(qav[a]/pb[a],0.73); // Initialize
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2539
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2540 // // Call conjugate gradient minimization method from WNLIB
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2541 // wn_conj_gradient_method(&code,&val_min,x,len,&RescaleMatrixFunc,&RescaleMatrixFuncGrad,max_iterations);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2542
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2543
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2544 // // Calculate z[a] = sum_b Pab*xb
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2545 // float sum_err=0.0f;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2546 // float sum = 0.0f;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2547 // for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2548 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2549 // float za=0.0f; // za = sum_b Pab*xb
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2550 // for (b=0; b<20; ++b) za+=P[a][b]*x[b];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2551 // sum_err += (x[a]*za/qav[a]-1)*(x[a]*za/qav[a]-1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2552 // sum += x[a]*za;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2553 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2554 // if (sum_err>1e-3 & v>=1) fprintf(stderr,"WARNING: adjusting rate matrix by CG resulted in residual error of %5.3f.\n",sum_err);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2555
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2556 // // Rescale rate matrix
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2557 // for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2558 // for (b=0; b<20; ++b)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2559 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2560 // P[a][b] *= x[a]*x[b]/sum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2561 // R[a][b] = P[a][b]/qav[b];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2562 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2563
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2564 // // How well approximated?
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2565 // if (v>=3)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2566 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2567 // // Calculate z[a] = sum_b Pab*xb
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2568 // float z[21];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2569 // for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2570 // for (z[a]=0.0, b=0; b<20; ++b) z[a]+=P[a][b];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2571 // printf("Adjust A R N D C Q E G H I L K M F P S T W Y V\nErr? ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2572 // for (a=0; a<20; ++a) printf("%4.0f ",1000*z[a]/qav[a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2573 // cout<<endl<<"xa ";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2574 // for (a=0; a<20; ++a) fprintf(stdout,"%4.2f ",x[a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2575 // cout<<endl;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2576 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2577
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2578 // // Evaluate sequence identity underlying substitution matrix
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2579 // if (v>=3)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2580 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2581 // float id=0.0f;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2582 // float entropy=0.0f;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2583 // float entropy_qav=0.0f;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2584 // float mut_info=0.0f;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2585 // for (a=0; a<20; ++a) id += P[a][a];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2586 // for (a=0; a<20; ++a) entropy_qav-=qav[a]*fast_log2(qav[a]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2587 // for (a=0; a<20; ++a)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2588 // for (b=0; b<20; ++b)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2589 // {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2590 // entropy-=P[a][b]*fast_log2(R[a][b]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2591 // mut_info += P[a][b]*fast_log2(P[a][b]/qav[a]/qav[b]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2592 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2593
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2594 // fprintf(stdout,"Rescaling rate matrix: sequence identity = %2.0f%%; entropy per column = %4.2f bits (out of %4.2f); mutual information = %4.2f bits\n",100*id,entropy,entropy_qav,mut_info);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2595 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2596 // return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2597 // }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2598
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2599
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2600 /* @* HMM::ClobberGlobal (eg, q,t)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2601 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2602 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2603 HMM::ClobberGlobal(void){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2604
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2605 for (int i = 0; i < n_display; i++){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2606 if (sname[i]){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2607 delete[] sname[i]; sname[i] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2608 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2609 if (seq[i]){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2610 delete[] seq[i]; seq[i] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2611 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2612 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2613 Neff_M[0] = Neff_I[0] = Neff_D[0] = 0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2614 longname[0] = '\0'; file[0] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2615 ss_dssp[0] = sa_dssp[0] = ss_pred[0] = ss_conf[0] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2616 Xcons = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2617 l[0] = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2618 L = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2619 Neff_HMM = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2620 n_display = N_in = N_filtered = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2621 nss_dssp = nsa_dssp = nss_pred = nss_conf = nfirst = ncons = -1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2622 lamda = 0.0; mu = 0.0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2623 name[0] = longname[0] = fam[0] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2624
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2625 for (int i = 0; i < NAA; i++){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2626 pav[i] = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2627 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2628
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2629 /* @= */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2630 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2631
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2632 } /* this is the end of ClobberGlobal() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2633
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2634
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2635 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2636 * EOF hhhmm-C.h
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2637 */