comparison clustalomega/clustal-omega-0.2.0/src/hhalign/hhfunc-C.h @ 0:ff1768533a07

Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author clustalomega
date Tue, 07 Jun 2011 17:04:25 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:ff1768533a07
1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
3 /*********************************************************************
4 * Clustal Omega - Multiple sequence alignment
5 *
6 * Copyright (C) 2010 University College Dublin
7 *
8 * Clustal-Omega is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
12 *
13 * This file is part of Clustal-Omega.
14 *
15 ********************************************************************/
16
17 /*
18 * RCS $Id: hhfunc-C.h 199 2011-02-21 18:24:49Z fabian $
19 */
20
21 /*
22 * Changelog: Michael Remmert made changes to hhalign stand-alone code
23 * FS implemented some of the changes on 2010-11-11 -> MR1
24 *
25 * did not incorporate SS prediction PSIpred (yet); functions are:
26 * CalculateSS(3),
27 */
28
29
30 // hhfunc.C
31
32
33 /**
34 * AddBackgroundFrequencies()
35 *
36 * @brief add background frequencies (derived from HMM) to
37 * sequence/profile
38 *
39 * @param[in,out] ppfFreq,
40 * [in] residue frequencies of sequence/profile,
41 * [out] overlayed with HMM background frequencies
42 * @param[in,out] ppfPseudoF,
43 * [in] residue frequencies+pseudocounts of sequence/profile,
44 * [out] overlayed with HMM background frequencies+pseudocounts
45 * @param[in] iSeqLen,
46 * length of sequence/profile (not aligned to HMM)
47 * @pram[in] prHMM,
48 * background HMM
49 * @param[in] ppcSeq,
50 * sequences/profile to be 'softened'
51 * @param[in] pcPrealigned,
52 * sequence aligned to HMM, this is not quite a consensus,
53 * it is identical to 1st sequence but over-writes gap information,
54 * if other sequences in profile have (non-gap) residues
55 * @param[in] iPreCnt
56 * number of sequences pre-aligned (pcPrealigned is 'consensus' of these sequences)
57 * @param[in] pcRepresent
58 * sequence representative of HMM, aligned to pcSeq0
59 */
60 void
61 AddBackgroundFrequencies(float **ppfFreq, float **ppfPseudoF, float **ppfPseudoTr,
62 int iSeqLen, hmm_light *prHMM,
63 char **ppcSeq, char *pcPrealigned, int iPreCnt, char *pcRepresent)
64 {
65
66 char *pcS = pcPrealigned; /* current residue in pre-aligned sequence */
67 char *pcH = pcRepresent; /* current residue in pre-aligned HMM */
68 int iS = 0; /* position in un-aligned sequence (corresponds to pcS) */
69 int iH = 0; /* position in un-aligned HMM (corresponds to pcH) */
70 int iA; /* residue iterator */
71 int iT; /* transition state iterator */
72 float fFWeight = 0.50 / sqrt((float)(iPreCnt)); /* weight of backgroud frequencies */ /* FIXME: tune value, 0.50 default */
73 //float fFWeight = 0.75;
74 float fFThgiew = UNITY - fFWeight; /* weight of 'true' frequencies */
75 float fGWeight = 0.50 / sqrt((float)(iPreCnt)); /* weight of backgroud transitions */ /* FIXME: tune value, 0.50 default */
76 //float fGWeight = 0.50 /*/ (float)(iPreCnt)*/; /* weight of backgroud transitions */ /* FIXME: tune value, 0.50 default */
77 float fGThgiew = UNITY - fGWeight; /* weight of 'true' transitions */
78 float fAux;
79 /* zf1SeqTrans[] are the (phenomenological) transition probabilities (+ pseudo-counts)
80 for a single sequence. It is almost certain (0.99) to stay in a match state, and very
81 unlikely (0.05) to go from match to insertion/deletion */
82 float zf1SeqTrans[] = {0.98, 0.01, 0.01, 0.25, 0.75, 0.25, 0.75};
83 float zf1SeqInit[] = {0.98, 0.01, 0.01, 0.99, 0.01, 0.99, 0.01};
84 float zf1SeqDel[] = {0.24, 0.01, 0.75, 0.01, 0.01, 0.01, 0.01};
85 float zf1SeqRevrt[] = {0.98, 0.01, 0.01, 0.01, 0.01, 0.99, 0.01};
86
87 if ( (NULL == pcPrealigned) || (NULL == pcRepresent) ){
88 /*printf("%s/%s:%d: WARNING HMM=NULL -- didn't think I would get here (carry on, no danger)\n",
89 __FUNCTION__, __FILE__, __LINE__);*/
90 return;
91 }
92
93 if (NULL == prHMM->p){
94 printf("%s:%s:%d: WARNING -- Background Pseudocounts point to NULL\n"
95 "\tthis is not intended - don't add background but CONTINUE\n",
96 __FUNCTION__, __FILE__, __LINE__);
97 return;
98
99 }
100
101 /* FIXME: should be 0 (FS thinks) but -1 gives better results */
102 iH = iS = 0/*-1*//*+1*/;
103 while ( ('\0' != *pcS) && ('\0' != *pcH) ){
104
105 if ( ('-' != *pcH) && ('-' != *pcS) ){
106 iH++;
107 iS++;
108
109 #if 0
110 /* match state
111 * - HMM had a gap in previous position (now closed)
112 * FIXME: this does not really work */
113 if ((iH > 0) && ('-' == *(pcH-1))){
114
115 for (iT = 0; iT < STATE_TRANSITIONS; iT++){
116 ppfPseudoTr[iS-1][iT] = log2f(zf1SeqRevrt[iT]);
117 }
118 }
119 #endif
120
121 #if 1
122 /* do frequencies -- this is not really useful;
123 frequencies are derived from HMM
124 and contain already pseudocounts (PCs),
125 adding frequencies and then PCs will add PCs _twice_
126 results are better than not to add them,
127 but not as good as PCs */
128 for (iA = 0; iA < AMINOACIDS; iA++){
129 fAux = fFThgiew * ppfFreq[iS][iA] + fFWeight * prHMM->f[iH][iA];
130 ppfFreq[iS][iA] = fAux;
131 }
132 #endif
133 /* do pseudo-counts */
134 for (iA = 0; iA < AMINOACIDS; iA++){
135 fAux =
136 fFThgiew * ppfPseudoF[iS][iA] + fFWeight * prHMM->p[iH][iA];
137 ppfPseudoF[iS][iA] = fAux;
138 }
139 #if 0 /* do state transitions */
140 for (iT = 0; iT < STATE_TRANSITIONS; iT++){
141 #if 1
142 /* this is a very crude method,
143 which averages the logarithms of the transitions,
144 effectively performing a geometric mean -
145 this presumably violates normalisation.
146 however, results are surprisingly good */
147 fAux =
148 fGThgiew * ppfPseudoTr[iS][iT] +
149 fGWeight * prHMM->tr[iH][iT];
150 ppfPseudoTr[iS][iT] = fAux;
151 #else /* crude averaging */
152 /* There are 2 things to consider:
153 (1) one should really blend the probabilities of
154 the transitions, however, by default we have
155 the logarithms thereof.
156 So must exponentiate, blend, and then take log again.
157 This is expensive, and does not seem to lead to better
158 results than blending the logarithms
159 (and violating normalisation)
160 (2) transition probabilities for a single sequence
161 are really easy, there are no insert/delete transitions.
162 However, there is a begin state that is different
163 from the main body.
164 But again, this does not seem to make a blind bit
165 of difference
166 */
167 if (iS > 0){
168 fAux =
169 fGThgiew * zf1SeqTrans[iT] +
170 fGWeight * prHMM->linTr[iH][iT];
171 ppfPseudoTr[iS][iT] = log2f(fAux);
172 }
173 else {
174 fAux =
175 fGThgiew * zf1SeqInit[iT] +
176 fGWeight * prHMM->linTr[iH][iT];
177 ppfPseudoTr[iS][iT] = log2f(fAux);
178 }
179 #endif /* mixing of linear/log */
180 } /* 0 <= iT < STATE_TRANSITIONS */
181 #endif /* did state transitions */
182 } /* was Match -- neither HMM nor sequence have gap */
183
184 else if ('-' == *pcH){
185 /* sequence opened up gap in HMM */
186 #if 0
187 if ((iH > 0) && ('-' != *(pcH-1)) && (iS > 0)){
188 /* this is the first gap in HMM
189 * FIXME: this does not really work */
190 for (iT = 0; iT < STATE_TRANSITIONS; iT++){
191 ppfPseudoTr[iS-1][iT] = log2f(zf1SeqDel[iT]);
192 }
193 }
194 else {
195 /* do nothing, keep single sequence values exactly as they are*/
196 }
197 #endif
198 iS++;
199 }
200 else if ('-' == *pcS){
201 /* here the single sequence has a gap,
202 and the HMM (as a whole) does not. There may be individual gaps
203 in the HMM at this stage. By ignoring this we say that the HMM
204 dominates the overall behaviour - as in the M2M state as well
205 */
206 iH++;
207 }
208
209 pcH++;
210 pcS++;
211
212 } /* !EO[seq/hmm] */
213
214 return;
215
216 } /* this is the end of AddBackgroundFrequencies() */
217
218
219
220 /**
221 * ReadAndPrepare()
222 *
223 * @brief Read HMM input file or transfer alignment
224 * and add pseudocounts etc.
225 *
226 * @param[in] iRnPtype
227 * type of read/prepare
228 * enum {INTERN_ALN_2_HMM = 0, READ_ALN_2_HMM, READ_HMM_2_HMM};
229 * @param[in] ppcProf
230 * alignment
231 * @param[in] iCnt
232 * number of seqs in alignment
233 * @param[in,out] prHMM,
234 * [in] if sequences read/prepared, [out] if HMM from file
235 * @param[in] pcPrealigned,
236 * (single) sequence aligned to background HMM
237 * @param[in] pcRepresent,
238 * sequence representing HMM aligned to individual sequence
239 * param[in] pdExWeights
240 * (external) sequence weights, derived from tree
241 * @param[in] infile
242 * name of file with HMM info (formerly also alignment)
243 * @param[out] q
244 * HMM structure with transition probabilities, residue frequencies etc
245 * @param[???] qali
246 * FIXME: what is qali?
247 *
248 * @return FAILURE on error, OK otherwise
249 */
250 int
251 ReadAndPrepare(int iRnPtype,
252 char **ppcProf, int iCnt, hmm_light *prHMM,
253 char *pcPrealigned, char *pcRepresent, double *pdExWeights,
254 char* infile, HMM& q, Alignment* qali=NULL) {
255
256 //#ifndef CLUSTALO_NOFILE
257 char path[NAMELEN];
258
259 /* NOTE: there are different scenarios:
260
261 (i) ("" != infile) - read HMM from file,
262 transfer frequency/transition/pseudo-count (f/t/p) info into prHMM
263
264 (ii) ('\0' != ppcProf[0]) - transfer sequence/alignment into q/qali,
265 don't save f/t/p into prHMM,
266 on the contrary, if prior f/t/p available then add it to q/qali,
267 this is only done if (1==iCnt)
268
269 (iii) ('\0' == ppcProf[0]) - re-cycle old HMM information
270 recreate a HMM from previous data
271 */
272
273 /********************************/
274 /*** (o) Recycle internal HMM ***/
275 /********************************/
276 if ( (INTERN_ALN_2_HMM == iRnPtype) && (iCnt <= 0) ){
277
278 /* NOTE: here we are writing into a HMM structure/class;
279 memory has been allocated for this in hhalign.cpp;
280 however, as iCnt<=0, there may not be memory for
281 prHMM->n_display sequences/names.
282 But then, there doesn't have to be.
283 At this stage we are just copying one HMM into another HMM,
284 sequences are irrelevant. The only sequence of (marginal)
285 interest is the consensus sequence */
286 /* FIXME: check that prHMM is valid -- how? */
287
288 const int ciCons = 0;
289 const int ciNoof = ciCons+1;
290 const int ciInvalid = -1;
291 q.n_display = ciNoof; /* only consensus */
292 q.sname = NULL;
293 q.ncons = ciCons;
294 q.nfirst = ciCons;//prHMM->nfirst;
295 q.nss_dssp = ciInvalid;//prHMM->nss_dssp;
296 q.nsa_dssp = ciInvalid;//prHMM->nsa_dssp;
297 q.nss_pred = ciInvalid;//prHMM->nss_pred;
298 q.nss_conf = ciInvalid;//prHMM->nss_conf;
299 q.L = prHMM->L;
300 q.N_in = prHMM->N_in;
301 q.N_filtered = prHMM->N_filtered;
302 /* NOTE: I (FS) think that only ever 1 sequence will be transferred here,
303 that is, the consensus sequence. However, we might want to allow
304 (in the future) to transfer more sequences,
305 hence the awkward for() loop */
306 #if 0
307 for (int i = prHMM->ncons+0; i < prHMM->ncons+q.n_display; i++){
308 /* NOTE: In the original hhalign code the first position
309 is kept open ('\0'). This makes it difficult to use
310 string functions like strlen/strdup etc.
311 Insert a temporary gap (.) to facilitate string operations */
312 char cHead = prHMM->seq[i][0];
313 prHMM->seq[i][0] = '.';
314 q.seq[i] = strdup(prHMM->seq[i]);
315 prHMM->seq[i][0] = q.seq[i][0] = cHead;
316 }
317 #else
318 {
319 char cHead = prHMM->seq[prHMM->ncons][0];
320 prHMM->seq[prHMM->ncons][0] = '.';
321 q.seq[q.ncons] = strdup(prHMM->seq[prHMM->ncons]);
322 prHMM->seq[prHMM->ncons][0] = q.seq[q.ncons][0] = cHead;
323 }
324 #endif
325 const int NEFF_SCORE = 10; /* FIXME: Magic Number */
326 for (int i = 0; i < prHMM->L+1; i++){
327 q.Neff_M[i] = prHMM->Neff_M[i];
328 q.Neff_I[i] = prHMM->Neff_I[i];
329 q.Neff_D[i] = prHMM->Neff_D[i];
330 }
331 q.Neff_HMM = prHMM->Neff_HMM;
332 /* skip longname,name,file,fam,sfam,fold,cl */
333 q.lamda = prHMM->lamda;
334 q.mu = prHMM->mu;
335
336 HMMshadow rShadow = {0}; /* friend of HMM to access private members */
337 rShadow.copyShadowToHMM(q, *prHMM);
338
339 /* skip trans_lin,ss_dssp,sa_dssp,ss_pred,ss_conf,Xcons */
340 /* pav already done in copyShadowToHMM */
341 /* skip pnul */
342
343 return OK;
344
345
346 } /* INTERN_ALN_2_HMM && iCnt<=0 */
347
348 /******************************/
349 /*** (i) Read HMM from file ***/
350 /******************************/
351 char line[LINELEN] = {0}; // input line
352 FILE *inf = NULL;
353 //if ( (0 != strcmp(infile,"")) /*&& (iCnt > 0)*/ )
354 if ( (READ_HMM_2_HMM == iRnPtype) || (READ_ALN_2_HMM == iRnPtype)) {
355
356 if (0 == strcmp(infile,"")){
357 printf("%s:%s:%d:\n"
358 "\texpected to re %s from file but no file specified\n"
359 ""
360 , __FUNCTION__, __FILE__, __LINE__
361 , (READ_HMM_2_HMM==iRnPtype?"HMM":"alignment"));
362 return FAILURE;
363 }
364 inf = fopen(infile, "r");
365 if (!inf) OpenFileError(infile);
366 Pathname(path,infile);
367
368 //}
369 //else {
370 //inf = stdin;
371 //if (v>=2) printf("Reading HMM / multiple alignment from standard input ...\n(To get a help list instead, quit and type %s -h.)\n",program_name);
372 //*path='\0';
373 //}
374
375 fgetline(line,LINELEN-1,inf);
376 }
377
378 //if ( (0 != strcmp(infile,"")) && (iCnt > 0) )
379 if ( (READ_HMM_2_HMM == iRnPtype) ){
380
381 if (0 == strcmp(infile,"")){
382 printf("%s:%s:%d: expected to read HMM from file but no file-name\n",
383 __FUNCTION__, __FILE__, __LINE__);
384 }
385
386 // Is infile a HMMER3 file? /* MR1 */
387 if (!strncmp(line,"HMMER3",6))
388 {
389 if (v>=2) {
390 cout<<"Query file is in HMMER3 format\n";
391 cout<<"WARNING! Use of HMMER3 format as input results in dramatically loss of sensitivity!\n";
392 }
393
394 // Read 'query HMMER file
395 rewind(inf);
396 q.ReadHMMer3(inf,path);
397
398 // Don't add transition pseudocounts to query!!
399
400 // Generate an amino acid frequency matrix from f[i][a] with full pseudocount admixture (tau=1) -> g[i][a]
401 q.PreparePseudocounts();
402
403 // DON'T ADD amino acid pseudocounts to query: pcm=0! q.p[i][a] = f[i][a]
404 q.AddAminoAcidPseudocounts(0, par.pca, par.pcb, par.pcc);
405
406 /* further down there is a q.Log2LinTransitionProbs
407 but only if (iCnt>0), however, we still need it it here (i think),
408 there is no danger of doing this twice, as trans_lin is checked
409 FIXME (FS, 2011-01-12) */
410 /* further down there is a q.Log2LinTransitionProbs
411 but only if (iCnt>0), however, we still need it it here (i think),
412 there is no danger of doing this twice, as trans_lin is checked
413 FIXME (FS, 2011-01-12) */
414 //if (par.forward >= 1)
415 {
416 q.Log2LinTransitionProbs(1.0);
417 }
418
419 }
420 // ... or Is infile an old HMMER file?
421 else if (!strncmp(line,"HMMER",5)) {
422 if (v>=2) {
423 cout<<"Query file is in HMMER format\n";
424 cout<<"WARNING! Use of HMMER format as input results in dramatically loss of sensitivity!\n";
425 }
426
427 // Read 'query HMMER file
428 q.ReadHMMer(inf,path);
429 if (v>=2 && q.Neff_HMM>11.0)
430 fprintf(stderr,"WARNING: HMM %s looks too diverse (Neff=%.1f>11). Better check the underlying alignment... \n",q.name,q.Neff_HMM);
431
432 // Don't add transition pseudocounts to query!!
433
434 // Generate an amino acid frequency matrix from f[i][a] with full pseudocount admixture (tau=1) -> g[i][a]
435 q.PreparePseudocounts();
436
437 // DON'T ADD amino acid pseudocounts to query: pcm=0! q.p[i][a] = f[i][a]
438 q.AddAminoAcidPseudocounts(0, par.pca, par.pcb, par.pcc);
439
440 /* further down there is a q.Log2LinTransitionProbs
441 but only if (iCnt>0), however, we still need it it here (i think),
442 there is no danger of doing this twice, as trans_lin is checked
443 FIXME (FS, 2011-01-12) */
444 //if (par.forward >= 1)
445 {
446 q.Log2LinTransitionProbs(1.0);
447 }
448
449 } /* it was a HMMer file */
450
451 // ... or is it an hhm file?
452 else if (!strncmp(line,"NAME",4) || !strncmp(line,"HH",2)) {
453
454 if (v>=2) cout<<"Query file is in HHM format\n";
455
456 // Rewind to beginning of line and read query hhm file
457 rewind(inf);
458 q.Read(inf,path);
459 if (v>=2 && q.Neff_HMM>11.0)
460 fprintf(stderr,"WARNING: HMM %s looks too diverse (Neff=%.1f>11). Better check the underlying alignment... \n",q.name,q.Neff_HMM);
461
462 // Add transition pseudocounts to query -> q.p[i][a]
463 q.AddTransitionPseudocounts();
464
465 // Generate an amino acid frequency matrix from f[i][a] with full pseudocount admixture (tau=1) -> g[i][a]
466 q.PreparePseudocounts();
467
468 // Add amino acid pseudocounts to query: q.p[i][a] = (1-tau)*f[i][a] + tau*g[i][a]
469 q.AddAminoAcidPseudocounts();
470
471 } /* it was a HHM file */
472 else {
473 fprintf(stderr, "%s:%s:%d: Unknown HMM format in infile\n"
474 "infile=%s, #seq=%d\n"
475 , __FUNCTION__, __FILE__, __LINE__
476 , infile, iCnt);
477 return FAILURE;
478 }
479
480 /*fclose(inf);*/
481
482 /*** transfer class info to struct */
483 prHMM->n_display = q.n_display;
484 /* ignore sname*/
485 prHMM->seq = (char **)calloc((q.n_display+1), sizeof(char *));
486 /* FIXME valgrind says bytes get lost in the above calloc during
487 * hmm-iteration
488 */
489 for (int i = 0; i < q.n_display; i++){
490 /* NOTE: In the original hhalign code the first position
491 is kept open ('\0'). This makes it difficult to use
492 string functions like strlen/strdup etc.
493 Insert a temporary gap (.) to facilitate string operations */
494 char cHead = q.seq[i][0];
495 q.seq[i][0] = '.';
496 prHMM->seq[i] = strdup(q.seq[i]);
497 q.seq[i][0] = prHMM->seq[i][0] = cHead;
498 }
499 prHMM->ncons = q.ncons;
500 prHMM->nfirst = q.nfirst;
501 prHMM->nss_dssp = q.nss_dssp;
502 prHMM->nsa_dssp = q.nsa_dssp;
503 prHMM->nss_pred = q.nss_pred;
504 prHMM->nss_conf = q.nss_conf;
505 prHMM->L = q.L;
506 prHMM->N_in = q.N_in;
507 prHMM->N_filtered = q.N_filtered;
508 const int NEFF_SCORE = 10; /* FIXME: Magic Number */
509 prHMM->Neff_M = (float *)calloc(prHMM->L+1, sizeof(float));
510 prHMM->Neff_I = (float *)calloc(prHMM->L+1, sizeof(float));
511 prHMM->Neff_D = (float *)calloc(prHMM->L+1, sizeof(float));
512 /*for (int i = 0; i < prHMM->L+1; i++){
513 prHMM->Neff_M[i] = prHMM->Neff_I[i] = prHMM->Neff_D[i] = NEFF_SCORE;
514 }*/
515 prHMM->Neff_HMM = q.Neff_HMM;
516 /* skip longname,name,file,fam,sfam,fold,cl */
517 prHMM->lamda = q.lamda;
518 prHMM->mu = q.mu;
519
520 HMMshadow rShadow = {0}; /* friend of HMM to access private members */
521 rShadow.copyHMMtoShadow(q);
522
523 prHMM->f = (float **)calloc(prHMM->L+1, sizeof(float *));
524 prHMM->g = (float **)calloc(prHMM->L+1, sizeof(float *));
525 prHMM->p = (float **)calloc(prHMM->L+1, sizeof(float *));
526 prHMM->tr = (float **)calloc(prHMM->L+1, sizeof(float *));
527 prHMM->linTr = (float **)calloc(prHMM->L+1, sizeof(float *));
528 for (int i = 0; i < prHMM->L+1; i++){
529 prHMM->f[i] = (float *)calloc(AMINOACIDS, sizeof(float));
530 prHMM->g[i] = (float *)calloc(AMINOACIDS, sizeof(float));
531 prHMM->p[i] = (float *)calloc(AMINOACIDS, sizeof(float));
532 for (int j = 0; j < AMINOACIDS; j++){
533 prHMM->f[i][j] = (float)rShadow.f[i][j];
534 prHMM->g[i][j] = (float)rShadow.g[i][j];
535 prHMM->p[i][j] = (float)rShadow.p[i][j];
536 }
537 prHMM->tr[i] = (float *)calloc(STATE_TRANSITIONS, sizeof(float));
538 prHMM->linTr[i] = (float *)calloc(STATE_TRANSITIONS, sizeof(float));
539 for (int j = 0; j< STATE_TRANSITIONS; j++){
540 prHMM->tr[i][j] = (float)rShadow.tr[i][j];
541 prHMM->linTr[i][j] = fpow2(rShadow.tr[i][j]);
542 }
543 } /*0 <= i < prHMM->L+1 */
544 /* skip trans_lin,ss_dssp,sa_dssp,ss_pred,ss_conf,Xcons */
545 for (int j = 0; j < AMINOACIDS; j++){
546 prHMM->pav[j] = (float)rShadow.pav[j];
547 }
548 /* skip pnul */
549
550 } /* have read HHM from file */
551 /*else if ( ((NULL != ppcProf) && (iCnt > 0) && ('\0' != ppcProf[0][0])) ||
552 ( (0 != strcmp(infile,"") && (0 == iCnt) )) )*/
553 else if ( (INTERN_ALN_2_HMM == iRnPtype) || (READ_ALN_2_HMM == iRnPtype) ) {
554
555 if ( (INTERN_ALN_2_HMM == iRnPtype) &&
556 ( (NULL == ppcProf) || (iCnt <= 0) || ('\0' == ppcProf[0][0]) ) ){
557 printf("%s:%s:%d: was expecting internal alignment but\n"
558 "\tppcProf=%p, #seq=%d, ppcProf[0][0]=%c\n"
559 , __FUNCTION__, __FILE__, __LINE__
560 , ppcProf, iCnt, ppcProf[0][0]);
561 return FAILURE;
562 }
563 else if ( (READ_ALN_2_HMM == iRnPtype) &&
564 (0 == strcmp(infile,"")) ){
565 printf("%s:%s:%d: was expecting to read alignment from file but no filename\n"
566 , __FUNCTION__, __FILE__, __LINE__);
567 return FAILURE;
568 }
569
570 /*******************************/
571 /*** (ii) it is an alignment ***/
572 /*******************************/
573 /* transfer alignment information from clustal character array
574 into pali/q/t classes */
575 /*
576 * NOTE that emissions in HMMer file format contain pseudo-counts.
577 * HHM file format does not contain emission pseudo-counts.
578 * the structure that stores background HMM information does contain pseudo-counts
579 */
580 Alignment* pali;
581 if (qali==NULL){
582 pali=new Alignment(iCnt); /* FIXME: pass in iCnt to get rid of MAXSEQ */
583 }
584 else{
585 pali=qali;
586 }
587
588 if (par.calibrate) {
589 printf("\nError in %s: only HHM files can be calibrated.\n",program_name);
590 printf("Build an HHM file from your alignment with 'hhmake -i %s' and rerun hhsearch with the hhm file\n\n",infile);
591 exit(1);
592 }
593
594 if (v>=2 && strcmp(infile,"stdin")) cout<<infile<<" is in A2M, A3M or FASTA format\n";
595
596 /* Read alignment from infile into matrix X[k][l] as ASCII
597 (and supply first line as extra argument) */
598 //if (iCnt > 0)
599 if (INTERN_ALN_2_HMM == iRnPtype){
600 pali->Transfer(ppcProf, iCnt);
601 }
602 //else if (0 == iCnt)
603 else if (READ_ALN_2_HMM == iRnPtype){
604 pali->Read(inf, infile, line);
605 }
606 else {
607 printf("%s:%s:%d: FATAL problem\n"
608 "infile = (%s), #seq = %d\n"
609 , __FUNCTION__, __FILE__, __LINE__
610 , infile, iCnt);
611 return FAILURE;
612 }
613
614 /* Convert ASCII to int (0-20), throw out all insert states,
615 record their number in I[k][i]
616 and store marked sequences in name[k] and seq[k] */
617 pali->Compress(infile);
618
619 /* Sort out the nseqdis most dissimilar sequences for display
620 in the output alignments */
621 pali->FilterForDisplay(par.max_seqid, par.coverage, par.qid,
622 par.qsc,par.nseqdis);
623
624 // Filter alignment for min score per column with core query profile, defined by coverage_core and qsc_core
625 //if (par.coresc>-10) pali->HomologyFilter(par.coverage_core, par.qsc_core, par.coresc);
626
627 /* Remove sequences with seq. identity larger than seqid percent
628 (remove the shorter of two) */
629 pali->N_filtered = pali->Filter(par.max_seqid, par.coverage,
630 par.qid, par.qsc, par.Ndiff);
631
632 /* Calculate pos-specific weights,
633 AA frequencies and transitions -> f[i][a], tr[i][a] */
634 pali->FrequenciesAndTransitions(q);
635 if (v>=2 && q.Neff_HMM>11.0)
636 fprintf(stderr,"WARNING: alignment %s looks too diverse (Neff=%.1f>11). Better check it with an alignment viewer... \n",q.name,q.Neff_HMM);
637
638 /*printf("%d %d %f %d (N,Nf,Neff,L) %s:%s:%d\n"
639 , q.N_in, q.N_filtered, q.Neff_HMM, q.L, __FUNCTION__, __FILE__, __LINE__);*/
640
641 // Add transition pseudocounts to query -> p[i][a]
642 q.AddTransitionPseudocounts();
643
644 /* Generate an amino acid frequency matrix from f[i][a]
645 with full pseudocount admixture (tau=1) -> g[i][a] */
646 q.PreparePseudocounts();
647
648 /* Add amino acid pseudocounts to query:
649 p[i][a] = (1-tau)*f[i][a] + tau*g[i][a] */
650 q.AddAminoAcidPseudocounts();
651
652 /* ****** add aligned background pseudocounts ***** */
653 HMMshadow rShadowQ = {0};
654 rShadowQ.copyHMMtoShadow(q);
655
656 AddBackgroundFrequencies(rShadowQ.f, rShadowQ.p, rShadowQ.tr,
657 q.L, prHMM,
658 q.seq, pcPrealigned, iCnt, pcRepresent);
659
660
661 if (qali==NULL){
662 delete(pali); pali = NULL;
663 }
664
665 } /* else if (NULL != ppcProf) // not hmmer/hhalign but alignment */
666
667 //else if ((prHMM->L > 0) && ('\0' == ppcProf[0][0]))
668 else if (INTERN_HMM_2_HMM == iRnPtype){
669
670 /******************************************/
671 /*** (iii) re-cycle old HMM information ***/
672 /******************************************/
673
674 if (prHMM->L <= 0){
675 printf("%s:%s:%d: was expecting to copy HMM structure but L=%d\n"
676 , __FUNCTION__, __FILE__, __LINE__, prHMM->L);
677 }
678
679 printf("%s:%s:%d: RE-CYCLING HMM\n", __FUNCTION__, __FILE__, __LINE__);
680
681 #if 0
682 q.n_display = prHMM->n_display;
683 /* ignore sname*/
684 for (int i = 0; i < q.n_display; i++){
685 /* NOTE: In the original hhalign code the first position
686 is kept open ('\0'). This makes it difficult to use
687 string functions like strlen/strdup etc.
688 Insert a temporary gap (.) to facilitate string operations */
689 char cHead = prHMM->seq[i][0];
690 prHMM->seq[i][0] = '.';
691 q.seq[i] = strdup(prHMM->seq[i]);
692 q.seq[i][0] = prHMM->seq[i][0] = cHead;
693 }
694 q.nfirst = prHMM->nfirst;
695 #else
696 q.n_display = 1;
697 q.nfirst = 0;
698 char cHead = prHMM->seq[prHMM->nfirst][0];
699 prHMM->seq[prHMM->nfirst][0] = '.';
700 q.seq[0] = strdup(prHMM->seq[prHMM->nfirst]);
701 q.seq[q.n_display-1][0] = prHMM->seq[prHMM->nfirst][0] = cHead;
702 #endif
703 q.ncons = prHMM->ncons;
704 q.nss_dssp = prHMM->nss_dssp;
705 q.nsa_dssp = prHMM->nsa_dssp;
706 q.nss_pred = prHMM->nss_pred;
707 q.nss_conf = prHMM->nss_conf;
708 q.L = prHMM->L;
709 q.N_in = prHMM->N_in;
710 q.N_filtered = prHMM->N_filtered;
711 #define NEFF_SCORE 10 /* FIXME: Magic Number */
712 /*for (int i; i < prHMM->L+1; i++){
713 q.Neff_M[i] = q.Neff_I[i] = q.Neff_D[i] = NEFF_SCORE;
714 }*/
715 q.Neff_HMM = prHMM->Neff_HMM;
716 /* skip longname,name,file,fam,sfam,fold,cl */
717 q.lamda = prHMM->lamda;
718 q.mu = prHMM->mu;
719
720 HMMshadow rShadow = {0}; /* friend of HMM to access private members */
721 rShadow.copyShadowToHMM(q, *prHMM);
722
723 }
724
725 if (iCnt > 0){
726 if (par.forward>=1) q.Log2LinTransitionProbs(1.0);
727 }
728
729 if (NULL != inf){
730 fclose(inf);
731 }
732
733 return OK;
734
735 } /*** end: ReadAndPrepare() ***/
736
737 /**
738 * FreeHMMstruct()
739 *
740 * @brief FIXME
741 *
742 * @param[in,out]
743 */
744 extern "C" void
745 FreeHMMstruct(hmm_light *prHMM)
746 {
747 int i;
748
749 if (NULL != prHMM->f){
750 for (i = 0; i < prHMM->L+1; i++){
751 if (NULL != prHMM->f[i]){
752 free(prHMM->f[i]); prHMM->f[i] = NULL;
753 }
754 } /* 0 <= i < prHMM->L+1 */
755 free(prHMM->f); prHMM->f = NULL;
756 }
757 if (NULL != prHMM->g){
758 for (i = 0; i < prHMM->L+1; i++){
759 if (NULL != prHMM->g[i]){
760 free(prHMM->g[i]); prHMM->g[i] = NULL;
761 }
762 } /* 0 <= i < prHMM->L+1 */
763 free(prHMM->g); prHMM->g = NULL;
764 }
765 if (NULL != prHMM->p){
766 for (i = 0; i < prHMM->L+1; i++){
767 if (NULL != prHMM->p[i]){
768 free(prHMM->p[i]); prHMM->p[i] = NULL;
769 }
770 } /* 0 <= i < prHMM->L+1 */
771 free(prHMM->p); prHMM->p = NULL;
772 }
773 if (NULL != prHMM->tr){
774 for (i = 0; i < prHMM->L+1; i++){
775 if (NULL != prHMM->tr[i]){
776 free(prHMM->tr[i]); prHMM->tr[i] = NULL;
777 }
778 } /* 0 <= i < prHMM->L+1 */
779 free(prHMM->tr); prHMM->tr = NULL;
780 }
781 if (NULL != prHMM->linTr){
782 for (i = 0; i < prHMM->L+1; i++){
783 if (NULL != prHMM->linTr[i]){
784 free(prHMM->linTr[i]); prHMM->linTr[i] = NULL;
785 }
786 } /* 0 <= i < prHMM->L+1 */
787 free(prHMM->linTr); prHMM->linTr = NULL;
788 }
789
790 if (NULL != prHMM->Neff_M){
791 free(prHMM->Neff_M); prHMM->Neff_M = NULL;
792 }
793 if (NULL != prHMM->Neff_I){
794 free(prHMM->Neff_I); prHMM->Neff_I = NULL;
795 }
796 if (NULL != prHMM->Neff_D){
797 free(prHMM->Neff_D); prHMM->Neff_D = NULL;
798 }
799
800 if (NULL != prHMM->seq){
801 for (i = 0; i < prHMM->n_display; i++){
802 if (NULL != prHMM->seq[i]){
803 free(prHMM->seq[i]); prHMM->seq[i] = NULL;
804 }
805 }
806 free(prHMM->seq); prHMM->seq = NULL;
807 }
808
809 memset(prHMM, 0, sizeof(hmm_light));
810
811 } /*** end: FreeHMMstruct() ***/
812
813
814 /**
815 * @brief comparisin function for ascending qsort() of doubles
816 */
817 int
818 CompDblAsc(const void *pv1, const void *pv2){
819
820 double d1 = *(double *)pv1;
821 double d2 = *(double *)pv2;
822
823 if (d1 > d2) { return +1; }
824 else if (d1 < d2) { return -1; }
825 else { return 0; }
826
827 } /*** end: CompDblAsc() ***/
828
829
830 /**
831 * AlnToHMM2()
832 *
833 * @brief convert alignment into HMM (hhmake)
834 *
835 * @param[out] prHMM
836 * structure with pseudocounts etc
837 * @param[in] pcHMM_input
838 * name of file with HMM
839 *
840 */
841 extern "C" int
842 AlnToHMM2(hmm_light *rHMM_p,
843 char **ppcSeq, int iN){
844
845 if (0 == par.M){
846 SetDefaults();
847 SetSubstitutionMatrix();
848 par.cons = 1;
849 par.M = 2;
850 par.forward=2;
851 par.Mgaps=100;
852 const int ciGoodMeasureSeq = 10;
853 int iAuxLen = strlen(ppcSeq[0]);
854 par.maxResLen = iAuxLen;
855 par.maxResLen += ciGoodMeasureSeq;
856 par.maxColCnt = iAuxLen + ciGoodMeasureSeq;
857 par.max_seqid=DEFAULT_FILTER;
858 par.loc=0; par.mact=0;
859 /* some minor parameters, affecting alignment (i think) */
860 par.p = 0.0; /* minimum threshold for inclusion in hit list */
861 par.Z = 100; /* minimum threshold for inclusion in hit list and alignment listing */
862 par.z = 1; /* min number of lines in hit list */
863 par.B = 100; /* max number of alignments */
864 par.b = 1; /* min number of alignments */
865 par.E = 1e6; /* maximum threshold for inclusion in hit list and alignment listing */
866 par.altali=1;par.hitrank=0;par.showcons=1; par.showdssp=1;par.showpred=1;par.nseqdis=iN;par.cons=1;
867 }
868
869 const int ciGoodMeasure = 10;
870 int iLen = strlen(ppcSeq[0]) + ciGoodMeasure;
871 if (iLen > par.maxResLen){
872 par.maxResLen = par.maxColCnt = iLen;
873 }
874 HMM rTemp(iN+2, iLen); /* temporary template */
875 Alignment rTempAli(iN+2, iLen); /* temporary alignment */
876 int iParCons = par.cons;
877
878 /*printf(">>>>>>>>>>> %s:%s:%d: there are %d sequences\n", __FUNCTION__, __FILE__, __LINE__, iN);*/
879
880 par.cons = 1;
881 if (OK != ReadAndPrepare(INTERN_ALN_2_HMM,
882 ppcSeq, iN, rHMM_p,
883 NULL/*prealigned*/, NULL/*representative*/, NULL/*weights*/, //YES/*transfer*/,
884 (char*)("")/*in-file*/, rTemp, &rTempAli)) {
885 return FAILURE;
886 }
887 par.cons = iParCons;
888
889 /******/
890 /*** transfer class info to struct */
891 rHMM_p->n_display = rTemp.n_display;
892 rHMM_p->sname = NULL;
893 rHMM_p->seq = (char **)calloc((rTemp.n_display+1), sizeof(char *));
894
895 for (int i = 0; i < rTemp.n_display; i++){
896 /* NOTE: In the original hhalign code the first position
897 is kept open ('\0'). This makes it difficult to use
898 string functions like strlen/strdup etc.
899 Insert a temporary gap (.) to facilitate string operations */
900 char cHead = rTemp.seq[i][0];
901 rTemp.seq[i][0] = '.';
902 rHMM_p->seq[i] = strdup(rTemp.seq[i]);
903 rTemp.seq[i][0] = rHMM_p->seq[i][0] = cHead;
904 }
905 rHMM_p->ncons = rTemp.ncons;
906 rHMM_p->nfirst = rTemp.nfirst;
907 if (-1 == rHMM_p->ncons){
908 /* ncons needed later for alignment of
909 representative sequence and copy of profile.
910 ncons not always set (-1 default),
911 this will cause segmentation fault.
912 nfirst (probably) right index -
913 no problem if not */
914 rHMM_p->ncons = rTemp.nfirst;
915 }
916 rHMM_p->nss_dssp = rTemp.nss_dssp;
917 rHMM_p->nsa_dssp = rTemp.nsa_dssp;
918 rHMM_p->nss_pred = rTemp.nss_pred;
919 rHMM_p->nss_conf = rTemp.nss_conf;
920 rHMM_p->L = rTemp.L;
921 rHMM_p->N_in = rTemp.N_in;
922 rHMM_p->N_filtered = rTemp.N_filtered;
923 #define NEFF_SCORE 10 /* FIXME: Magic Number */ //// get rid of that, FS, 2010-12-22
924 rHMM_p->Neff_M = (float *)calloc(rHMM_p->L+1, sizeof(float));
925 rHMM_p->Neff_I = (float *)calloc(rHMM_p->L+1, sizeof(float));
926 rHMM_p->Neff_D = (float *)calloc(rHMM_p->L+1, sizeof(float));
927 for (int i = 0; i < rHMM_p->L+1; i++){
928 rHMM_p->Neff_M[i] = rHMM_p->Neff_I[i] = rHMM_p->Neff_D[i] = NEFF_SCORE; //// get rid of that, FS, 2010-12-22
929 }
930 rHMM_p->Neff_HMM = rTemp.Neff_HMM;
931 /* skip longname,name,file,fam,sfam,fold,cl */
932 rHMM_p->lamda = rTemp.lamda;
933 rHMM_p->mu = rTemp.mu;
934
935 HMMshadow rShadow = {0}; /* friend of HMM to access private members */
936 rShadow.copyHMMtoShadow(rTemp);
937
938 rHMM_p->Neff_M = (float *)calloc(rHMM_p->L+1, sizeof(float));
939 rHMM_p->Neff_I = (float *)calloc(rHMM_p->L+1, sizeof(float));
940 rHMM_p->Neff_D = (float *)calloc(rHMM_p->L+1, sizeof(float));
941 rHMM_p->f = (float **)calloc(rHMM_p->L+1, sizeof(float *));
942 rHMM_p->g = (float **)calloc(rHMM_p->L+1, sizeof(float *));
943 rHMM_p->p = (float **)calloc(rHMM_p->L+1, sizeof(float *));
944 rHMM_p->tr = (float **)calloc(rHMM_p->L+1, sizeof(float *));
945 rHMM_p->linTr = (float **)calloc(rHMM_p->L+1, sizeof(float *));
946
947 for (int i = 0; i < rHMM_p->L+1; i++){
948 rHMM_p->Neff_M[i] = (float)rShadow.Neff_M[i];
949 rHMM_p->Neff_I[i] = (float)rShadow.Neff_I[i];
950 rHMM_p->Neff_D[i] = (float)rShadow.Neff_D[i];
951 rHMM_p->f[i] = (float *)calloc(AMINOACIDS, sizeof(float));
952 rHMM_p->g[i] = (float *)calloc(AMINOACIDS, sizeof(float));
953 rHMM_p->p[i] = (float *)calloc(AMINOACIDS, sizeof(float));
954 for (int j = 0; j < AMINOACIDS; j++){
955 rHMM_p->f[i][j] = (float)rShadow.f[i][j];
956 rHMM_p->g[i][j] = (float)rShadow.g[i][j];
957 rHMM_p->p[i][j] = (float)rShadow.p[i][j];
958 }
959 rHMM_p->tr[i] = (float *)calloc(STATE_TRANSITIONS, sizeof(float));
960 rHMM_p->linTr[i] = (float *)calloc(STATE_TRANSITIONS, sizeof(float));
961 for (int j = 0; j< STATE_TRANSITIONS; j++){
962 rHMM_p->tr[i][j] = (float)rShadow.tr[i][j];
963 rHMM_p->linTr[i][j] = fpow2(rShadow.tr[i][j]);
964 }
965 } /*0 <= i < prHMM->L+1 */
966 /* skip trans_lin,ss_dssp,sa_dssp,ss_pred,ss_conf,Xcons */
967 for (int j = 0; j < AMINOACIDS; j++){
968 rHMM_p->pav[j] = (float)rShadow.pav[j];
969 }
970 /* skip pnul */
971
972
973 /******/
974
975
976 rTemp.ClobberGlobal();
977 rTempAli.ClobberGlobal();
978
979 return OK;
980
981 } /*** end of AlnToHMM2() ***/
982
983
984 /**
985 * HHMake_Wrapper()
986 *
987 * @brief turn alignment (from file) into HHM (HMM) on file
988 *
989 * @param[out] hmm_out
990 * name of file with HMM info corresponding to tmp_aln
991 * @param[in] tmp_aln
992 * name of file with alignment, to be turned into HMM (HHM)
993 *
994 */
995 extern "C" int
996 HHMake_Wrapper(char *tmp_aln, char *hmm_out)
997 {
998
999 HMM rTemp; /* temporary template */
1000 Alignment rTempAli; /* temporary alignment */
1001 hmm_light *rHMM_p = NULL;
1002
1003 /** Note:
1004 this is a wrapper for a stand-alone program hhmake.
1005 hhmake uses a different set of parameters than hhalign.
1006 However, all parameters are GLOBAL.
1007 So at this point we register the hhalign settings,
1008 reset them to hhmake settings and set them back
1009 at the end of the function
1010 */
1011
1012 /* save settings from hhalign */
1013 int iParShowcons=par.showcons;
1014 int iParAppend=par.append;
1015 int iParNseqdis=par.nseqdis;
1016 int iParMark=par.mark;
1017 int iParMaxSeqid=par.max_seqid;
1018 int iParQid=par.qid;
1019 double dParQsc=par.qsc;
1020 int iParCoverage=par.coverage;
1021 int iParNdiff=par.Ndiff;
1022 int iParCoverageCore=par.coverage_core;
1023 double dParQscCore=par.qsc_core;
1024 double dParCoresc=par.coresc;
1025 int iParM=par.M;
1026 int iParMgaps=par.Mgaps;
1027 int iParMatrix=par.matrix;
1028 int iParPcm=par.pcm;
1029 int iParPcw=par.pcw;
1030 double dParGapb=par.gapb;
1031 int iParWg=par.wg;
1032
1033 /* these are settings suitable for hhmake */
1034 par.showcons=1; // write consensus sequence into hhm file
1035 par.append=0; // overwrite output file
1036 par.nseqdis=10; // maximum number of query or template sequences to be recoreded in HMM and diplayed in output alignments
1037 par.mark=0; // 1: only marked sequences (or first) get displayed; 0: most divergent ones get displayed
1038 par.max_seqid=90; // default for maximum sequence identity threshold
1039 par.qid=0; // default for maximum sequence identity threshold
1040 par.qsc=-20.0f; // default for minimum score per column with query
1041 par.coverage=0; // default for minimum coverage threshold
1042 par.Ndiff=100; // pick Ndiff most different sequences from alignment
1043 par.coverage_core=30; // Minimum coverage for sequences in core alignment
1044 par.qsc_core=0.3f; // Minimum score per column with core alignment (HMM)
1045 par.coresc=-20.0f; // Minimum score per column with core alignment (HMM)
1046 par.M=1; // match state assignment is by A2M/A3M
1047 par.Mgaps=50; // above this percentage of gaps, columns are assigned to insert states
1048 par.matrix=0; // Subst.matrix 0: Gonnet, 1: HSDM, 2: BLOSUM50 3: BLOSUM62
1049 par.pcm=0; // no amino acid and transition pseudocounts added
1050 par.pcw=0; // wc>0 weighs columns according to their intra-clomun similarity
1051 par.gapb=0.0; // default values for transition pseudocounts; 0.0: add no transition pseudocounts!
1052 par.wg=0; // 0: use local sequence weights 1: use local ones
1053
1054
1055 if (OK != ReadAndPrepare(READ_ALN_2_HMM,
1056 NULL, 0, rHMM_p, NULL, NULL, NULL,
1057 tmp_aln, rTemp, &rTempAli)) {
1058 return FAILURE;
1059 }
1060
1061 rTemp.WriteToFile(hmm_out);
1062
1063
1064
1065 /* restore settings from hhalign */
1066 par.showcons=iParShowcons;
1067 par.append=iParAppend;
1068 par.nseqdis=iParNseqdis;
1069 par.mark=iParMark;
1070 par.max_seqid=iParMaxSeqid;
1071 par.qid=iParQid;
1072 par.qsc=dParQsc;
1073 par.coverage=iParCoverage;
1074 par.Ndiff=iParNdiff;
1075 par.coverage_core=iParCoverageCore;
1076 par.qsc_core=dParQscCore;
1077 par.coresc=dParCoresc;
1078 par.M=iParM;
1079 par.Mgaps=iParMgaps;
1080 par.matrix=iParMatrix;
1081 par.pcm=iParPcm;
1082 par.pcw=iParPcw;
1083 par.gapb=dParGapb;
1084 par.wg=iParWg;
1085
1086
1087 /* prepare exit */
1088 rTemp.ClobberGlobal();
1089 rTempAli.ClobberGlobal();
1090
1091 return OK;
1092 }
1093
1094 /**
1095 * readHMMWrapper()
1096 *
1097 * @brief read HMM from file, copy (relevant) info into struct
1098 *
1099 * @param[out] prHMM
1100 * structure with pseudocounts etc, probably uninitialised on entry
1101 * @param[in] pcHMM_input
1102 * name of file with HMM
1103 *
1104 */
1105 extern "C" int
1106 readHMMWrapper(hmm_light *rHMM_p,
1107 char *pcHMM_input)
1108 {
1109 par.maxResLen = 15002;
1110 HMM rTemp(1000,par.maxResLen); /* temporary template */
1111 Alignment rTempAli; /* temporary alignment */
1112 /* AW changed init from {0} to 0 because it failed to compile with
1113 * gcc 4.3.3 with the following error:
1114 * error: braces around initializer for non-aggregate type
1115 */
1116 /* FS taken out initialiser alltogether */
1117
1118 /* 0th arg of RnP is the type of RnP, ie,
1119 enum {INTERN_ALN_2_HMM = 0, READ_ALN_2_HMM, READ_HMM_2_HMM};*/
1120 /* 1st arg of ReadAndPrepare() is ppcSeqs, 2nd is #seq */
1121 /* FIXME: at this stage the 3rd arg, rHMM_p, only acts as a dummy.
1122 this is rather silly, as it is the actual struct that will
1123 carry the HMM info at the end.
1124 If we write it already in ReadAndPrepare() then we could
1125 dispense with all this friend-class nonsense */
1126 if (OK != ReadAndPrepare(READ_HMM_2_HMM,
1127 NULL, 0, rHMM_p, NULL, NULL, NULL,
1128 pcHMM_input, rTemp, &rTempAli)) {
1129 return FAILURE;
1130 }
1131
1132 /* an important piece of information I want to get out of here
1133 is the consenssus sequence. there are however certain
1134 Pfam HMMs that don't trigger consensus calculation.
1135 at the moment I (FS) don't understand why this is
1136 (or rather why this is not). The proper place to do this
1137 should be inside ReadAndPrepare():ReadHMMer(), but
1138 I have not quite penetrated the logic there.
1139 Therefore I try to catch this condition at this point (here)
1140 and rectify it.
1141 */
1142 if (-1 == rHMM_p->ncons){
1143 int i, iA;
1144 rHMM_p->ncons = rHMM_p->nfirst;
1145
1146 for (i = 0; i < rHMM_p->L; i++){
1147 double dPmax = 0.00;
1148 int iAmax = -1;
1149 for (iA = 0; iA < AMINOACIDS; iA++){
1150 if (rHMM_p->f[i][iA] > dPmax){
1151 iAmax = iA;
1152 dPmax = rHMM_p->f[i][iA];
1153 }
1154 } /* (0 <= iA < AMINOACIDS) */
1155 rHMM_p->seq[rHMM_p->ncons][i] = i2aa(iAmax);
1156 } /* (0 <= i < rHMM_p->L) */
1157
1158 } /* ncons not set */
1159
1160
1161 rTemp.ClobberGlobal();
1162 rTempAli.ClobberGlobal();
1163
1164 return OK;
1165
1166 } /*** end: readHMMWrapper() ***/
1167
1168
1169
1170
1171
1172
1173 /////////////////////////////////////////////////////////////////////////////
1174 /**
1175 * @brief Do precalculations for q and t to prepare comparison
1176 */
1177 void
1178 PrepareTemplate(HMM& q, HMM& t, int format)
1179 {
1180 if (format==0) // HHM format
1181 {
1182 // Add transition pseudocounts to template
1183 t.AddTransitionPseudocounts();
1184
1185 // Generate an amino acid frequency matrix from t.f[i][a] with max pseudocounts (tau=1) -> t.g[i][a]
1186 t.PreparePseudocounts();
1187
1188 // Add amino acid pseudocounts to template: t.p[i][a] = (1-tau)*f[i][a] + tau*g[i][a]
1189 t.AddAminoAcidPseudocounts();
1190 }
1191 else // HHMER format
1192 {
1193 // Don't add transition pseudocounts to template
1194 // t.AddTransitionPseudocounts(par.gapd, par.gape, par.gapf, par.gapg, par.gaph, par.gapi, 0.0);
1195
1196 // Generate an amino acid frequency matrix from f[i][a] with full pseudocount admixture (tau=1) -> g[i][a]
1197 t.PreparePseudocounts();
1198
1199 // DON'T ADD amino acid pseudocounts to temlate: pcm=0! t.p[i][a] = t.f[i][a]
1200 t.AddAminoAcidPseudocounts(0, par.pca, par.pcb, par.pcc);
1201 }
1202
1203 // Modify transition probabilities to include SS-dependent penalties
1204 if (par.ssgap) t.UseSecStrucDependentGapPenalties();
1205
1206 if (par.forward>=1) t.Log2LinTransitionProbs(1.0);
1207
1208 // Factor Null model into HMM t
1209 // ATTENTION! t.p[i][a] is divided by pnul[a] (for reasons of efficiency) => do not reuse t.p
1210 t.IncludeNullModelInHMM(q,t); // Can go BEFORE the loop if not dependent on template
1211
1212 return;
1213 }
1214
1215
1216 /*** end of hhfunc-C.h ***/