annotate clustalomega/clustal-omega-1.0.2/src/clustal-omega.c @ 1:bc707542e5de

Uploaded
author clustalomega
date Thu, 21 Jul 2011 13:35:08 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
3 /*********************************************************************
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
4 * Clustal Omega - Multiple sequence alignment
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
5 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
6 * Copyright (C) 2010 University College Dublin
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
7 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
8 * Clustal-Omega is free software; you can redistribute it and/or
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
9 * modify it under the terms of the GNU General Public License as
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
10 * published by the Free Software Foundation; either version 2 of the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
11 * License, or (at your option) any later version.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
12 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
13 * This file is part of Clustal-Omega.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
14 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
15 ********************************************************************/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
16
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
17 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
18 * RCS $Id: clustal-omega.c 254 2011-06-21 13:07:50Z andreas $
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
19 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
20
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
21 #ifdef HAVE_CONFIG_H
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
22 #include "config.h"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
23 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
24
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
25 #include <assert.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
26
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
27 #include "clustal-omega.h"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
28 #include "hhalign/general.h"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
29
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
30 /* The following comment block contains the frontpage/mainpage of the doxygen
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
31 * documentation. Please add some more info. FIXME add more
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
32 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
33
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
34 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
35 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
36 * @mainpage Clustal-Omega Documentation
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
37 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
38 * @section intro_sec Introduction
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
39 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
40 * For more information see http://www.clustal.org/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
41 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
42 * @section api_section API
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
43 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
44 * @subsection example_prog_subsection An Example Program
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
45 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
46 * To use libclustalo you will have to include the clustal-omega.h header and
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
47 * link against libclustalo. For linking against libclustalo you will have to
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
48 * use a C++ compiler, no matter if your program was written in C or C++.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
49 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
50 * First compile (no linking) your source (for an example see section "\ref
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
51 * example_src_subsubsec"):
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
52 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
53 * @code
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
54 * $ gcc -c -ansi -Wall clustalo-api-test.c
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
55 * @endcode
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
56 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
57 * Then link against libclustalo (we recommend the use of pkg-config as
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
58 * explained in \ref pkgconfig_subsubsec). Assuming Clustal Omega was installed
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
59 * in system-wide default directory (e.g. /usr) just type:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
60 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
61 * @code
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
62 * $ g++ -ansi -Wall -o clustalo-api-test clustalo-api-test.o -lclustalo
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
63 * @endcode
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
64 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
65 * Voila! Now you have your own alignment program which can be run with
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
66 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
67 * @code
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
68 * $ ./clustalo-api-test <your-sequence-input>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
69 * @endcode
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
70 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
71 * It's best to use the same compiler that you used for compiling libclustalo.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
72 * If libclustalo was compiled with OpenMP support, you will have to use OpenMP
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
73 * flags for your program as well.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
74 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
75 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
76 * @subsubsection pkgconfig_subsubsec Using pkg-config / Figuring out compiler flags
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
77 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
78 * Clustal Omega comes with support for <a
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
79 * href="http://pkg-config.freedesktop.org">pkg-config</a>, which means you
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
80 * can run
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
81 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
82 * @code
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
83 * $ pkg-config --cflags --libs clustalo
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
84 * @endcode
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
85 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
86 * to figure out cflags and library flags needed to compile and link against
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
87 * libclustalo. This is especially handy if Clustal Omega was installed to a
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
88 * non-standard directory.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
89 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
90 * You might have to change PKG_CONFIG_PATH. For example, if you used the prefix $HOME/local/ for
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
91 * installation then you will first need to set PKG_CONFIG_PATH:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
92 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
93 * @code
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
94 * $ export PKG_CONFIG_PATH=$HOME/local/lib/pkgconfig
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
95 * $ pkg-config --cflags --libs clustalo
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
96 * @endcode
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
97 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
98 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
99 * To compile your source use:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
100 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
101 * @code
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
102 * $ export PKG_CONFIG_PATH=$HOME/local/lib/pkgconfig
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
103 * $ gcc -c -ansi -Wall clustalo-api-test.c $(pkg-config --cflags clustalo)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
104 * $ g++ -ansi -Wall -o clustalo-api-test clustalo-api-test.o $(pkg-config --libs clustalo)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
105 * @endcode
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
106 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
107 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
108 * @subsubsection example_src_subsubsec Example Source Code
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
109 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
110 * @include "clustalo-api-test.c"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
111 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
112 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
113 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
114
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
115
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
116
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
117
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* Temporary build-time switches, kept while HMM construction is still being
 * moved into the program itself.
 *
 * Internalising hhmake caused problems, so as a crutch the alignment can be
 * written to a file and turned into an HMM by an external hmmer/hhmake run
 * through a system call. All of this is meant to go once hhmake is properly
 * internalised.
 */
#define INDIRECT_HMM 0 /* temp flag: (1) write aln to file, use system(hmmer/hhmake), (0) internal hhmake */
#define USEHMMER 1 /* temp flag: use system(hmmer) to build HMM */
#define USEHHMAKE (!USEHMMER) /* temp flag: use system(hhmake) to build HMM */


/* shuffle order of input sequences */
#define SHUFFLE_INPUT_SEQ_ORDER 0

/* sort input sequences by length */
#define SORT_INPUT_SEQS 0


/* number of threads; has external linkage, so it may be set from other
 * translation units */
int iNumberOfThreads;

/* Score improvement threshold for iterations. Marked by its author as
 * unused. Declared as double because the value 0.01 cannot be held by an
 * integer type (conversion to int yields 0).
 */
static const double ITERATION_SCORE_IMPROVEMENT_THRESHOLD = 0.01;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
140
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
141
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
142 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
143 * @brief Print Long version information to pre-allocated char.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
144 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
145 * @note short version
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
146 * information is equivalent to PACKAGE_VERSION
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
147 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
148 * @param[out] pcStr
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
149 * char pointer to write to preallocated to hold iSize chars.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
150 * @param[in] iSize
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
151 * size of pcStr
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
152 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
153 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
154 PrintLongVersion(char *pcStr, int iSize)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
155 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
156 snprintf(pcStr, iSize, "version %s; code-name '%s'; build date %s",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
157 PACKAGE_VERSION, PACKAGE_CODENAME, __DATE__);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
158 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
159 /* end of PrintLongVersion() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
160
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
161
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
162
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
163 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
164 * @brief free aln opts members
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
165 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
166 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
167 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
168 FreeAlnOpts(opts_t *prAlnOpts) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
169 if (NULL != prAlnOpts->pcGuidetreeInfile) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
170 CKFREE(prAlnOpts->pcGuidetreeInfile);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
171 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
172 if (NULL != prAlnOpts->pcGuidetreeOutfile) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
173 CKFREE(prAlnOpts->pcGuidetreeOutfile);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
174 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
175 if (NULL != prAlnOpts->pcDistmatOutfile) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
176 CKFREE(prAlnOpts->pcDistmatOutfile);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
177 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
178 if (NULL != prAlnOpts->pcDistmatInfile) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
179 CKFREE(prAlnOpts->pcDistmatInfile);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
180 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
181 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
182 /* end of FreeAlnOpts() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
183
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
184
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
185
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
186 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
187 * @brief Sets members of given user opts struct to default values
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
188 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
189 * @param[out] prOpts
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
190 * User opt struct to initialise
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
191 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
192 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
193 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
194 SetDefaultAlnOpts(opts_t *prOpts) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
195 prOpts->bAutoOptions = FALSE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
196
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
197 prOpts->pcDistmatInfile = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
198 prOpts->pcDistmatOutfile = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
199
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
200 prOpts->iClusteringType = CLUSTERING_UPGMA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
201 prOpts->iPairDistType = PAIRDIST_KTUPLE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
202 prOpts->bUseMbed = TRUE; /* FS, r250 -> */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
203 prOpts->bUseMbedForIteration = TRUE; /* FS, r250 -> */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
204 prOpts->pcGuidetreeOutfile = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
205 prOpts->pcGuidetreeInfile = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
206
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
207 prOpts->ppcHMMInput = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
208 prOpts->iHMMInputFiles = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
209
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
210 prOpts->iNumIterations = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
211 prOpts->bIterationsAuto = FALSE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
212 prOpts->iMaxGuidetreeIterations = INT_MAX;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
213 prOpts->iMaxHMMIterations = INT_MAX;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
214 prOpts->iMacRam = 2048; /* give 2GB to MAC algorithm. FS, r240 -> r241 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
215 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
216 /* end of SetDefaultAlnOpts() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
217
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
218
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
219
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
220 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
221 * @brief Check logic of parsed user options. Will exit (call Log(&rLog, LOG_FATAL, ))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
222 * on Fatal logic error
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
223 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
224 * @param[in] prOpts
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
225 * Already parsed user options
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
226 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
227 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
228 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
229 AlnOptsLogicCheck(opts_t *prOpts)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
230 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
231 /* guide-tree & distmat
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
232 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
233 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
234 if (prOpts->pcDistmatInfile && prOpts->pcGuidetreeInfile) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
235 Log(&rLog, LOG_FATAL, "Read distances *and* guide-tree from file doesn't make sense.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
236 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
237
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
238 if (prOpts->pcDistmatOutfile && prOpts->pcGuidetreeInfile) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
239 Log(&rLog, LOG_FATAL, "Won't be able to save distances to file, because I got a guide-tree as input.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
240 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
241
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
242 /* combination of options that don't make sense when not iterating
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
243 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
244 if (prOpts->iNumIterations==0 && prOpts->bIterationsAuto != TRUE) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
245
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
246 if (prOpts->pcGuidetreeInfile && prOpts->pcGuidetreeOutfile) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
247 Log(&rLog, LOG_FATAL, "Got a guide-tree as input and output which doesn't make sense when not iterating.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
248 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
249 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
250 if (prOpts->pcGuidetreeInfile && prOpts->bUseMbed > 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
251 Log(&rLog, LOG_FATAL, "Got a guide-tree as input and was requested to cluster with mBed, which doesn't make sense when not iterating.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
252 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
253 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
254 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
255 AW: bUseMbedForIteration default since at least R252
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
256 if (prOpts->bUseMbedForIteration > 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
257 Log(&rLog, LOG_FATAL, "No iteration requested, but mbed for iteration was set. Paranoia exit.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
258 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
259 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
260 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
261
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
262 if (prOpts->iMacRam < 512) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
263
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
264 Log(&rLog, LOG_INFO, "Memory for MAC Algorithm quite low, Viterbi Algorithm may be triggered.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
265
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
266 if (prOpts->iMacRam < 1) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
267
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
268 Log(&rLog, LOG_WARN, "Viterbi Algorithm always turned on, increase MAC-RAM to turn on MAC.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
269 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
270 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
271
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
272 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
273 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
274 /* end of AlnOptsLogicCheck() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
275
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
276
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
277 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
278 * @brief FIXME doc
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
279 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
280 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
281 PrintAlnOpts(FILE *prFile, opts_t *prOpts)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
282 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
283 int iAux;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
284
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
285
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
286 /* keep in same order as struct */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
287 fprintf(prFile, "option: auto-options = %d\n", prOpts->bAutoOptions);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
288 fprintf(prFile, "option: distmat-infile = %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
289 NULL != prOpts->pcDistmatInfile? prOpts->pcDistmatInfile: "(null)");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
290 fprintf(prFile, "option: distmat-outfile = %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
291 NULL != prOpts->pcDistmatOutfile? prOpts->pcDistmatOutfile: "(null)");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
292 fprintf(prFile, "option: clustering-type = %d\n", prOpts->iClusteringType);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
293 fprintf(prFile, "option: pair-dist-type = %d\n", prOpts->iPairDistType);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
294 fprintf(prFile, "option: use-mbed = %d\n", prOpts->bUseMbed);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
295 fprintf(prFile, "option: use-mbed-for-iteration = %d\n", prOpts->bUseMbedForIteration);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
296 fprintf(prFile, "option: guidetree-outfile = %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
297 NULL != prOpts->pcGuidetreeOutfile? prOpts->pcGuidetreeOutfile: "(null)");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
298 fprintf(prFile, "option: guidetree-infile = %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
299 NULL != prOpts->pcGuidetreeInfile? prOpts->pcGuidetreeInfile: "(null)");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
300 for (iAux=0; iAux<prOpts->iHMMInputFiles; iAux++) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
301 fprintf(prFile, "option: hmm-input no %d = %s\n", iAux, prOpts->ppcHMMInput[iAux]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
302 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
303 fprintf(prFile, "option: hmm-input-files = %d\n", prOpts->iHMMInputFiles);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
304 fprintf(prFile, "option: num-iterations = %d\n", prOpts->iNumIterations);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
305 fprintf(prFile, "option: iterations-auto = %d\n", prOpts->bIterationsAuto);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
306 fprintf(prFile, "option: max-hmm-iterations = %d\n", prOpts->iMaxHMMIterations);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
307 fprintf(prFile, "option: max-guidetree-iterations = %d\n", prOpts->iMaxGuidetreeIterations);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
308 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
309 /* end of PrintAlnOpts() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
310
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
311
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
312
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
313 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
314 * @brief Returns major version of HMMER. Whichever hmmbuild version
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
315 * is found first in your PATH will be used
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
316 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
317 * @return -1 on error, major hmmer version otherwise
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
318 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
319 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
320 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
321 HmmerVersion()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
322 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
323 char zcHmmerTestCall[] = "hmmbuild -h";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
324 FILE *fp = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
325 int iMajorVersion = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
326 char zcLine[16384];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
327
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
328 if (NULL == (fp = popen(zcHmmerTestCall, "r"))) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
329 Log(&rLog, LOG_ERROR, "Couldn't exec %s", zcHmmerTestCall);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
330 return -1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
331 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
332 while (fgets(zcLine, sizeof(zcLine), fp)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
333 char *pcLocate;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
334 if ((pcLocate = strstr(zcLine, "HMMER "))) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
335 iMajorVersion = atoi(&pcLocate[6]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
336 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
337 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
338 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
339 pclose(fp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
340
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
341 return iMajorVersion;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
342 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
343 /* end of HmmerVersion() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
344
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
345
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
346
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
347 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
348 * @brief Create a HHM file from aligned sequences
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
349 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
350 * @warning Should be eliminated in the future
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
351 * as building routine should not create intermediate files
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
352 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
353 * @param[in] prMSeq
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
354 * Aligned mseq_t
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
355 * @param[in] pcHMMOut
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
356 * HMM output file name
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
357 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
358 * @return Non-zero on error
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
359 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
360 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
361 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
362 AlnToHHMFile(mseq_t *prMSeq, char *pcHMMOut)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
363 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
364 char *tmp_aln = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
365 int retcode = OK;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
366
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
367 assert(NULL!=prMSeq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
368 assert(NULL!=pcHMMOut);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
369
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
370 if (FALSE == prMSeq->aligned) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
371 Log(&rLog, LOG_ERROR, "Sequences need to be aligned to create an HMM");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
372 return FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
373 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
374
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
375 /* Convert alignment to a2m, and call hhmake
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
376 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
377 * can't be static templates, or mktemp fails (at least on os x
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
378 * (with a bus error))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
379 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
380 * gcc says we should use mkstemp to avoid race conditions,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
381 * but that returns a file descriptor, which is of no use to
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
382 * us
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
383 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
384 /* NOTE: the following won't work on windows: missing /tmp/ */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
385 tmp_aln = CkStrdup("/tmp/clustalo_tmpaln_XXXXXX");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
386 if (NULL == mktemp(tmp_aln)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
387 Log(&rLog, LOG_ERROR, "Could not create temporary alignment filename");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
388 retcode = FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
389 goto cleanup_and_return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
390 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
391 if (WriteAlignment(prMSeq, tmp_aln, MSAFILE_A2M)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
392 Log(&rLog, LOG_ERROR, "Could not save alignment to %s", tmp_aln);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
393 retcode = FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
394 goto cleanup_and_return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
395 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
396
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
397 if (HHMake_Wrapper(tmp_aln, pcHMMOut)){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
398 Log(&rLog, LOG_ERROR, "Could not convert alignment %s into HHM", tmp_aln);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
399 retcode = FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
400 goto cleanup_and_return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
401 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
402
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
403
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
404 cleanup_and_return:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
405
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
406 if (NULL != tmp_aln) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
407 if (FileExists(tmp_aln)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
408 if (remove(tmp_aln)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
409 Log(&rLog, LOG_WARN, "Removing %s failed. Continuing anyway", tmp_aln);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
410 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
411 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
412 CKFREE(tmp_aln);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
413 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
414
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
415 return retcode;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
416
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
417 } /* end of AlnToHHMFile() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
418
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
419
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
420
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
421 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
422 * @brief Create a HMM file from aligned sequences
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
423 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
424 * @warning Should be replaced in the future by some internal HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
425 * building routine that does not call external programs
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
426 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
427 * @param[in] prMSeq
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
428 * Aligned mseq_t
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
429 * @param[in] pcHMMOut
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
430 * HMM output file name
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
431 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
432 * @return Non-zero on error
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
433 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
434
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
435 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
436 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
437 AlnToHMMFile(mseq_t *prMSeq, const char *pcHMMOut)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
438 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
439 char *tmp_aln = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
440 char *tmp_hmm = NULL; /* only needed for hmmer3 to hmmer2 conversion */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
441 char cmdbuf[16384];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
442 int iHmmerVersion = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
443 int retcode = OK;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
444
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
445 assert(NULL!=prMSeq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
446 assert(NULL!=pcHMMOut);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
447
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
448 if (FALSE == prMSeq->aligned) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
449 Log(&rLog, LOG_ERROR, "Sequences need to be aligned to create an HMM");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
450 return FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
451 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
452
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
453 iHmmerVersion = HmmerVersion();
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
454 if (2 != iHmmerVersion && 3 != iHmmerVersion) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
455 Log(&rLog, LOG_ERROR, "Could not find suitable HMMER binaries");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
456 return FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
457 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
458
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
459 /* Convert alignment to stockholm, call hmmbuild and then
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
460 * either hmmconvert (hmmer3) or hmmcalibrate (hmmer2)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
461 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
462 * can't be static templates, or mktemp fails (at least on os x
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
463 * (with a bus error))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
464 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
465 * gcc says we should use mkstemp to avoid race conditions,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
466 * but that returns a file descriptor, which is of no use to
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
467 * us
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
468 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
469 /* NOTE: the following won't work on windows: missing /tmp/ */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
470 tmp_aln = CkStrdup("/tmp/clustalo_tmpaln_XXXXXX");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
471 if (NULL == mktemp(tmp_aln)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
472 Log(&rLog, LOG_ERROR, "Could not create temporary alignment filename");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
473 retcode = FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
474 goto cleanup_and_return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
475 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
476 if (WriteAlignment(prMSeq, tmp_aln, MSAFILE_STOCKHOLM)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
477 Log(&rLog, LOG_ERROR, "Could not save alignment to %s", tmp_aln);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
478 retcode = FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
479 goto cleanup_and_return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
480 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
481
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
482 if (2 == iHmmerVersion) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
483 sprintf(cmdbuf, "hmmbuild %s %s >/dev/null && hmmcalibrate %s >/dev/null",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
484 pcHMMOut, tmp_aln, pcHMMOut);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
485 if (system(cmdbuf)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
486 Log(&rLog, LOG_ERROR, "Command '%s' failed", cmdbuf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
487 retcode = FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
488 goto cleanup_and_return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
489 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
490 } else if (3 == iHmmerVersion) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
491 /* NOTE: the following won't work on windows: missing /tmp/ */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
492 tmp_hmm = CkStrdup("/tmp/clustalo_tmphmm2_XXXXXX");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
493 if (NULL == mktemp(tmp_hmm)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
494 Log(&rLog, LOG_ERROR, "Could not create temporary hmm filename");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
495 retcode = FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
496 goto cleanup_and_return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
497 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
498 sprintf(cmdbuf, "hmmbuild %s %s >/dev/null && hmmconvert -2 %s > %s",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
499 tmp_hmm, tmp_aln, tmp_hmm, pcHMMOut);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
500 if (system(cmdbuf)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
501 Log(&rLog, LOG_ERROR, "Command '%s' failed", cmdbuf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
502 retcode = FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
503 goto cleanup_and_return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
504 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
505 } else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
506 CKFREE(tmp_aln);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
507 Log(&rLog, LOG_FATAL, "Internal error: Unknown Hmmer version %d", iHmmerVersion);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
508 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
509
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
510
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
511 cleanup_and_return:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
512
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
513 if (NULL != tmp_aln) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
514 if (FileExists(tmp_aln)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
515 if (remove(tmp_aln)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
516 Log(&rLog, LOG_WARN, "Removing %s failed. Continuing anyway", tmp_aln);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
517 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
518 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
519 CKFREE(tmp_aln);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
520 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
521 if (NULL != tmp_hmm) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
522 if (FileExists(tmp_hmm)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
523 if (remove(tmp_hmm)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
524 Log(&rLog, LOG_WARN, "Removing %s failed. Continuing anyway", tmp_hmm);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
525 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
526 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
527 CKFREE(tmp_hmm);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
528 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
529
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
530 return retcode;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
531 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
532 /* end of AlnToHMMFile() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
533
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
534
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
535
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
536 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
537 * @brief Convert a multiple sequence structure into a HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
538 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
539 * @param[out] prHMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
540 * Pointer to preallocted HMM which will be set here
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
541 * @param[in] prMSeq
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
542 * Pointer to an alignment
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
543 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
544 * @return 0 on error, non-0 otherwise
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
545 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
546 * @see AlnToHMMFile()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
547 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
548 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
549 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
550 AlnToHMM(hmm_light *prHMM, mseq_t *prMSeq)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
551 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
552 char *pcHMM; /* temp hmm file */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
553
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
554 Log(&rLog, LOG_INFO,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
555 "Using HMMER version %d to calculate a new HMM.",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
556 HmmerVersion());
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
557 /* FIXME replace all this with internal HMM computation (HHmake) */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
558
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
559 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
560 * @warning the following probably won't work on windows: missing
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
561 * /tmp/. Should be ok on Cygwin though
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
562 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
563 pcHMM = CkStrdup("/tmp/clustalo-hmm-iter_XXXXXX");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
564 if (NULL == mktemp(pcHMM)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
565 Log(&rLog, LOG_ERROR, "Could not create temporary hmm filename");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
566 CKFREE(pcHMM);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
567 return FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
568 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
569
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
570 /* Create a HMM representing the current alignment
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
571 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
572 #if USEHMMER
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
573 if (AlnToHMMFile(prMSeq, pcHMM)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
574 Log(&rLog, LOG_ERROR, "AlnToHMMFile() on %s failed.", pcHMM);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
575 CKFREE(pcHMM);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
576 return FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
577 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
578 #elif USEHHMAKE
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
579 if (AlnToHHMFile(prMSeq, pcHMM)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
580 Log(&rLog, LOG_ERROR, "AlnToHHMFile() on %s failed.", pcHMM);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
581 CKFREE(pcHMM);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
582 return FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
583 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
584 /* Log(&rLog, LOG_FATAL, "Method to create HHM (HMM using hhmake) not installed yet"); */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
585 #else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
586 Log(&rLog, LOG_FATAL, "Unknown method to create temporary HMM");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
587 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
588
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
589 /* Read HMM information
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
590 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
591 if (OK != readHMMWrapper(prHMM, pcHMM)){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
592 Log(&rLog, LOG_ERROR, "Processing of HMM file %s failed", pcHMM);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
593 CKFREE(pcHMM);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
594 return FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
595 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
596
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
597 if (remove(pcHMM)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
598 Log(&rLog, LOG_WARN, "Removing %s failed. Continuing anyway", pcHMM);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
599 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
600 CKFREE(pcHMM);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
601
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
602 return OK;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
603 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
604 /* end of AlnToHMM() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
605
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
606
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
607
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
608 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
609 * @brief FIXME
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
610 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
611 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
612 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
613 InitClustalOmega(int iNumThreadsRequested)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
614 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
615
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
616 #ifdef HAVE_OPENMP
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
617 iNumberOfThreads = iNumThreadsRequested;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
618 omp_set_num_threads(iNumberOfThreads);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
619 #else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
620 if (iNumThreadsRequested>1) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
621 Log(&rLog, LOG_FATAL, "Cannot change number of threads to %d. %s was build without OpenMP support.",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
622 iNumThreadsRequested, PACKAGE_NAME);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
623 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
624 iNumberOfThreads = 1; /* need to set this, even if build without support */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
625 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
626
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
627 Log(&rLog, LOG_INFO, "Using %d threads",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
628 iNumberOfThreads);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
629
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
630 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
631 /* end of InitClustalOmega() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
632
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
633
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
634
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
635 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
636 * @brief Defines an alignment order, which adds sequences
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
637 * sequentially, i.e. one at a time starting with seq 1 & 2
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
638 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
639 * @param[out] piOrderLR_p
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
640 * order in which nodes/profiles are to be merged/aligned
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
641 * @param[in] iNumSeq
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
642 * Number of sequences
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
643 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
644 * @see TraverseTree()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
645 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
646 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
647 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
648 SequentialAlignmentOrder(int **piOrderLR_p, int iNumSeq)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
649 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
650 unsigned int uNodes = iNumSeq*2-1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
651 unsigned int uNodeCounter = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
652 unsigned int uSeqCounter = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
653
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
654 Log(&rLog, LOG_FATAL, "FIXME: Untested...");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
655
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
656 (*piOrderLR_p) = (int *)CKCALLOC(DIFF_NODE * uNodes, sizeof(int));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
657 /* loop over merge nodes, which have per definition even indices
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
658 * and set up children which have odd indices
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
659 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
660 uSeqCounter = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
661 for (uNodeCounter=iNumSeq; uNodeCounter<uNodes; uNodeCounter+=1) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
662 unsigned int uLeftChildNodeIndex = uNodeCounter-1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
663 unsigned int uRightChildNodeIndex = uNodeCounter-iNumSeq+1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
664 unsigned int uParentNodeIndex = uNodeCounter+1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
665
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
666 /* merge node setup */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
667 (*piOrderLR_p)[DIFF_NODE*uNodeCounter+LEFT_NODE] = uLeftChildNodeIndex;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
668 (*piOrderLR_p)[DIFF_NODE*uNodeCounter+RGHT_NODE] = uRightChildNodeIndex;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
669 (*piOrderLR_p)[DIFF_NODE*uNodeCounter+PRNT_NODE] = uParentNodeIndex;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
670 /* only setup left child if at first merge node, all other left childs
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
671 * should be merge nodes that are already set up. also correct
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
672 * left node number here.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
673 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
674 if (uNodeCounter==iNumSeq) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
675 (*piOrderLR_p)[DIFF_NODE*uNodeCounter+LEFT_NODE] = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
676
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
677 (*piOrderLR_p)[0+LEFT_NODE] = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
678 (*piOrderLR_p)[0+RGHT_NODE] = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
679 (*piOrderLR_p)[0+PRNT_NODE] = uNodeCounter;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
680 uSeqCounter++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
681
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
682 Log(&rLog, LOG_FORCED_DEBUG, "Set up first leaf with node counter %d: left=%d right=%d parent=%d",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
683 0,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
684 (*piOrderLR_p)[DIFF_NODE*uLeftChildNodeIndex+LEFT_NODE],
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
685 (*piOrderLR_p)[DIFF_NODE*uLeftChildNodeIndex+RGHT_NODE],
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
686 (*piOrderLR_p)[DIFF_NODE*uLeftChildNodeIndex+PRNT_NODE]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
687 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
688 Log(&rLog, LOG_FORCED_DEBUG, "Set up merge node with node counter %d: left=%d right=%d parent=%d",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
689 uNodeCounter, (*piOrderLR_p)[DIFF_NODE*uNodeCounter+LEFT_NODE],
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
690 (*piOrderLR_p)[DIFF_NODE*uNodeCounter+RGHT_NODE],
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
691 (*piOrderLR_p)[DIFF_NODE*uNodeCounter+PRNT_NODE]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
692
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
693 /* right child */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
694 (*piOrderLR_p)[DIFF_NODE*uRightChildNodeIndex+LEFT_NODE] = uSeqCounter;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
695 (*piOrderLR_p)[DIFF_NODE*uRightChildNodeIndex+RGHT_NODE] = uSeqCounter;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
696 (*piOrderLR_p)[DIFF_NODE*uRightChildNodeIndex+PRNT_NODE] = uNodeCounter;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
697 uSeqCounter++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
698
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
699 Log(&rLog, LOG_FORCED_DEBUG, "Set up leaf with node counter %d: left=%d right=%d parent=%d",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
700 uRightChildNodeIndex, (*piOrderLR_p)[DIFF_NODE*uRightChildNodeIndex+LEFT_NODE],
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
701 (*piOrderLR_p)[DIFF_NODE*uRightChildNodeIndex+RGHT_NODE],
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
702 (*piOrderLR_p)[DIFF_NODE*uRightChildNodeIndex+PRNT_NODE]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
703 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
704 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
705 /* end of SequentialAlignmentOrder() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
706
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
707
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
708
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
709 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
710 * @brief Defines the alignment order by calculating a guide tree. In
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
711 * a first-step pairwise distances will be calculated (or read from a
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
712 * file). In a second step those distances will be clustered and a
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
713 * guide-tree created. Steps 1 and 2 will be skipped if a guide-tree
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
714 * file was given, in which case the guide-tree will be just read from
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
715 * the file.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
716 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
717 * @param[out] piOrderLR_p
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
718 * order in which nodes/profiles are to be merged/aligned
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
719 * @param[out] pdSeqWeights_p
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
720 * Sequence weights
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
721 * @param[out] pdSeqWeights_p
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
722 * Sequence weights
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
723 * @param[in] prMSeq
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
724 * The sequences from which the alignment order is to be calculated
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
725 * @param[in] iPairDistType
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
726 * Method of pairwise distance comparison
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
727 * @param[in] pcDistmatInfile
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
728 * If not NULL distances will be read from this file instead of being
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
729 * calculated
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
730 * @param[in] pcDistmatOutfile
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
731 * If not NULL computed pairwise distances will be written to this file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
732 * @param[in] iClusteringType
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
733 * Clustering method to be used to cluster the pairwise distances
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
734 * @param[in] pcGuidetreeInfile
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
735 * If not NULL guidetree will be read from this file. Skips pairwise
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
736 * distance and guidetree computation
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
737 * @param[in] pcGuidetreeOutfile
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
738 * If not NULL computed guidetree will be written to this file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
739 * @param[in] bUseMbed
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
740 * If TRUE, fast mBed guidetree computation will be employed
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
741 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
742 * @return Non-zero on error
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
743 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
744 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
745 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
746 AlignmentOrder(int **piOrderLR_p, double **pdSeqWeights_p, mseq_t *prMSeq,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
747 int iPairDistType, char *pcDistmatInfile, char *pcDistmatOutfile,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
748 int iClusteringType, char *pcGuidetreeInfile, char *pcGuidetreeOutfile,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
749 bool bUseMbed)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
750 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
751 /* pairwise distance matrix (tmat in 1.83) */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
752 symmatrix_t *distmat = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
753 /* guide tree */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
754 tree_t *prTree = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
755 int i = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
756
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
757
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
758 /* Shortcut for only two sequences: Do not compute k-tuple
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
759 * distances. Use the same logic as in TraverseTree() to setup
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
760 * piOrderLR_p. Changes there will have to be reflected here as
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
761 * well. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
762 if (2==prMSeq->nseqs) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
763 Log(&rLog, LOG_VERBOSE,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
764 "Have only two sequences: No need to compute pairwise score and compute a tree.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
765
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
766 (*piOrderLR_p) = (int*) CKMALLOC(DIFF_NODE * 3 * sizeof(int));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
767 (*piOrderLR_p)[DIFF_NODE*0+LEFT_NODE] = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
768 (*piOrderLR_p)[DIFF_NODE*0+RGHT_NODE] = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
769 (*piOrderLR_p)[DIFF_NODE*0+PRNT_NODE] = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
770
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
771 (*piOrderLR_p)[DIFF_NODE*1+LEFT_NODE] = 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
772 (*piOrderLR_p)[DIFF_NODE*1+RGHT_NODE] = 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
773 (*piOrderLR_p)[DIFF_NODE*1+PRNT_NODE] = 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
774
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
775 /* root */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
776 (*piOrderLR_p)[DIFF_NODE*2+LEFT_NODE] = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
777 (*piOrderLR_p)[DIFF_NODE*2+RGHT_NODE] = 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
778 (*piOrderLR_p)[DIFF_NODE*2+PRNT_NODE] = 2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
779
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
780 /* Same logic as CalcClustalWeights(). Changes there will
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
781 have to be reflected here as well. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
782 #if USE_WEIGHTS
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
783 (*pdWeights_p) = (double *) CKMALLOC(uNodeCount * sizeof(double));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
784 (*pdWeights_p)[0] = 0.5;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
785 (*pdWeights_p)[1] = 0.5;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
786 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
787
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
788 return OK;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
789 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
790
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
791
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
792 /* compute distance & guide tree, alternatively read distances or
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
793 * guide tree from file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
794 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
795 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
796 if (NULL != pcGuidetreeInfile) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
797 Log(&rLog, LOG_INFO, "Reading guide-tree from %s", pcGuidetreeInfile);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
798 if (GuideTreeFromFile(&prTree, prMSeq, pcGuidetreeInfile)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
799 Log(&rLog, LOG_ERROR, "Reading of guide tree %s failed.", pcGuidetreeInfile);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
800 return FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
801 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
802
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
803 } else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
804
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
805 if (bUseMbed) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
806 if (Mbed(&prTree, prMSeq, iPairDistType, pcGuidetreeOutfile)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
807 Log(&rLog, LOG_ERROR, "mbed execution failed.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
808 return FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
809 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
810 Log(&rLog, LOG_INFO, "Guide-tree computation (mBed) done.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
811 if (NULL != pcDistmatOutfile) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
812 Log(&rLog, LOG_INFO,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
813 "Ignoring request to write distance matrix (am in mBed mode)");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
814 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
815 } else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
816
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
817 if (PairDistances(&distmat, prMSeq, iPairDistType,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
818 0, prMSeq->nseqs, 0, prMSeq->nseqs,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
819 pcDistmatInfile, pcDistmatOutfile)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
820 Log(&rLog, LOG_ERROR, "Couldn't compute pair distances");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
821 return FAILURE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
822 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
823
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
824 /* clustering of distances to get guide tree
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
825 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
826 if (CLUSTERING_UPGMA == iClusteringType) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
827 char **labels;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
828 labels = (char**) CKMALLOC(prMSeq->nseqs * sizeof(char*));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
829 for (i=0; i<prMSeq->nseqs; i++) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
830 labels[i] = prMSeq->sqinfo[i].name;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
831 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
832
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
833 GuideTreeUpgma(&prTree, labels, distmat, pcGuidetreeOutfile);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
834 Log(&rLog, LOG_INFO, "Guide-tree computation done.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
835
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
836 CKFREE(labels);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
837 } else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
838 Log(&rLog, LOG_FATAL, "INTERNAL ERROR %s",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
839 "clustering method should have been checked before");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
840 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
841 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
842 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
843
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
844 #if USE_WEIGHTS
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
845 /* derive sequence weights from tree
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
846 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
847 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
848 Log(&rLog, LOG_INFO, "Calculating sequence weights");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
849 CalcClustalWeights(pdSeqWeights_p, prTree);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
850 for (i = 0; i < GetLeafCount(prTree); i++) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
851 Log(&rLog, LOG_VERBOSE,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
852 "Weight for seq no %d: %s = %f",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
853 i, prMSeq->sqinfo[i].name, (*pdSeqWeights_p)[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
854 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
855 #else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
856 Log(&rLog, LOG_DEBUG, "Not using weights");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
857 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
858
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
859
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
860 /* define traversing order of tree
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
861 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
862 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
863 TraverseTree(piOrderLR_p, prTree, prMSeq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
864 if (rLog.iLogLevelEnabled <= LOG_DEBUG) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
865 /* FIXME: debug only, FS */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
866 uint uNodeIndex;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
867 FILE *fp = LogGetFP(&rLog, LOG_INFO);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
868 Log(&rLog, LOG_DEBUG, "left/right order after tree traversal");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
869 for (uNodeIndex = 0; uNodeIndex < GetNodeCount(prTree); uNodeIndex++) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
870 fprintf(fp, "%3d:\t%2d/%2d -> %d\n", i,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
871 (*piOrderLR_p)[DIFF_NODE*uNodeIndex+LEFT_NODE],
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
872 (*piOrderLR_p)[DIFF_NODE*uNodeIndex+RGHT_NODE],
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
873 (*piOrderLR_p)[DIFF_NODE*uNodeIndex+PRNT_NODE]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
874 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
875 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
876
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
877 FreeMuscleTree(prTree);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
878 FreeSymMatrix(&distmat);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
879
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
880 #if 0
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
881 Log(&rLog, LOG_FATAL, "DEBUG EXIT before leaving %s", __FUNCTION__);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
882 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
883 return OK;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
884 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
885 /* end of AlignmentOrder() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
886
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
887
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
888
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
889 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
890 * @brief Set some options automatically based on number of sequences. Might
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
891 * overwrite some user-set options.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
892 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
893 * @param[out] prOpts
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
894 * Pointer to alignment options structure
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
895 * @param[in] iNumSeq
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
896 * Number of sequences to align
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
897 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
898 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
899 SetAutoOptions(opts_t *prOpts, int iNumSeq) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
900
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
901 Log(&rLog, LOG_INFO,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
902 "Setting options automatically based on input sequence characteristics (might overwrite some of your options).");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
903
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
904 /* AW: new version of mbed is always good (uses subclusters) */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
905 if (FALSE == prOpts->bUseMbed) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
906 Log(&rLog, LOG_INFO, "Auto settings: Enabling mBed.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
907 prOpts->bUseMbed = TRUE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
908 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
909
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
910 if (iNumSeq >= 1000) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
911 if (0 != prOpts->iNumIterations) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
912 Log(&rLog, LOG_INFO, "Auto settings: Disabling iterations.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
913 prOpts->iNumIterations = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
914 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
915
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
916 } else if (iNumSeq < 1000) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
917 if (1 != prOpts->iNumIterations) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
918 Log(&rLog, LOG_INFO, "Auto settings: Setting iteration to 1.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
919 prOpts->iNumIterations = 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
920 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
921 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
922 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
923 /* end of */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
924
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
925
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
926
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
927 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
928 * @brief The main alignment function which wraps everything else.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
929 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
930 * @param[out] prMSeq
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
931 * *the* multiple sequences structure
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
932 * @param[in] prMSeqProfile
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
933 * optional profile to align against
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
934 * @param[in] prOpts
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
935 * alignment options to use
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
936 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
937 * @return 0 on success, -1 on failure
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
938 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
939 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
940 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
941 Align(mseq_t *prMSeq,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
942 mseq_t *prMSeqProfile,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
943 opts_t *prOpts,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
944 hhalign_para rHhalignPara) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
945
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
946 /* HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
947 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
948 /* structs with pseudocounts etc; one for each HMM infile, i.e.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
949 * index range: 0..iHMMInputFiles */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
950 hmm_light *prHMMs = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
951
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
952 /* MSA order in which nodes/profiles are to be merged/aligned
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
953 (order of nodes in guide tree (left/right)*/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
954 int *piOrderLR = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
955
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
956 /* weights per sequence */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
957 double *pdSeqWeights = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
958
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
959 /* Iteration
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
960 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
961 int iIterationCounter = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
962 double dAlnScore;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
963 /* last dAlnScore for iteration */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
964 double dLastAlnScore = -666.666;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
965
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
966 int i, j; /* aux */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
967
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
968 assert(NULL != prMSeq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
969 if (NULL != prMSeqProfile) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
970 assert(TRUE == prMSeqProfile->aligned);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
971 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
972
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
973
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
974 /* automatic setting of options
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
975 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
976 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
977 if (prOpts->bAutoOptions) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
978 SetAutoOptions(prOpts, prMSeq->nseqs);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
979 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
980
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
981
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
982 #if SHUFFLE_INPUT_SEQ_ORDER
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
983 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
984 * shuffle input: only useful for testing/debugging
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
985 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
986 Log(&rLog, LOG_WARN, "Shuffling input sequences! (Will also change output order)");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
987 ShuffleMSeq(prMSeq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
988 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
989
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
990
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
991 #if SORT_INPUT_SEQS
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
992 /*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
993 * sort input:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
994 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
995 * would ensure we *always* (unless we get into the mbed k-means stage)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
996 * get the same answer. usually you don't, because most pairwise alignment
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
997 * scores are in theory not symmetric, therefore sequence ordering might
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
998 * have an effect on the guide-tree. Sorting by length should get rid of
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
999 * this (and takes no time even for 100k seqs). Benchmark results on
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1000 * Balibase show almost no difference after sorting.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1001 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1002 Log(&rLog, LOG_WARN, "Sorting input seq by length! This will also change the output order");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1003 SortMSeqByLength(prMSeq, 'd');
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1004
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1005 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1006
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1007
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1008 /* Read backgrounds HMMs and store in prHMMs
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1009 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1010 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1011 if (0 < prOpts->iHMMInputFiles) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1012 int iHMMInfileIndex;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1013
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1014 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1015 * @warning old structure used to be initialised like this:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1016 * hmm_light rHMM = {0};
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1017 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1018 prHMMs = (hmm_light *) CKMALLOC(prOpts->iHMMInputFiles * sizeof(hmm_light));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1019
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1020 for (iHMMInfileIndex=0; iHMMInfileIndex<prOpts->iHMMInputFiles; iHMMInfileIndex++) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1021 char *pcHMMInput = prOpts->ppcHMMInput[iHMMInfileIndex];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1022 if (OK != readHMMWrapper(&prHMMs[iHMMInfileIndex], pcHMMInput)){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1023 Log(&rLog, LOG_ERROR, "Processing of HMM file %s failed", pcHMMInput);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1024 return -1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1025 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1026
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1027 #if 0
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1028 Log(&rLog, LOG_FORCED_DEBUG, "HMM length is %d", prHMMs[iHMMInfileIndex].L);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1029 Log(&rLog, LOG_FORCED_DEBUG, "n-display is %d", prHMMs[iHMMInfileIndex].n_display);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1030 for (i = 0; NULL != prHMMs[prOpts->iHMMInputFiles].seq[i]; i++){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1031 printf("seq[%d]: %s\n", i, prHMMs[iHMMInfileIndex].seq[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1032 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1033 Log(&rLog, LOG_FORCED_DEBUG, "Neff_HMM is %f", prHMMs[iHMMInfileIndex].Neff_HMM);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1034 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1035 if (rLog.iLogLevelEnabled <= LOG_DEBUG){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1036 Log(&rLog, LOG_DEBUG, "print frequencies");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1037 for (i = 0; i < prHMMs[iHMMInfileIndex].L; i++){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1038 #define PRINT_TAIL 5
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1039 if ( (PRINT_TAIL+1 == i) && (prHMMs[iHMMInfileIndex].L-PRINT_TAIL != i) ){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1040 printf("....\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1041 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1042 if ( (i > PRINT_TAIL) && (i < prHMMs[iHMMInfileIndex].L-PRINT_TAIL) ){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1043 continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1044 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1045 printf("%3d:", i);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1046 for (j = 0; j < 20; j++){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1047 printf("\t%1.3f", prHMMs[iHMMInfileIndex].f[i][j]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1048 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1049 printf("\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1050 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1051 } /* debug print block */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1052
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1053 CKFREE(prOpts->ppcHMMInput[iHMMInfileIndex]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1054 } /* for each background HMM file */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1055 CKFREE(prOpts->ppcHMMInput);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1056 } /* there were background HMM files */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1057
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1058
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1059
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1060 /* If the input ("non-profile") sequences are aligned, then turn
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1061 * the alignment into a HMM and add to the list of background HMMs
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1062 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1063 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1064 if (TRUE == prMSeq->aligned) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1065 /* FIXME: gcc warns about missing initialiser here (-Wall -Wextra -pedantic) */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1066 hmm_light rHMMLocal = {0};
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1067
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1068 Log(&rLog, LOG_INFO,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1069 "Input sequences are aligned. Will turn alignment into HMM and add it to the user provided background HMMs.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1070 if (OK !=
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1071 #if INDIRECT_HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1072 AlnToHMM(&rHMMLocal, prMSeq)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1073 #else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1074 AlnToHMM2(&rHMMLocal, prMSeq->seq, prMSeq->nseqs)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1075 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1076 ) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1077 Log(&rLog, LOG_ERROR, "Couldn't convert aligned input sequences to HMM. Will try to continue");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1078 } else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1079 prHMMs = (hmm_light *) CKREALLOC(prHMMs, ((prOpts->iHMMInputFiles+1) * sizeof(hmm_light)));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1080 memcpy(&(prHMMs[prOpts->iHMMInputFiles]), &rHMMLocal, sizeof(hmm_light));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1081 prOpts->iHMMInputFiles++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1082 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1083 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1084
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1085
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1086 /* If we have a profile turn it into a HMM and add to
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1087 * the list of background HMMs.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1088 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1089 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1090 if (NULL != prMSeqProfile) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1091 /* FIXME: gcc warns about missing initialiser here (-Wall -Wextra -pedantic) */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1092 hmm_light rHMMLocal = {0};
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1093 Log(&rLog, LOG_INFO,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1094 "Turning profile1 into HMM and will use it during progressive alignment.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1095 if (OK !=
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1096 #if INDIRECT_HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1097 AlnToHMM(&rHMMLocal, prMSeqProfile)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1098 #else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1099 AlnToHMM2(&rHMMLocal, prMSeqProfile->seq, prMSeqProfile->nseqs)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1100 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1101 ) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1102 Log(&rLog, LOG_ERROR, "Couldn't convert profile1 to HMM. Will try to continue");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1103 } else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1104 prHMMs = (hmm_light *) CKREALLOC(prHMMs, ((prOpts->iHMMInputFiles+1) * sizeof(hmm_light)));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1105 memcpy(&(prHMMs[prOpts->iHMMInputFiles]), &rHMMLocal, sizeof(hmm_light));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1106 prOpts->iHMMInputFiles++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1107 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1108 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1109
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1110
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1111 /* Now do a first alignment of the input sequences (prMSeq) adding
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1112 * all collected background HMMs
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1113 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1114 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1115 /* Determine progressive alignment order
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1116 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1117 if (TRUE == prMSeq->aligned) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1118 Log(&rLog, LOG_INFO, "%s %s",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1119 "Input sequences are aligned.",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1120 "Will use Kimura distances of aligned sequences.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1121 prOpts->iPairDistType = PAIRDIST_SQUIDID_KIMURA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1122 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1123
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1124 #if 0
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1125 Log(&rLog, LOG_WARN, "Using a sequential alignment order.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1126 SequentialAlignmentOrder(&piOrderLR, prMSeq->nseqs);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1127 #else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1128 if (OK != AlignmentOrder(&piOrderLR, &pdSeqWeights, prMSeq,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1129 prOpts->iPairDistType,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1130 prOpts->pcDistmatInfile, prOpts->pcDistmatOutfile,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1131 prOpts->iClusteringType,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1132 prOpts->pcGuidetreeInfile, prOpts->pcGuidetreeOutfile,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1133 prOpts->bUseMbed)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1134 Log(&rLog, LOG_ERROR, "AlignmentOrder() failed. Cannot continue");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1135 return -1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1136 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1137 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1138
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1139 /* Progressive alignment of input sequences. Order defined by
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1140 * branching of guide tree (piOrderLR). Use optional
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1141 * background HMM information (prHMMs[0..prOpts->iHMMInputFiles-1])
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1142 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1143 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1144 dAlnScore = HHalignWrapper(prMSeq, piOrderLR, pdSeqWeights,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1145 2*prMSeq->nseqs -1/* nodes */,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1146 prHMMs, prOpts->iHMMInputFiles, -1, rHhalignPara);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1147 dLastAlnScore = dAlnScore;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1148 Log(&rLog, LOG_VERBOSE,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1149 "Alignment score for first alignment = %f", dAlnScore);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1150
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1151
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1152
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1153
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1154 /* ------------------------------------------------------------
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1155 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1156 * prMSeq is aligned now. Now start iterations if requested and save the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1157 * alignment at the very end.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1158 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1159 * @note We discard the background HMM information at this point,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1160 * because it was already used. Could consider to make this choice
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1161 * optional. FIXME
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1162 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1163 * ------------------------------------------------------------ */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1164
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1165
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1166 /* iteration after first alignment was computed (if not profile-profile
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1167 * alignment)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1168 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1169 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1170 for (iIterationCounter=0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1171 (iIterationCounter < prOpts->iNumIterations || prOpts->bIterationsAuto);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1172 iIterationCounter++) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1173
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1174 hmm_light rHMMLocal = {0};
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1175 /* FIXME Keep copy of old alignment in case new one sucks? */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1176
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1177
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1178 if (iIterationCounter >= prOpts->iMaxHMMIterations
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1179 &&
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1180 iIterationCounter >= prOpts->iMaxGuidetreeIterations) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1181 Log(&rLog, LOG_VERBOSE, "Reached maximum number of HMM and guide-tree iterations");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1182 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1183 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1184
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1185 if (! prOpts->bIterationsAuto) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1186 Log(&rLog, LOG_INFO, "Iteration step %d out of %d",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1187 iIterationCounter+1, prOpts->iNumIterations);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1188 } else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1189 Log(&rLog, LOG_INFO, "Iteration step %d out of <auto>",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1190 iIterationCounter+1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1191 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1192 #if 0
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1193 if (rLog.iLogLevelEnabled <= LOG_VERBOSE) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1194 char zcIntermediate[1000] = {0};
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1195 char *pcFormat = "fasta";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1196 sprintf(zcIntermediate, "clustalo-aln-iter~%d~", iIterationCounter);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1197 if (WriteAlignment(prMSeq, zcIntermediate, MSAFILE_A2M)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1198 Log(&rLog, LOG_ERROR, "Could not save alignment to %s", zcIntermediate);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1199 return -1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1200 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1201 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1202 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1203
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1204
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1205 /* new guide-tree
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1206 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1207 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1208 if (iIterationCounter < prOpts->iMaxGuidetreeIterations) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1209 /* determine progressive alignment order
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1210 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1211 * few things are different now when calling AlignmentOrder:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1212 * - we have to ignore prOpts->pcDistmatInfile and pcGuidetreeInfile
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1213 * as they were used before
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1214 * - the corresponding outfiles are still valid though
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1215 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1216 /* Free stuff that has already been allocated by or further
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1217 * downstream of AlignmentOrder()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1218 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1219 if (NULL != piOrderLR)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1220 CKFREE(piOrderLR);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1221 if (NULL != pdSeqWeights)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1222 CKFREE(pdSeqWeights);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1223 if (AlignmentOrder(&piOrderLR, &pdSeqWeights, prMSeq,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1224 PAIRDIST_SQUIDID_KIMURA /* override */, NULL, prOpts->pcDistmatOutfile,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1225 prOpts->iClusteringType, NULL, prOpts->pcGuidetreeOutfile,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1226 prOpts->bUseMbedForIteration)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1227 Log(&rLog, LOG_ERROR, "AlignmentOrder() failed. Cannot continue");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1228 return -1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1229 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1230 } else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1231 Log(&rLog, LOG_INFO, "Skipping guide-tree iteration at iteration step %d (reached maximum)",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1232 iIterationCounter);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1233 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1234
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1235
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1236 /* new local hmm iteration
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1237 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1238 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1239 if (iIterationCounter < prOpts->iMaxHMMIterations) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1240 if (OK !=
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1241 #if INDIRECT_HMM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1242 AlnToHMM(&rHMMLocal, prMSeq)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1243 #else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1244 AlnToHMM2(&rHMMLocal, prMSeq->seq, prMSeq->nseqs)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1245 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1246 ) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1247 Log(&rLog, LOG_ERROR, "Couldn't convert alignment to HMM. Will stop iterating now...");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1248 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1249 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1250 } else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1251 Log(&rLog, LOG_INFO, "Skipping HMM iteration at iteration step %d (reached maximum)",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1252 iIterationCounter);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1253 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1254
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1255
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1256 /* align the sequences (again)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1257 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1258 dAlnScore = HHalignWrapper(prMSeq, piOrderLR, pdSeqWeights,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1259 2*prMSeq->nseqs -1/* nodes */, &rHMMLocal, 1, -1, rHhalignPara);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1260 Log(&rLog, LOG_VERBOSE,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1261 "Alignment score for alignmnent in hmm-iteration no %d = %f (last score = %f)",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1262 iIterationCounter+1, dAlnScore, dLastAlnScore);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1263
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1264
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1265 FreeHMMstruct(&rHMMLocal);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1266
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1267 #if 0
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1268 /* FIXME: need a better score for automatic iteration */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1269 if (prOpts->bIterationsAuto) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1270 /* automatic iteration: break if score improvement was not
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1271 * big enough
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1272 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1273 double dScoreImprovement = (dAlnScore-dLastAlnScore)/dLastAlnScore;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1274 if (dScoreImprovement < ITERATION_SCORE_IMPROVEMENT_THRESHOLD) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1275 Log(&rLog, LOG_INFO,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1276 "Stopping after %d guide-tree iterations. No further alignment score improvement achieved.",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1277 iIterationCounter+1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1278 /* use previous alignment */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1279 FreeMSeq(&prMSeq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1280 Log(&rLog, LOG_FORCED_DEBUG, "FIXME: %s", "CopyMSeq breaks things in this context");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1281 CopyMSeq(&prMSeq, prMSeqCopy);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1282 /* FIXME: prOpts->pcDistmatOutfile and pcGuidetreeOutfile
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1283 * might have been updated, but then discarded here?
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1284 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1285 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1286 } else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1287 Log(&rLog, LOG_INFO,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1288 "Got a %d%% better score in iteration step %d",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1289 (int)dScoreImprovement*100, iIterationCounter+1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1290 FreeMSeq(&prMSeqCopy);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1291 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1292 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1293 dLastAlnScore = dAlnScore;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1294 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1295
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1296 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1297 /* end of iterations */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1298
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1299
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1300
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1301 /* Last step: if a profile was also provided then align now-aligned mseq
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1302 * with this profile
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1303 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1304 * Don't use the backgrounds HMMs anymore and don't iterate.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1305 * (which was done before).
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1306 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1307 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1308 if (NULL != prMSeqProfile) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1309 if (AlignProfiles(prMSeq, prMSeqProfile, rHhalignPara)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1310 Log(&rLog, LOG_ERROR, "An error occured during the profile/profile alignment");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1311 return -1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1312 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1313 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1314
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1315
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1316 if (NULL != piOrderLR) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1317 CKFREE(piOrderLR);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1318 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1319 if (NULL != pdSeqWeights) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1320 CKFREE(pdSeqWeights);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1321 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1322 if (0 < prOpts->iHMMInputFiles) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1323 for (i=0; i<prOpts->iHMMInputFiles; i++) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1324 FreeHMMstruct(&prHMMs[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1325 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1326 CKFREE(prHMMs);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1327 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1328
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1329 return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1330 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1331 /* end of Align() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1332
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1333
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1334
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1335
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1336 /**
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1337 * @brief Align two profiles, ie two sets of prealigned sequences. Already
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1338 * aligned columns won't be changed.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1339 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1340 * @param[out] prMSeqProfile1
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1341 * First profile/aligned set of sequences. Merged alignment will be found in
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1342 * here.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1343 * @param[in] prMSeqProfile2
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1344 * Second profile/aligned set of sequences
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1345 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1346 * @return 0 on success, -1 on failure
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1347 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1348 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1349 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1350 AlignProfiles(mseq_t *prMSeqProfile1,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1351 mseq_t *prMSeqProfile2, hhalign_para rHhalignPara) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1352
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1353 double dAlnScore;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1354
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1355 /* number of seqs in first half of joined profile */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1356 int iProfProfSeparator = prMSeqProfile1->nseqs;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1357
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1358 assert(TRUE == prMSeqProfile1->aligned);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1359 assert(TRUE == prMSeqProfile2->aligned);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1360
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1361 Log(&rLog, LOG_INFO, "Performing profile/profile alignment");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1362
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1363 /* Combine the available mseqs into prMSeq
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1364 * which will be aligned afterwards.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1365 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1366 JoinMSeqs(&prMSeqProfile1, prMSeqProfile2);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1367
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1368
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1369 /* set alignment flag explicitly to FALSE */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1370 prMSeqProfile1->aligned = FALSE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1371
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1372 dAlnScore = HHalignWrapper(prMSeqProfile1,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1373 NULL, /* no order */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1374 NULL, /* no weights */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1375 3, /* nodes: root+2profiles */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1376 NULL, 0 /* no bg-hmms */,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1377 iProfProfSeparator, rHhalignPara);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1378
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1379 Log(&rLog, LOG_VERBOSE, "Alignment score is = %f", dAlnScore);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1380
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1381 return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1382 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1383 /* end of AlignProfiles() */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1384
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1385