annotate clustalomega/clustal-omega-0.2.0/src/clustal/mbed.c @ 0:ff1768533a07

Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author clustalomega
date Tue, 07 Jun 2011 17:04:25 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
2
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
3 /*********************************************************************
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
4 * Clustal Omega - Multiple sequence alignment
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
5 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
6 * Copyright (C) 2010 University College Dublin
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
7 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
8 * Clustal-Omega is free software; you can redistribute it and/or
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
9 * modify it under the terms of the GNU General Public License as
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
10 * published by the Free Software Foundation; either version 2 of the
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
11 * License, or (at your option) any later version.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
12 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
13 * This file is part of Clustal-Omega.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
14 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
15 ********************************************************************/
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
16
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
17 /*
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
18 * RCS $Id: mbed.c 242 2011-05-27 14:04:21Z andreas $
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
19 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
20 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
21 * Reimplementation from scratch of mBed:
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
22 * Blackshields et al. (2010); PMID 20470396
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
23 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
24 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
25 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
26
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
27 #ifdef HAVE_CONFIG_H
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
28 #include "config.h"
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
29 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
30
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
31 #include <assert.h>
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
32 #include <float.h>
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
33
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
34 #include "mbed.h"
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
35 #include "pair_dist.h"
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
36 #include "symmatrix.h"
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
37 #include "ktuple_pair.h"
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
38 #include "tree.h"
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
39 #include "util.h"
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
40 #include "progress.h"
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
41 #include "queue.h"
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
42
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
43 #include "log.h"
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
44 #include "kmpp/KMeans.h"
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
45 #include "mbed.h"
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
46
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
47 #define TIMING 0
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
48 #if TIMING
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
49 #include "squid/stopwatch.h"
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
50 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
51
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
52
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
53 /* If FULL_WITHIN_CLUSTER_DISTANCES is not 0, distances within each
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
54 * bisecting kmeans subcluster are not estimated using the vectors,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
55 * but calculated normally (using ktuple or kimura). Surprisingly this
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
56 * results in 3% loss on a Homfam p24-h2010-08-09 subset (100-5000
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
57 * seqs in test, at least 5 ref seqs; MAX_SEQ 100 vs 10000; NUM_SEEDS
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
58 * log2 instead of log2^2). And of course it slows things down.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
59 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
60 #define FULL_WITHIN_CLUSTER_DISTANCES 1
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
61
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
62
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
63 /* Cluster size limits. Maximum is a soft limit, which might be
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
64 * exceeded if a K-Means split was unsuccesful, where unsuccesful
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
65 * might also mean that the minimum required number seqs. was not
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
66 * reached */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
67 #if FULL_WITHIN_CLUSTER_DISTANCES
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
68 static const int MAX_ALLOWED_SEQ_PER_PRECLUSTER = 100;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
69 static const int MIN_REQUIRED_SEQ_PER_PRECLUSTER = 1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
70 #else
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
71 static const int MAX_ALLOWED_SEQ_PER_PRECLUSTER = 10000;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
72 static const int MIN_REQUIRED_SEQ_PER_PRECLUSTER = 100;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
73 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
74
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
75 /* How many restarts per bisecting kmeans split. 10 give 0.5% increase
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
76 * in quality on original HOMFAM over just 1. It also increases kmeans
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
77 * time by a factor of ~3, but overall time is insignificant
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
78 * compared to pairdist/progressive alignment part.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
79 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
80 static const int RESTARTS_PER_SPLIT = 10;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
81
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
82
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
83 /* Use standard kmeans (lloyds) or kmeans++. Both are almost
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
84 * indistinguishable here, although kmeans++ is slightly ahead the
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
85 * fewer seeds you pick (and it should be better in theory)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
86 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
87 #define USE_KMEANS_LLOYDS 0
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
88
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
89
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
90 #ifndef HAVE_LOG2
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
91 #define log2(x) (log(x) / 0.69314718055994530942)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
92 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
93 #define NUMBER_OF_SEEDS(n) pow(log2(((double)n)), 2)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
94
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
95
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
96 /* Seed selection method: SAMPLE_SEEDS_BY_LENGTH is the original mBed
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
97 * approach: Sample iSeeds sequence with constant stride from length-sorted X.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
98 * It might be better to pick the seeds randomly, because length sorting might
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
99 * be more prone to including outliers (e.g. very long and very short seqs).
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
100 * However, we could never observer such a thing. So just stick to the
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
101 * original version as this also removes the random element.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
102 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
103 enum SEED_SELECTION_TYPE {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
104 SELECT_SEEDS_RANDOMLY,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
105 SELECT_SEEDS_BY_LENGTH
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
106 };
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
107 #define SEED_SELECTION SELECT_SEEDS_BY_LENGTH
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
108
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
109
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
110 /* Tests on BAliBase (RV11,12,20,30,40,50; 10 runs each) show there is
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
111 * no difference between mbed-trees created from cosine or euclidean
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
112 * distances (simple version, just using disparities).
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
113 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
114 #define USE_EUCLIDEAN_DISTANCE 1
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
115
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
116
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
117 /* print some mbed pre-cluster usage to screen */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
118 #define PRINT_CLUSTER_DISTRIBUTION 0
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
119
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
120
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
121 #define TRACE 0
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
122
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
123
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
124 typedef struct {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
125 /* Number of final clusters
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
126 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
127 int iNClusters;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
128
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
129 /* Coordinates (columns) for each cluster (rows)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
130 * valid indices: [0...iNClusters][0...dim-1]
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
131 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
132 double **ppdClusterCenters;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
133
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
134 /* Dimensionality of input data and cluster center coordinates
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
135 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
136 int iDim;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
137
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
138 /* Number of objects per cluster
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
139 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
140 int *piNObjsPerCluster;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
141
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
142 /* Objects (indices) for each cluster. [i][j] will point to (index
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
143 * of) object j in cluster i
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
144 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
145 int **ppiObjIndicesPerCluster;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
146 } bisecting_kmeans_result_t;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
147
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
148
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
149
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
150 /**
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
151 * @brief Free KMeans result structure.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
152 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
153 * @param[out] prResult_p
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
154 * K-Means result to free
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
155 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
156 * @see NewKMeansResult()
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
157 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
158 void
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
159 FreeKMeansResult(bisecting_kmeans_result_t **prResult_p)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
160 {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
161 int iAux;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
162 CKFREE((*prResult_p)->piNObjsPerCluster);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
163 for (iAux=0; iAux<(*prResult_p)->iNClusters; iAux++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
164 CKFREE((*prResult_p)->ppiObjIndicesPerCluster[iAux]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
165 CKFREE((*prResult_p)->ppdClusterCenters[iAux]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
166 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
167 CKFREE((*prResult_p)->ppiObjIndicesPerCluster);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
168 CKFREE((*prResult_p)->ppdClusterCenters);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
169 (*prResult_p)->iNClusters = 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
170 (*prResult_p)->iDim = 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
171 CKFREE(*prResult_p);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
172 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
173 /*** end: FreeKMeansResult() ***/
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
174
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
175
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
176
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
177 /**
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
178 * @brief Allocate new KMeans result
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
179 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
180 * @param[out] prKMeansResult_p
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
181 * K-Means result to free
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
182 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
183 * @see NewKMeansResult()
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
184 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
185 void
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
186 NewKMeansResult(bisecting_kmeans_result_t **prKMeansResult_p)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
187 {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
188 (*prKMeansResult_p) = (bisecting_kmeans_result_t *)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
189 CKCALLOC(1, sizeof(bisecting_kmeans_result_t));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
190 (*prKMeansResult_p)->iNClusters = 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
191 (*prKMeansResult_p)->iDim = 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
192 (*prKMeansResult_p)->ppdClusterCenters = NULL;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
193 (*prKMeansResult_p)->piNObjsPerCluster = NULL;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
194 (*prKMeansResult_p)->ppiObjIndicesPerCluster = NULL;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
195
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
196 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
197 /*** end: NewKMeansResult() ***/
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
198
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
199
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
200
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
201 /**
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
202 * @brief Calculate the euclidean distance between two vectors
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
203 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
204 * @param[in] v1
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
205 * First vector with dim dimensions
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
206 * @param[in] v2
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
207 * Second vector with dim dimensions
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
208 * @param[in] dim
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
209 * Dimensionality of v1 and v2
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
210 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
211 * @return euclidean distance as double
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
212 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
213 * @note Could probably be inlined. Also perfect for SSE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
214 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
215 double
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
216 EuclDist(const double *v1, const double *v2, const int dim)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
217 {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
218 int i; /* aux */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
219 double dist; /* returned distance */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
220
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
221 assert(v1!=NULL);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
222 assert(v2!=NULL);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
223
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
224 dist = 0.0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
225 for (i=0; i<dim; i++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
226 dist += pow(v1[i]-v2[i], 2.0);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
227 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
228
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
229 return sqrt(dist);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
230 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
231 /*** end: EuclDist() ***/
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
232
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
233
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
234
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
235 /**
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
236 * @brief Calculate the cosine distance between two vectors
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
237 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
238 * @param[in] v1
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
239 * First vector with dim dimensions
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
240 * @param[in] v2
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
241 * Second vector with dim dimensions
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
242 * @param[in] dim
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
243 * Dimensionality of v1 and v2
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
244 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
245 * @return cosine distance as double
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
246 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
247 * @note Could probably be inlined. Also perfect for SSE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
248 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
249 double
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
250 CosDist(const double *v1, const double *v2, const int dim)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
251 {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
252 int i; /* aux */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
253 double dist; /* returned distance */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
254 double s, sq1, sq2;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
255
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
256 assert(v1!=NULL);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
257 assert(v2!=NULL);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
258
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
259 s = 0.0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
260 sq1 = 0.0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
261 sq2 = 0.0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
262 for (i=0; i<dim; i++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
263 s += (v1[i]*v2[i]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
264 sq1 += pow(v1[i], 2.0);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
265 sq2 += pow(v2[i], 2.0);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
266 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
267 sq1 = sqrt(sq1);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
268 sq2 = sqrt(sq2);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
269
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
270 if ((sq1 * sq2) < DBL_EPSILON) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
271 dist = 1 - s / (sq1 * sq2);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
272 } else {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
273 /* if the denominator gets small, the fraction gets big, hence dist
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
274 gets small: */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
275 dist = 0.0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
276 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
277 return dist;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
278 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
279 /*** end: CosDist() ***/
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
280
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
281
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
282
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
283 /**
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
284 * @brief convert sequences into mbed-like (distance) vector
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
285 * representation. Seeds (prMSeq sequence indices) have to be picked before
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
286 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
287 * @param[out] ppdSeqVec
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
288 * Pointer to preallocated matrix of size nseqs x iSeeds
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
289 * @param[in] prMSeq
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
290 * Sequences which are to be converted
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
291 * @param[in] piSeeds
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
292 * Array of sequences in prMSeq which are to be used as seeds.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
293 * @param[in] iNumSeeds
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
294 * Number of seeds/elements in piSeeds
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
295 * @param[in] iPairDistType
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
296 * Type of pairwise distance comparison
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
297 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
298 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
299 int
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
300 SeqToVec(double **ppdSeqVec, mseq_t *prMSeq,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
301 int *piSeeds, const int iNumSeeds,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
302 const int iPairDistType)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
303 {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
304 symmatrix_t *prDistmat = NULL;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
305 int iSeqIndex;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
306 int iSeedIndex;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
307 /* indices for restoring order */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
308 int *restore;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
309 /* sorted copy of piSeeds */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
310 int *piSortedSeeds;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
311
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
312 #if TIMING
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
313 Stopwatch_t *stopwatch = StopwatchCreate();
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
314 StopwatchZero(stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
315 StopwatchStart(stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
316 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
317
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
318 assert(NULL != ppdSeqVec);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
319 assert(iNumSeeds>0 && iNumSeeds<=prMSeq->nseqs);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
320
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
321 piSortedSeeds = CKMALLOC(iNumSeeds * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
322 memcpy(piSortedSeeds, piSeeds, iNumSeeds*sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
323
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
324 /* need them sorted, otherwise the swapping approach below might
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
325 * break
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
326 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
327 qsort(piSortedSeeds, iNumSeeds, sizeof(int), IntCmp);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
328
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
329
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
330 /* rearrange mseq order so that we can use ktuple_pairdist code as
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
331 * is. That is, swap seeds and non-seeds such that the seeds end
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
332 * up in front of mseq. This way we can use the KTuplePairDist()
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
333 * code, without making a copy of mseq. Also, keep an array of
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
334 * indices which serves to restore the original sequence order
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
335 * after putting the seeds in front
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
336 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
337 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
338 restore = (int *) CKMALLOC(prMSeq->nseqs * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
339 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
340 restore[iSeqIndex] = iSeqIndex;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
341 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
342 for (iSeedIndex=0; iSeedIndex<iNumSeeds; iSeedIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
343 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
344 Log(&rLog, LOG_FORCED_DEBUG, "Swapping seqs %d and %u", piSortedSeeds[iSeedIndex], iSeedIndex);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
345 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
346 if (piSortedSeeds[iSeedIndex]!=iSeedIndex) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
347 int swap;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
348 SeqSwap(prMSeq, piSortedSeeds[iSeedIndex], iSeedIndex);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
349
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
350 swap = restore[iSeedIndex];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
351 restore[iSeedIndex] = restore[piSortedSeeds[iSeedIndex]];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
352 restore[piSortedSeeds[iSeedIndex]] = swap;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
353 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
354 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
355 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
356 printf("DEBUG(%s|%s():%d): restore indices =",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
357 __FILE__, __FUNCTION__, __LINE__);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
358 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
359 printf(" %u:%u", iSeqIndex, restore[iSeqIndex]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
360 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
361 printf("\n");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
362
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
363 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
364 Log(&rLog, LOG_FORCED_DEBUG, "swapped seq no %d now: seq = %s",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
365 iSeqIndex, prMSeq->sqinfo[iSeqIndex].name);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
366 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
367 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
368
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
369
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
370 /* convert sequences into vectors of distances by calling pairwise
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
371 * distance function for each seq/seed pair
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
372 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
373 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
374 if (0 != PairDistances(&prDistmat, prMSeq, iPairDistType,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
375 0, iNumSeeds, 0, prMSeq->nseqs,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
376 NULL, NULL)) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
377 Log(&rLog, LOG_ERROR, "Could not compute pairwise distances for mbed.");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
378 FreeSymMatrix(&prDistmat);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
379 CKFREE(piSortedSeeds);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
380 CKFREE(restore);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
381 return -1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
382 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
383 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
384 {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
385 char **labels;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
386 labels = (char **) CKMALLOC(prMSeq->nseqs * sizeof(char *));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
387 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
388 labels[iSeqIndex] = prMSeq->sqinfo[iSeqIndex].name;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
389 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
390 SymMatrixPrint(prDistmat, labels, NULL);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
391 CKFREE(labels);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
392 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
393 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
394
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
395
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
396 /* update ppdSeqVec according to prDistmat. keep in mind that we
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
397 * changed mseq order
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
398 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
399 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
400 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
401 for (iSeedIndex=0; iSeedIndex<iNumSeeds; iSeedIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
402 ppdSeqVec[restore[iSeqIndex]][iSeedIndex] =
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
403 SymMatrixGetValue(prDistmat, iSeqIndex, iSeedIndex);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
404 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
405 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
406
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
407
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
408 /* restore mseq order by reverse swapping
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
409 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
410 * need reverse order now, so start from top index
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
411 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
412 iSeedIndex=iNumSeeds-1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
413 do {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
414 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
415 Log(&rLog, LOG_FORCED_DEBUG, "Swapping seqs %d and %d", piSortedSeeds[iSeedIndex], iSeedIndex);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
416 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
417 if (piSortedSeeds[iSeedIndex]!=iSeedIndex) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
418 int swap;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
419
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
420 SeqSwap(prMSeq, piSortedSeeds[iSeedIndex], iSeedIndex);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
421
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
422 swap = restore[iSeedIndex];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
423 restore[iSeedIndex] = restore[piSortedSeeds[iSeedIndex]];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
424 restore[piSortedSeeds[iSeedIndex]] = swap;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
425 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
426 } while (0 != iSeedIndex--);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
427 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
428 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
429 Log(&rLog, LOG_FORCED_DEBUG, "restored seq no %d: seq = %s %s",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
430 iSeqIndex, prMSeq->sqinfo[iSeqIndex].name, prMSeq->seq[iSeqIndex]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
431 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
432 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
433
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
434 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
435 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
436 printf("DEBUG: seq %-4u as distance vector =", iSeqIndex);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
437 for (iSeedIndex=0; iSeedIndex<iNumSeeds; iSeedIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
438 printf(" %f", ppdSeqVec[iSeqIndex][iSeedIndex]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
439 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
440 printf("\n");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
441 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
442 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
443
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
444
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
445 FreeSymMatrix(&prDistmat);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
446 CKFREE(restore);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
447 #if TIMING
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
448 StopwatchStop(stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
449 StopwatchDisplay(stdout, "Total time for SeqToVec(): ", stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
450 StopwatchFree(stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
451 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
452
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
453
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
454 return 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
455 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
456 /*** end: SeqToVec() ***/
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
457
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
458
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
459
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
460 /**
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
461 * @brief Select seeds to be used from an prMSeq
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
462 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
463 * @param[out] piSeeds
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
464 * Will store the indices of prMSeqs seqs used to be as seeds here. Must be preallocated.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
465 * @param[in] iNumSeeds
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
466 * Number of seeds to be picked
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
467 * @param[in] iSelectionMethod
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
468 * Seed selection method to be used
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
469 * @param[in] prMSeq
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
470 * The prMSeq structure to pick sequences from
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
471 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
472 * @return: Non-zero on failure
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
473 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
474 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
475 int
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
476 SeedSelection(int *piSeeds, int iNumSeeds, int iSelectionMethod, mseq_t *prMSeq)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
477 {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
478 /* seed iterator */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
479 int iSeedIndex;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
480 /* sequence iterator */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
481 int iSeqIndex;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
482
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
483 if (SELECT_SEEDS_RANDOMLY == iSelectionMethod) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
484 int *piPermArray;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
485
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
486 Log(&rLog, LOG_INFO,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
487 "Using %d seeds (randomly chosen) for mBed (from a total of %d sequences)",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
488 iNumSeeds, prMSeq->nseqs);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
489
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
490 PermutationArray(&piPermArray, iNumSeeds);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
491 /* copy to piSeeds */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
492 for (iSeedIndex=0; iSeedIndex<iNumSeeds; iSeedIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
493 piSeeds[iSeedIndex] = piPermArray[iSeedIndex];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
494 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
495 CKFREE(piPermArray);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
496
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
497 } else if (SELECT_SEEDS_BY_LENGTH == iSelectionMethod) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
498
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
499 /* step size for picking with constant stride */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
500 int iStep;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
501 int *piSeqLen = CKMALLOC(prMSeq->nseqs * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
502 int *piOrder = CKMALLOC(prMSeq->nseqs * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
503
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
504 Log(&rLog, LOG_INFO,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
505 "Using %d seeds (chosen with constant stride from length sorted seqs) for mBed (from a total of %d sequences)",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
506 iNumSeeds, prMSeq->nseqs);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
507
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
508 iStep = prMSeq->nseqs / iNumSeeds; /* iStep will never get too big
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
509 * due to rounding */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
510 /* first get an array of seq indices order according to
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
511 corresponding sequence length: piOrder */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
512 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
513 piSeqLen[iSeqIndex] = prMSeq->sqinfo[iSeqIndex].len;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
514 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
515 QSortAndTrackIndex(piOrder, piSeqLen, prMSeq->nseqs, 'd', FALSE);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
516 #if 0
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
517 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
518 Log(&rLog, LOG_FORCED_DEBUG, "Descending order (no %d): seq %d has len %d)",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
519 iSeqIndex, piOrder[iSeqIndex], piSeqLen[piOrder[iSeqIndex]]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
520 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
521 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
522 CKFREE(piSeqLen);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
523 iSeedIndex = 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
524 for (iSeqIndex=0; iSeedIndex<iNumSeeds; iSeqIndex+=iStep) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
525 piSeeds[iSeedIndex++] = piOrder[iSeqIndex];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
526 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
527 CKFREE(piOrder);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
528
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
529 } else {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
530
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
531 Log(&rLog, LOG_ERROR, "Internal error: unknown seed selection type");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
532 return -1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
533 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
534
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
535
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
536 #ifdef PICK_SEEDS_FROM_KMEANS_CLUSTERS
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
537 /* do initial mbedding and kmeans. then pick seeds from each cluster and
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
538 do full mbed with these seeds. idea is that those seeds represent the
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
539 sequence space better than random seeds. however, this reduces the
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
540 quality slightly. random is almost always better.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
541 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
542
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
543 if (1) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
544 /* seqs represented as distance vectors */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
545 double **ppdSeqVec;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
546 double dCost = -1.0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
547 /* assignments for each seq to the K newly created clusters */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
548 int *piKMeansClusterAssignments = CKMALLOC(prMSeq->nseqs * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
549 double *pdKMeansClusterCenters = CKCALLOC(iNumSeeds * iNumSeeds, sizeof(double));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
550 /* create a copy of ppdSeqVec suitable for KMeans */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
551 double *pdKMeansVectors = CKMALLOC(prMSeq->nseqs * iNumSeeds * sizeof(double));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
552 bool *pbClusterUsed = CKCALLOC(iNumSeeds, sizeof(bool));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
553
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
554 Log(&rLog, LOG_FORCED_DEBUG, "%s", "FIXME Experimental feature: K-means on first round embedding to get better seeds");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
555 Log(&rLog, LOG_FORCED_DEBUG, "%s", "FIXME Reuse seeds from clusters");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
556 Log(&rLog, LOG_FORCED_DEBUG, "%s", "FIXME Could try log(n) instead of log(n)^2 in first round");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
557 Log(&rLog, LOG_FORCED_DEBUG, "%s", "FIXME hardcoded iPairDistType PAIRDIST_KTUPLE");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
558 Log(&rLog, LOG_FORCED_DEBUG, "%s", "FIXME Show that this is fast *and* good");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
559
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
560 /*
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
561 Log(&rLog, LOG_FORCED_DEBUG, "%s", "Overriding iNumSeeds");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
562 iNumSeeds = (int) pow(log2((double)prMSeq->nseqs), 2);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
563 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
564
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
565 ppdSeqVec = (double **) CKMALLOC(prMSeq->nseqs * sizeof(double *));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
566 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
567 ppdSeqVec[iSeqIndex] = (double *) CKMALLOC(iNumSeeds * sizeof(double));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
568 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
569 if (0 != SeqToVec(ppdSeqVec, prMSeq, piSeeds, iNumSeeds, PAIRDIST_KTUPLE)) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
570 Log(&rLog, LOG_ERROR, "Could not convert sequences into vectors for mbed");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
571 return -1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
572 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
573
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
574 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
575 int iDim;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
576 for (iDim=0; iDim<iNumSeeds; ++iDim) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
577 pdKMeansVectors[iDim*iSeqIndex + iDim] = ppdSeqVec[iSeqIndex][iDim];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
578 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
579 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
580
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
581
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
582 Log(&rLog, LOG_FORCED_DEBUG, "%s\n", "FIXME hardcoded RESTARTS_PER_SPLIT");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
583 dCost = KMeans(prMSeq->nseqs, iNumSeeds, iNumSeeds,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
584 pdKMeansVectors,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
585 RESTARTS_PER_SPLIT, USE_KMEANS_LLOYDS,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
586 pdKMeansClusterCenters, piKMeansClusterAssignments);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
587 Log(&rLog, LOG_FORCED_DEBUG, "Best split cost = %f", dCost);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
588
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
589 Log(&rLog, LOG_FORCED_DEBUG, "%s", "FIXME Check for Nan in cluster centers");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
590
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
591 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
592 Log(&rLog, LOG_FORCED_DEBUG, "%s", "K-Means output:");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
593 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
594 Log(&rLog, LOG_FORCED_DEBUG, " Raw assignment: Seq %u (%s) to cluster %d",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
595 iSeqIndex, prMSeq->sqinfo[iSeqIndex].name,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
596 piKMeansClusterAssignments[iSeqIndex]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
597 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
598 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
599
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
600 Log(&rLog, LOG_FORCED_DEBUG, "FIXME %s", "proof of concept implementation: Pick first sequences from each clusters instead of reusing seeds");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
601 iSeedIndex = 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
602 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
603 int iAssignedCluster = piKMeansClusterAssignments[iSeqIndex];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
604 if (pbClusterUsed[iAssignedCluster]) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
605 continue;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
606 } else {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
607 /*LOG_DEBUG("Picked seed %d from cluster %d", iSeqIndex,iAssignedCluster);*/
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
608 piSeeds[iSeedIndex++] = iSeqIndex;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
609 pbClusterUsed[iAssignedCluster] = TRUE;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
610 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
611 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
612 CKFREE(pbClusterUsed);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
613
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
614 CKFREE(pdKMeansVectors);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
615 CKFREE(pdKMeansClusterCenters);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
616 CKFREE(piKMeansClusterAssignments);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
617
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
618 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
619 /* if (1) */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
620 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
621
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
622
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
623 if (LOG_DEBUG <= rLog.iLogLevelEnabled) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
624 for (iSeedIndex=0; iSeedIndex<iNumSeeds; iSeedIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
625 Log(&rLog, LOG_DEBUG, "Picked sequence %d (%s) as seed no %d",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
626 piSeeds[iSeedIndex], prMSeq->sqinfo[piSeeds[iSeedIndex]].name, iSeedIndex);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
627 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
628 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
629
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
630 return 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
631 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
632 /* end of SeedSelection() */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
633
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
634
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
635
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
636
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
637 /**
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
638 * @brief Bisecting K-Means clustering. Repeatedly calls K-Means with
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
639 * a K of 2 until no cluster has more than iMaxAllowedObjsPerCluster.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
640 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
641 * @param[out] prKMeansResult_p
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
642 * Result of Bisecting KMeans. Will be allocated here.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
643 * Caller has to free. See @see FreeKMeansResult()
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
644 * @param[in] iNObjs
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
645 * Number of objects/sequences to cluster
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
646 * @param[in] iDim
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
647 * Dimensionality of input data
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
648 * @param[in] ppdVectors
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
649 * each row holds iDim points for this object's coordinates
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
650 * @param[in] iMinRequiredObjsPerCluster
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
651 * Minimum number of objects per Cluster (inclusive)/
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
652 * @param[in] iMaxAllowedObjsPerCluster
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
653 * Maximum number of objects per Cluster (inclusive). Function returns once no
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
654 * cluster contains more then this number of objects. Soft limit!
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
655 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
656 * @note Convoluted code. Could use some restructuring. My apologies.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
657 * AW
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
658 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
659 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
660 void
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
661 BisectingKmeans(bisecting_kmeans_result_t **prKMeansResult_p,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
662 const int iNObjs, const int iDim, double **ppdVectors,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
663 const int iMinRequiredObjsPerCluster,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
664 const int iMaxAllowedObjsPerCluster)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
665 {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
666 int iN, iD;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
667 /* cluster centers for each cluster created at each split */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
668 double *pdKClusterCenters;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
669 /* keep track of updated object indices per newly created
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
670 * cluster */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
671 int *piCurObjToUpdate;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
672 /* number of times we called 2-means */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
673 int iNRounds;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
674 /* flag for unsuccessful cluster split */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
675 bool bNaNDetected = FALSE;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
676 /* flag for detected small cluster after split */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
677 bool bSmallClusterDetected;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
678 /* queue of clusters which are to be split */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
679 int_queue_t rClusterSplitQueue;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
680
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
681 #if TIMING
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
682 Stopwatch_t *stopwatch = StopwatchCreate();
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
683 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
684
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
685
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
686 piCurObjToUpdate = (int *) CKMALLOC(2 * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
687
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
688 NewKMeansResult(prKMeansResult_p);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
689
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
690 /* new cluster centers created at each split/KMeans run
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
691 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
692 pdKClusterCenters = (double *) CKCALLOC(2 * iDim, sizeof(double));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
693
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
694 /* init results by setting a first cluster that contains all objects
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
695 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
696 (*prKMeansResult_p)->iNClusters = 1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
697 (*prKMeansResult_p)->iDim = iDim;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
698 /* fake center coordinates of first cluster */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
699 (*prKMeansResult_p)->ppdClusterCenters =
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
700 (double **) CKMALLOC(1 * sizeof(double *));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
701 (*prKMeansResult_p)->ppdClusterCenters[0] =
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
702 (double *) CKMALLOC(iDim * sizeof(double));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
703 /* objects per cluster */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
704 (*prKMeansResult_p)->piNObjsPerCluster =
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
705 (int *) CKMALLOC(1 * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
706 (*prKMeansResult_p)->piNObjsPerCluster[0] = iNObjs;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
707 /* object indices per cluster */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
708 (*prKMeansResult_p)->ppiObjIndicesPerCluster =
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
709 (int **) CKMALLOC(1 * sizeof(int *));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
710 (*prKMeansResult_p)->ppiObjIndicesPerCluster[0] = (int *)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
711 CKMALLOC(iNObjs * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
712 for (iN=0; iN<iNObjs; iN++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
713 (*prKMeansResult_p)->ppiObjIndicesPerCluster[0][iN] = iN;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
714 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
715
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
716
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
717
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
718 /* Starting with the first cluster that now contains all the
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
719 * sequences keep splitting until no cluster contains more than
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
720 * iMaxAllowedObjsPerCluster
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
721 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
722 * Keep a queue of clusters (rClusterSplitQueue) to split
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
723 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
724 * At each split values/memory of the just split cluster will be
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
725 * reused and exactly one new only allocated.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
726 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
727 * Example:
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
728 * Given the following cluster assignments
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
729 * 0 0 0 0 1 1 2 2 2 3 3 3 3 3 3 3 4 4
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
730 * and a K-Means split in cluster 3 at |:
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
731 * 0 0 0 0 1 1 2 2 2 3 3 3 | 3 3 3 3 4 4
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
732 * The result should be this:
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
733 * 0 0 0 0 1 1 2 2 2 3 3 3 | 5 5 5 5 4 4
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
734 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
735 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
736 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
737 INT_QUEUE_INIT(&rClusterSplitQueue);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
738 if (iNObjs>iMaxAllowedObjsPerCluster) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
739 /* pish fake first cluster index */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
740 INT_QUEUE_PUSH(&rClusterSplitQueue, 0);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
741 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
742 iNRounds = 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
743 while (! INT_QUEUE_EMPTY(&rClusterSplitQueue)) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
744 /* assignments for each seq to the K newly created clusters */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
745 int *piKClusterAssignments;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
746 /* number of objects in cluster that is to be split */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
747 int iNObjsInClusterToSplit;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
748 /* coordinates of points in cluster that is to be split
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
749 * array of size n*d where [d*i + j] gives coordinate j of point i
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
750 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
751 double *pdVectorsInClusterToSplit;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
752 /* copy of object indices in split cluster */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
753 int *piObjIndicesOfSplitCluster;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
754 /* best cost of kmeans rounds */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
755 double dCost = -1.0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
756
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
757 /* indices for the two created clusters
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
758 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
759 /* index of cluster to split */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
760 int iClusterToSplot;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
761 /* index of newly created cluster at each round. data for the
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
762 other created cluster goes to the one just split,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
763 i.e. (*piClusterToSplot) */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
764 int iNewClusterIdx;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
765
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
766 #if TIMING
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
767 StopwatchZero(stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
768 StopwatchStart(stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
769 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
770
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
771 INT_QUEUE_POP(&rClusterSplitQueue, &iClusterToSplot);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
772
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
773 iNObjsInClusterToSplit = (*prKMeansResult_p)->piNObjsPerCluster[iClusterToSplot];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
774 piKClusterAssignments = (int *)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
775 CKMALLOC(iNObjsInClusterToSplit * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
776
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
777 pdVectorsInClusterToSplit = (double *)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
778 CKMALLOC(iNObjsInClusterToSplit * iDim * sizeof(double));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
779 for (iN=0; iN<iNObjsInClusterToSplit; iN++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
780 for (iD=0; iD<iDim; ++iD) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
781 int iThisObjIdx =
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
782 (*prKMeansResult_p)->ppiObjIndicesPerCluster[iClusterToSplot][iN];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
783 pdVectorsInClusterToSplit[iDim*iN + iD] = ppdVectors[iThisObjIdx][iD];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
784 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
785 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
786
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
787 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
788 Log(&rLog, LOG_FOCRED_DEBUG, "Round %d: Will split cluster %d which has %d objects",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
789 iNRounds, iClusterToSplot, iNObjsInClusterToSplit);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
790 fprintf(stderr, "DEBUG(%s|%s():%d): Object indices in cluster to split are:",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
791 __FILE__, __FUNCTION__, __LINE__);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
792 for (iN=0; iN<iNObjsInClusterToSplit; iN++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
793 fprintf(stderr, " %u",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
794 (*prKMeansResult_p)->ppiObjIndicesPerCluster[iClusterToSplot][iN]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
795 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
796 fprintf(stderr, "\n");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
797 (void) fflush(stderr);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
798 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
799
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
800 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
801 for (iN=0; iN<iNObjsInClusterToSplit; iN++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
802 fprintf(stderr,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
803 "DEBUG(%s|%s():%d): Coordinate of object %u (real index %u) in cluster to split:",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
804 __FILE__, __FUNCTION__, __LINE__, iN,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
805 (*prKMeansResult_p)->ppiObjIndicesPerCluster[iClusterToSplot][iN]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
806 for (iD=0; iD<iDim; iD++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
807 fprintf(stderr, " %f", pdVectorsInClusterToSplit[iDim*iN + iD]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
808 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
809 fprintf(stderr, "\n");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
810 (void) fflush(stderr);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
811 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
812 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
813
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
814 /* KMeans(1 "The number of points in the data set",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
815 * 2 "The number of clusters to look for",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
816 * 3 "The number of dimensions that the data set lives in",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
817 * 4 "points: An array of size n*d where points[d*i + j] gives coordinate j of point i",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
818 * 5 "attempts: The number of times to independently run k-means",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
819 * 6 "use_lloyds_method: uses kmpp if false, otherwise lloyds method",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
820 * 7 "centers: This can either be null or an array of size k*d.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
821 * In the latter case, centers[d*i + j] will give coordinate j of center i.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
822 * If the cluster is unused, it will contain NaN instead.",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
823 * 8 "assignments: This can either be null or an array of size n.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
824 * In the latter case, it will be filled with the cluster that each point is assigned to
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
825 * (an integer between 0 and k-1 inclusive).");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
826 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
827 dCost = KMeans(iNObjsInClusterToSplit, 2, iDim,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
828 pdVectorsInClusterToSplit,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
829 RESTARTS_PER_SPLIT, USE_KMEANS_LLOYDS,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
830 pdKClusterCenters, piKClusterAssignments);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
831
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
832 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
833 Log(&rLog, LOG_FORCED_DEBUG, "%s", "Raw K-Means output:");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
834 for (iN=0; iN<2; iN++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
835 fprintf(stderr, "DEBUG(%s|%s():%d): Cluster Center %u =",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
836 __FILE__, __FUNCTION__, __LINE__, iN);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
837 for (iD=0; iD<iDim; iD++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
838 fprintf(stderr, " %f", pdKClusterCenters[iN*iDim+iD]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
839 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
840 fprintf(stderr, "\n");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
841 (void) fflush(stderr);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
842 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
843 for (iN=0; iN<iNObjsInClusterToSplit; iN++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
844 Log(&rLog, LOG_FORCED_DEBUG, " Raw assignment: Seq %u to cluster %d (of #%u)",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
845 iN, piKClusterAssignments[iN], 2);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
846 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
847 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
848
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
849 /* real index of one of the newly created clusters. the other
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
850 * one is iClusterToSplot */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
851 iNewClusterIdx = (*prKMeansResult_p)->iNClusters;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
852
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
853
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
854 /* We don't want Clusters which are too small. Check here if a
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
855 * split created a small cluster and if yes, discard the
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
856 * solution. Because the cluster has already been removed from
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
857 * the queue, we can just continue.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
858 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
859 bSmallClusterDetected = FALSE;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
860 if (iMinRequiredObjsPerCluster>1) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
861 int iNObjsCluster[2];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
862 iNObjsCluster[0] = 0; /* first cluster */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
863 iNObjsCluster[1] = 0; /* second cluster */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
864 for (iN=0; iN<iNObjsInClusterToSplit; iN++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
865 iNObjsCluster[piKClusterAssignments[iN]]+=1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
866 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
867
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
868 if (iNObjsCluster[0]<iMinRequiredObjsPerCluster
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
869 ||
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
870 iNObjsCluster[1]<iMinRequiredObjsPerCluster) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
871 bSmallClusterDetected = TRUE;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
872 Log(&rLog, LOG_FORCED_DEBUG, "Skipping this split because objs in 1st/2nd cluster = %d/%d < %d",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
873 iNObjsCluster[0], iNObjsCluster[1], iMinRequiredObjsPerCluster);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
874 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
875 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
876
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
877 /* Same logic as for small clusters applies if KMeans couldn't
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
878 * split the cluster. In this case one of its center
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
879 * coordinates will be NaN, which we check for in the
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
880 * following.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
881 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
882 if (! bSmallClusterDetected) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
883 for (iN=0; iN<2; iN++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
884 bNaNDetected = FALSE;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
885 for (iD=0; iD<iDim; iD++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
886 if (0 != isnan(pdKClusterCenters[iN*iDim+iN])) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
887 /* Got NaN as coordinate after splitting */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
888 Log(&rLog, LOG_WARN, "%s(): Can't split cluster no. %d which has %d objects any further. %s",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
889 __FUNCTION__,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
890 iClusterToSplot, iNObjsInClusterToSplit,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
891 "Hope it's not too big and doesn't slow things down."
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
892 );
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
893 bNaNDetected = TRUE;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
894 break;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
895 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
896 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
897 if (bNaNDetected) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
898 break;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
899 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
900 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
901 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
902
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
903 /* Discarding split of this cluster. It has been removed from the
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
904 * queue already. so just continue
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
905 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
906 if (bNaNDetected || bSmallClusterDetected) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
907 CKFREE(piKClusterAssignments);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
908 CKFREE(pdVectorsInClusterToSplit);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
909 continue;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
910 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
911
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
912
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
913 /* update cluster centers: pdClusterCenters
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
914 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
915 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
916 /* reuse memory of existing/old/split cluster
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
917 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
918 for (iN=0; iN<iDim; iN++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
919 double dCoord = pdKClusterCenters[0*iDim+iN];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
920 (*prKMeansResult_p)->ppdClusterCenters[iClusterToSplot][iN] = dCoord;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
921 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
922 /* realloc and set new one
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
923 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
924 (*prKMeansResult_p)->ppdClusterCenters = (double **)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
925 CKREALLOC((*prKMeansResult_p)->ppdClusterCenters,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
926 ((*prKMeansResult_p)->iNClusters+1) * sizeof(double *));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
927 (*prKMeansResult_p)->ppdClusterCenters[iNewClusterIdx] = (double *)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
928 CKMALLOC(iDim * sizeof(double));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
929 for (iD=0; iD<iDim; iD++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
930 double dCoord = pdKClusterCenters[1*iDim+iD];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
931 (*prKMeansResult_p)->ppdClusterCenters[iNewClusterIdx][iD] = dCoord;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
932 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
933 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
934 Log(&rLog, LOG_FORCED_DEBUG, "%s", "* Update of cluster centers done. Cluster centers so far:");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
935 for (iN=0; iN<(*prKMeansResult_p)->iNClusters+1; iN++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
936 fprintf(stderr, "DEBUG(%s|%s():%d): Center %u =",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
937 __FILE__, __FUNCTION__, __LINE__, iN);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
938 for (iD=0; iD<iDim; iD++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
939 fprintf(stderr, " %f", (*prKMeansResult_p)->ppdClusterCenters[iN][iD]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
940 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
941 fprintf(stderr, "\n");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
942 (void) fflush(stderr);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
943 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
944 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
945
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
946
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
947 /* update #seqs per cluster: piNObjsPerCluster
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
948 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
949 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
950 (*prKMeansResult_p)->piNObjsPerCluster = (int *)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
951 CKREALLOC((*prKMeansResult_p)->piNObjsPerCluster,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
952 ((*prKMeansResult_p)->iNClusters+1) * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
953 /* init new and old one to zero */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
954 (*prKMeansResult_p)->piNObjsPerCluster[iClusterToSplot] = 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
955 (*prKMeansResult_p)->piNObjsPerCluster[iNewClusterIdx] = 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
956 /* now update values */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
957 for (iN=0; iN<iNObjsInClusterToSplit; iN++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
958 if (0 == piKClusterAssignments[iN]) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
959 (*prKMeansResult_p)->piNObjsPerCluster[iClusterToSplot] += 1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
960 } else if (1 == piKClusterAssignments[iN]) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
961 (*prKMeansResult_p)->piNObjsPerCluster[iNewClusterIdx] += 1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
962 } else {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
963 /* there used to be code for iK>=2 in r101 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
964 Log(&rLog, LOG_FATAL, "Internal error: split into more than two clusters (got assignment %d)",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
965 piKClusterAssignments[iN]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
966 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
967 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
968 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
969 Log(&rLog, LOG_FORCED_DEBUG, "%s", "* Update of NObjs per cluster done:");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
970 for (iN=0; iN<(*prKMeansResult_p)->iNClusters+1; iN++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
971 Log(&rLog, LOG_FORCED_DEBUG, "Cluster %d contains %d sequences",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
972 iN, (*prKMeansResult_p)->piNObjsPerCluster[iN]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
973 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
974 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
975 /* queue clusters if still they are still too big
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
976 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
977 if ((*prKMeansResult_p)->piNObjsPerCluster[iClusterToSplot] > iMaxAllowedObjsPerCluster) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
978 INT_QUEUE_PUSH(&rClusterSplitQueue, iClusterToSplot);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
979 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
980 if ((*prKMeansResult_p)->piNObjsPerCluster[iNewClusterIdx] > iMaxAllowedObjsPerCluster) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
981 INT_QUEUE_PUSH(&rClusterSplitQueue, iNewClusterIdx);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
982 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
983
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
984
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
985
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
986 /* update which objects belong to which cluster:
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
987 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
988 * note: piNObjsPerCluster needs to be already updated
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
989 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
990 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
991 /* create a copy of the object indices in the split cluster
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
992 * (original will be overwritten)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
993 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
994 piObjIndicesOfSplitCluster = (int *) CKMALLOC(iNObjsInClusterToSplit * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
995 memcpy(piObjIndicesOfSplitCluster,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
996 (*prKMeansResult_p)->ppiObjIndicesPerCluster[iClusterToSplot],
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
997 iNObjsInClusterToSplit * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
998
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
999 (*prKMeansResult_p)->ppiObjIndicesPerCluster = (int **)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1000 CKREALLOC((*prKMeansResult_p)->ppiObjIndicesPerCluster,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1001 ((*prKMeansResult_p)->iNClusters+1) * sizeof(int *));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1002
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1003
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1004 (*prKMeansResult_p)->ppiObjIndicesPerCluster[iClusterToSplot] =
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1005 (int *) CKREALLOC((*prKMeansResult_p)->ppiObjIndicesPerCluster[iClusterToSplot],
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1006 (*prKMeansResult_p)->piNObjsPerCluster[iClusterToSplot] * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1007
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1008 (*prKMeansResult_p)->ppiObjIndicesPerCluster[iNewClusterIdx] =
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1009 (int *) CKMALLOC((*prKMeansResult_p)->piNObjsPerCluster[iNewClusterIdx] * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1010
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1011
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1012 /* now reassign the object indices to the assigned cluster
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1013 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1014 piCurObjToUpdate[0] = 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1015 piCurObjToUpdate[1] = 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1016 for (iN=0; iN<iNObjsInClusterToSplit; iN++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1017 int iThisObjIdx = piObjIndicesOfSplitCluster[iN];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1018 int iThisClusterAssignment = piKClusterAssignments[iN];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1019 int iThisOffset = piCurObjToUpdate[iThisClusterAssignment];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1020 int iThisClusterIndex = 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1021
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1022 if (0 == iThisClusterAssignment) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1023 iThisClusterIndex = iClusterToSplot;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1024 } else if (1 == iThisClusterAssignment) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1025 iThisClusterIndex = iNewClusterIdx;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1026 } else {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1027 /* there used to be code for iK>=2 in r101 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1028 Log(&rLog, LOG_FATAL, "Internal error: split into more than two clusters (got assignment %d)",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1029 piKClusterAssignments[iN]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1030 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1031 #if 0
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1032 Log(&rLog, LOG_FORCED_DEBUG, "Setting (*prKMeansResult_p)->ppiObjIndicesPerCluster[%d][%d] = %d",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1033 iThisClusterIndex, iThisOffset, iThisObjIdx);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1034 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1035 (*prKMeansResult_p)->ppiObjIndicesPerCluster[iThisClusterIndex][iThisOffset] = iThisObjIdx;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1036 piCurObjToUpdate[iThisClusterAssignment]+=1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1037 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1038 CKFREE(piObjIndicesOfSplitCluster);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1039 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1040 for (iN=0; iN<(*prKMeansResult_p)->iNClusters+1; iN++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1041 int iObj;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1042 fprintf(stderr, "DEBUG(%s|%s():%d): Objects in cluster %u: ",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1043 __FILE__, __FUNCTION__, __LINE__, iN);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1044 for (iObj=0; iObj<(*prKMeansResult_p)->piNObjsPerCluster[iN]; iObj++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1045 fprintf(stderr, " %u", (*prKMeansResult_p)->ppiObjIndicesPerCluster[iN][iObj]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1046 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1047 fprintf(stderr, "\n");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1048 (void) fflush(stderr);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1049 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1050 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1051
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1052
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1053 #if TIMING
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1054 StopwatchStop(stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1055 StopwatchDisplay(stdout, "Total time after next round in Bisecting-KMeans: ", stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1056 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1057
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1058
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1059 /* finally: increase number of clusters
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1060 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1061 (*prKMeansResult_p)->iNClusters += 1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1062 iNRounds += 1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1063 CKFREE(piKClusterAssignments);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1064 CKFREE(pdVectorsInClusterToSplit);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1065
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1066 } /* while */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1067 INT_QUEUE_DESTROY(&rClusterSplitQueue);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1068
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1069
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1070 Log(&rLog, LOG_DEBUG,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1071 "Bisecting K-means finished after %d rounds (no more clusters to split)",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1072 iNRounds);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1073
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1074 #if TIMING
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1075 StopwatchFree(stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1076 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1077
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1078 /* @note could use progress/timer */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1079
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1080 CKFREE(pdKClusterCenters);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1081 CKFREE(piCurObjToUpdate);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1082
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1083 return;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1084 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1085 /*** end: BisectingKmeans() ***/
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1086
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1087
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1088
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1089 /**
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1090 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1091 * @brief From scratch reimplementation of mBed: Blackshields et al.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1092 * (2010); PMID 20470396.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1093 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1094 * Idea is a follows:
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1095 * - convert sequences into vectors of distances
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1096 * - cluster the vectors using k-means
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1097 * - cluster each of the k clusters using upgma (used cached distances
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1098 * from above?)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1099 * - join the sub-clusters to create on tree (use UPGMA on k-means
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1100 * medoids)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1101 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1102 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1103 * @param[out] prMbedTree_p
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1104 * Created upgma tree. will be allocated here. use FreeMuscleTree()
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1105 * to free
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1106 * @param[in] prMSeq
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1107 * Multiple sequences
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1108 * @param[in] iPairDistType
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1109 * Distance measure for pairwise alignments
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1110 * @param[in] pcGuidetreeOut
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1111 * Passed down to GuideTreeUpgma()
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1112 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1113 * @return Zero on success, non-zero on error
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1114 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1115 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1116 int
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1117 Mbed(tree_t **prMbedTree_p, mseq_t *prMSeq, const int iPairDistType,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1118 const char *pcGuidetreeOut)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1119 {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1120 /* number of seeds */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1121 int iNumSeeds;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1122 /* seed indices matching prMSeq */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1123 int *piSeeds;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1124 /* seqs represented as distance vectors */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1125 double **ppdSeqVec;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1126 /* kmeans result */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1127 bisecting_kmeans_result_t *prKMeansResult = NULL;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1128 /* distance matrix of kmeans (pre-)cluster centers */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1129 symmatrix_t *prPreClusterDistmat = NULL;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1130 /* auxiliary for symmetric matrix output; tree routines etc */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1131 char **ppcLabels;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1132 int iNodeIndex;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1133 /* mapping of cluster-center tree node indices to corresponding
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1134 cluster */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1135 int *piClusterToTreeNode;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1136 int iClusterIndex;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1137 int iI, uJ;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1138 FILE *pfOut;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1139 progress_t *prSubClusterDistanceProgress;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1140 bool bPrintCR = (LOG_VERBOSE<=rLog.iLogLevelEnabled) ? FALSE : TRUE;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1141
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1142 #if FULL_WITHIN_CLUSTER_DISTANCES
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1143 Log(&rLog, LOG_WARN,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1144 "FULL_WITHIN_CLUSTER_DISTANCES (using min %d / max %d seqs per subcluster) is a quick-hack. Brace yourself...",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1145 MIN_REQUIRED_SEQ_PER_PRECLUSTER, MAX_ALLOWED_SEQ_PER_PRECLUSTER);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1146 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1147
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1148 #if MBED_TIMING
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1149 Stopwatch_t *stopwatch = StopwatchCreate();
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1150 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1151
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1152 assert(NULL != prMbedTree_p);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1153 assert(NULL != prMSeq);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1154
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1155
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1156 iNumSeeds = (int) NUMBER_OF_SEEDS(prMSeq->nseqs);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1157 if (iNumSeeds >= prMSeq->nseqs) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1158 /* -1 is condition for RandomUniqueIntArray */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1159 iNumSeeds = prMSeq->nseqs-1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1160 Log(&rLog, LOG_DEBUG,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1161 "Automatically determined number of seeds is bigger (or equal) the number of sequences. Will set it to %d",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1162 iNumSeeds);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1163 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1164
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1165
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1166 /* Turn sequences into vectors of distances to the seeds
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1167 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1168 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1169 piSeeds = (int *) CKMALLOC(iNumSeeds * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1170 if (0 != SeedSelection(piSeeds, iNumSeeds, SEED_SELECTION, prMSeq)) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1171 Log(&rLog, LOG_ERROR, "Something went wrong during seed selection for mbed");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1172 return -1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1173 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1174 ppdSeqVec = (double **) CKMALLOC(prMSeq->nseqs * sizeof(double *));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1175 for (iI=0; iI<prMSeq->nseqs; iI++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1176 ppdSeqVec[iI] = (double *) CKMALLOC(iNumSeeds * sizeof(double));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1177 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1178 if (0 != SeqToVec(ppdSeqVec, prMSeq, piSeeds, iNumSeeds, iPairDistType)) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1179 Log(&rLog, LOG_ERROR, "Could not convert sequences into vectors for mbed");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1180 return -1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1181 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1182 CKFREE(piSeeds);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1183
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1184
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1185 /* Calculate (pre-)clusters of sequence vectors by applying
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1186 * bisecting kmeans
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1187 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1188 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1189 #if MBED_TIMING
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1190 /* start clock only here, to make sure we don't include pairwise
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1191 * distance computation */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1192 StopwatchZero(stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1193 StopwatchStart(stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1194 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1195
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1196 BisectingKmeans(&prKMeansResult, prMSeq->nseqs, iNumSeeds, ppdSeqVec,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1197 MIN_REQUIRED_SEQ_PER_PRECLUSTER,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1198 MAX_ALLOWED_SEQ_PER_PRECLUSTER);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1199 Log(&rLog, LOG_INFO,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1200 "mBed created %u cluster/s (with a minimum of %d and a soft maximum of %d sequences each)",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1201 prKMeansResult->iNClusters,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1202 MIN_REQUIRED_SEQ_PER_PRECLUSTER,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1203 MAX_ALLOWED_SEQ_PER_PRECLUSTER);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1204
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1205
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1206 #if PRINT_CLUSTER_DISTRIBUTION
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1207 Log(&rLog, LOG_FORCED_DEBUG, "Bisecting Kmeans returned %d clusters", prKMeansResult->iNClusters);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1208 for (iI=0; iI<prKMeansResult->iNClusters; iI++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1209 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1210 int iD;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1211 Log(&rLog, LOG_FORCED_DEBUG, "Diagnostic output for cluster %d follows:", iI);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1212 fprintf(stderr, "DEBUG(%s|%s():%d): center coordinates =",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1213 __FILE__, __FUNCTION__, __LINE__);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1214 for (iD=0; iD<iNumSeeds; iD++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1215 fprintf(stderr, " %f", prKMeansResult->ppdClusterCenters[iI][iD]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1216 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1217 fprintf(stderr, "\n");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1218 fflush(stderr);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1219 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1220 Log(&rLog, LOG_FORCED_DEBUG, "Cluster %d has %d objects assigned",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1221 iI, prKMeansResult->piNObjsPerCluster[iI]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1222 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1223 for (uJ=0; uJ<prKMeansResult->piNObjsPerCluster[iI]; uJ++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1224 int iRealIndex = prKMeansResult->ppiObjIndicesPerCluster[iI][uJ];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1225
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1226 Log(&rLog, LOG_FORCED_DEBUG, "Cluster %u: object %u has index %u (= seq %s)",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1227 iI, uJ, iRealIndex, prMSeq->sqinfo[iRealIndex].name);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1228 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1229 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1230 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1231 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1232
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1233
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1234 /* Cluster pre-clusters produced by k-means.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1235 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1236 * Do this by calculating the vector distances of the cluster
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1237 * centers and applying UPGMA.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1238 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1239 * @note could try to force-balance the tree here
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1240 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1241 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1242 if (0 != NewSymMatrix(&prPreClusterDistmat,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1243 prKMeansResult->iNClusters, prKMeansResult->iNClusters)) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1244 Log(&rLog, LOG_FATAL, "%s", "Memory allocation for pre-cluster distance-matrix failed");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1245 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1246 for (iI=0; iI<prKMeansResult->iNClusters; iI++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1247 for (uJ=iI+1; uJ<prKMeansResult->iNClusters; uJ++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1248 double dDist;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1249 dDist = EuclDist(prKMeansResult->ppdClusterCenters[iI],
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1250 prKMeansResult->ppdClusterCenters[uJ],
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1251 iNumSeeds);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1252 SymMatrixSetValue(prPreClusterDistmat, iI, uJ, dDist);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1253 /* Log(&rLog, LOG_FORCED_DEBUG, "Euclidean distance between clusters %d and %d = %f",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1254 iI, uJ, dDist); */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1255 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1256 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1257
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1258
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1259 /* labels needed for the guide tree building routine only */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1260 ppcLabels = (char **) CKMALLOC(prKMeansResult->iNClusters * sizeof(char*));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1261 for (iI=0; iI<prKMeansResult->iNClusters; iI++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1262 ppcLabels[iI] = (char *) CKMALLOC(32 * sizeof(char));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1263 (void) snprintf(ppcLabels[iI], 32, "Subcluster-%u", iI);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1264 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1265 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1266 Log(&rLog, LOG_FORCED_DEBUG, "%s", "Distance matrix for pre-cluster centers:");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1267 SymMatrixPrint(prPreClusterDistmat, ppcLabels, NULL);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1268 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1269
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1270 GuideTreeUpgma(prMbedTree_p,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1271 ppcLabels, prPreClusterDistmat, NULL);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1272
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1273 for (iI=0; iI<prKMeansResult->iNClusters; iI++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1274 CKFREE(ppcLabels[iI]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1275 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1276 CKFREE(ppcLabels);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1277 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1278 Log(&rLog, LOG_FORCED_DEBUG, "%s", "Cluster-center guide-tree:");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1279 LogTree(*prMbedTree_p);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1280 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1281
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1282
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1283
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1284 /* Now replace each leaf in the pre-cluster-center tree
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1285 * appropriately, i.e. with the corresponding sub-cluster.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1286 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1287 * For each leaf/sub-cluster, create a distance matrix for the
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1288 * corresponding sequences. Use distances between vectors as
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1289 * approximated distances between sequences. Then create a
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1290 * guide-tree by applying UPGMA.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1291 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1292 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1293
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1294 /* Get a mapping of (pre)cluster number and leaf-node indices in
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1295 * the cluster-center tree. We can add trees to prMbedTree_p
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1296 * because AppendTrees() guarantees that no other than the node to
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1297 * append to changes.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1298 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1299 piClusterToTreeNode = (int*)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1300 CKMALLOC(prKMeansResult->iNClusters * sizeof(int));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1301 iNodeIndex = FirstDepthFirstNode(*prMbedTree_p);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1302 do {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1303 if (IsLeaf(iNodeIndex, *prMbedTree_p)) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1304 int iLeafId = GetLeafId(iNodeIndex, *prMbedTree_p);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1305 piClusterToTreeNode[iLeafId] = iNodeIndex;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1306 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1307 iNodeIndex = NextDepthFirstNode(iNodeIndex, *prMbedTree_p);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1308 } while (NULL_NEIGHBOR != iNodeIndex);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1309
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1310
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1311
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1312
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1313 /* Now step through all the leafs and replace them with the
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1314 * corresponding sub-trees
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1315 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1316 NewProgress(&prSubClusterDistanceProgress, LogGetFP(&rLog, LOG_INFO),
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1317 "Distance calculation within sub-clusters", bPrintCR);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1318 /* for each cluster */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1319 for (iClusterIndex=0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1320 iClusterIndex < prKMeansResult->iNClusters; iClusterIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1321 /* distance matrix for the sub-cluster */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1322 symmatrix_t *prWithinClusterDistances = NULL;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1323 int iNSeqInCluster;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1324 tree_t *prSubClusterTree = NULL;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1325
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1326 ProgressLog(prSubClusterDistanceProgress,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1327 iClusterIndex, prKMeansResult->iNClusters, FALSE);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1328
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1329 #if FULL_WITHIN_CLUSTER_DISTANCES
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1330 mseq_t *prSubClusterMSeq;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1331 int iPairDistType;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1332 int iSeqIndex;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1333
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1334 Log(&rLog, LOG_DEBUG,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1335 "%s\n", "Calling new Mbed use makes only sense if nseq>MAX_ALLOWED_SEQ_PER_PRECLUSTER");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1336
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1337 if (TRUE == prMSeq->aligned) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1338 iPairDistType = PAIRDIST_SQUIDID_KIMURA;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1339 } else {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1340 iPairDistType = PAIRDIST_KTUPLE;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1341 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1342 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1343
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1344 iNSeqInCluster = prKMeansResult->piNObjsPerCluster[iClusterIndex];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1345 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1346 Log(&rLog, LOG_FORCED_DEBUG, "#seqs in subcluster no %d = %d",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1347 iClusterIndex, iNSeqInCluster);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1348 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1349
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1350 #if FULL_WITHIN_CLUSTER_DISTANCES
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1351 /* create an mseq structure for sequences in this cluster
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1352 * don't need most of the members (e.g. orig_seq) but copy
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1353 * them anyway for the sake of completeness
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1354 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1355 NewMSeq(&prSubClusterMSeq);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1356
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1357 prSubClusterMSeq->nseqs = iNSeqInCluster;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1358 prSubClusterMSeq->seqtype = prMSeq->seqtype;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1359 if (NULL!=prMSeq->filename) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1360 prSubClusterMSeq->filename = CkStrdup(prMSeq->filename);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1361 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1362 prSubClusterMSeq->seq = (char **)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1363 CKMALLOC(prSubClusterMSeq->nseqs * sizeof(char *));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1364 prSubClusterMSeq->orig_seq = (char **)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1365 CKMALLOC(prSubClusterMSeq->nseqs * sizeof(char *));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1366 prSubClusterMSeq->sqinfo = (SQINFO *)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1367 CKMALLOC(prSubClusterMSeq->nseqs * sizeof(SQINFO));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1368
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1369 for (iSeqIndex=0; iSeqIndex<iNSeqInCluster; iSeqIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1370 int iRealSeqIndex = prKMeansResult->ppiObjIndicesPerCluster[iClusterIndex][iSeqIndex];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1371 prSubClusterMSeq->seq[iSeqIndex] = CkStrdup(prMSeq->seq[iRealSeqIndex]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1372 prSubClusterMSeq->orig_seq[iSeqIndex] = CkStrdup(prMSeq->orig_seq[iRealSeqIndex]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1373 SeqinfoCopy(&prSubClusterMSeq->sqinfo[iSeqIndex], &prMSeq->sqinfo[iRealSeqIndex]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1374 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1375 Log(&rLog, LOG_DEBUG, "seq no %d in cluster %d is %s (real index = %d)",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1376 iSeqIndex, iClusterIndex, prSubClusterMSeq->sqinfo[iSeqIndex].name,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1377 iRealSeqIndex);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1378 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1379 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1380 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1381
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1382
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1383 /* Create a distance matrix for this sub-cluster
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1384 * (prWithinClusterDistances) by using the vector distances or
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1385 * ktuple distances.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1386 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1387 * Then apply UPGMA to get a subcluster tree
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1388 * (prSubClusterTree) and append created tree to the
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1389 * pre-cluster-tree (prMbedTree_p)
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1390 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1391 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1392 #if FULL_WITHIN_CLUSTER_DISTANCES
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1393
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1394 /* compute distances, but be quiet */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1395 if (PairDistances(&prWithinClusterDistances, prSubClusterMSeq, iPairDistType,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1396 0, prSubClusterMSeq->nseqs, 0, prSubClusterMSeq->nseqs,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1397 NULL, NULL)) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1398 Log(&rLog, LOG_ERROR, "Couldn't compute pair distances");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1399 return -1;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1400 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1401 #else
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1402 if (NewSymMatrix(&prWithinClusterDistances, iNSeqInCluster, iNSeqInCluster)!=0) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1403 Log(&rLog, LOG_FATAL, "%s", "Memory allocation for disparity matrix failed");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1404 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1405 for (iI=0; iI<iNSeqInCluster; iI++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1406 int iRealIndexI = prKMeansResult->ppiObjIndicesPerCluster[iClusterIndex][iI];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1407
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1408 for (uJ=iI+1; uJ<iNSeqInCluster; uJ++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1409 int iRealIndexJ = prKMeansResult->ppiObjIndicesPerCluster[iClusterIndex][uJ];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1410 double dist;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1411
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1412 /* Log(&rLog, LOG_FORCED_DEBUG, "Cluster %d: compute distance between %d:%s and %d:%s",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1413 iClusterIndex, i, prMSeq->sqinfo[iRealIndexI].name,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1414 uJ, prMSeq->sqinfo[iRealIndexJ].name); */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1415
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1416 if (1 == USE_EUCLIDEAN_DISTANCE) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1417 dist = EuclDist(ppdSeqVec[iRealIndexI],
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1418 ppdSeqVec[iRealIndexJ], iNumSeeds);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1419 } else {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1420 dist = CosDist(ppdSeqVec[iRealIndexI],
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1421 ppdSeqVec[iRealIndexJ], iNumSeeds);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1422 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1423 SymMatrixSetValue(prWithinClusterDistances, iI, uJ, dist);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1424 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1425 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1426 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1427
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1428 /* labels needed for the guide tree building routine only */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1429 ppcLabels = (char**) CKMALLOC(iNSeqInCluster * sizeof(char*));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1430 for (iI=0; iI<iNSeqInCluster; iI++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1431 #if FULL_WITHIN_CLUSTER_DISTANCES
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1432 ppcLabels[iI] = prSubClusterMSeq->sqinfo[iI].name;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1433 #else
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1434 int iRealIndex = prKMeansResult->ppiObjIndicesPerCluster[iClusterIndex][iI];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1435 ppcLabels[iI] = prMSeq->sqinfo[iRealIndex].name;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1436 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1437 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1438 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1439 Log(&rLog, LOG_FORCED_DEBUG, "Distance matrix for seqs within sub cluster %d/%d",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1440 iClusterIndex, prKMeansResult->iNClusters);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1441 SymMatrixPrint(prWithinClusterDistances, ppcLabels, NULL);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1442 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1443
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1444
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1445 GuideTreeUpgma(&prSubClusterTree, ppcLabels,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1446 prWithinClusterDistances, NULL);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1447
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1448 CKFREE(ppcLabels); /* don't free members, they just point */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1449 #if 0
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1450 Log(&rLog, LOG_FORCED_DEBUG, "Cluster %d guide-tree:", iClusterIndex);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1451 LogTree(prSubClusterTree);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1452 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1453
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1454
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1455 /* The guide tree id's (that point to the sequences) now start
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1456 * from 0, i.e. the association with the prMSeq numbering is
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1457 * broken and fixed in the following
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1458 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1459 for (iNodeIndex = 0; iNodeIndex < (int)GetNodeCount(prSubClusterTree); iNodeIndex++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1460 if (IsLeaf(iNodeIndex, prSubClusterTree)) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1461 int iLeafId = GetLeafId(iNodeIndex, prSubClusterTree);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1462 int iRealId = prKMeansResult->ppiObjIndicesPerCluster[iClusterIndex][iLeafId];
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1463 #if 0
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1464 Log(&rLog, LOG_FORCED_DEBUG, "Correcting leaf node %d which has (wrong) id %d and name %s to id %d (prMSeq name %s)",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1465 iNodeIndex, iLeafId,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1466 GetLeafName(iNodeIndex, prSubClusterTree),
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1467 iRealId, prMSeq->sqinfo[iRealId].name);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1468 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1469 SetLeafId(prSubClusterTree, iNodeIndex, iRealId);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1470 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1471 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1472
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1473
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1474 /* Append the newly created tree (prSubClusterTree) to the
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1475 * corresponding node index of prMbedTree_p.
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1476 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1477 #if TRACE
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1478 Log(&rLog, LOG_FORCED_DEBUG, "Will join trees at leaf node %d = %s",
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1479 piClusterToTreeNode[iClusterIndex],
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1480 GetLeafName(piClusterToTreeNode[iClusterIndex], *prMbedTree_p));
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1481 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1482
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1483 AppendTree(*prMbedTree_p,
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1484 piClusterToTreeNode[iClusterIndex],
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1485 prSubClusterTree);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1486 /* Note: piClusterToTreeNode is still valid, because
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1487 * AppendTrees() guarantees that no other than the node to
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1488 * append to changes. */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1489
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1490 #if 0
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1491 Log(&rLog, LOG_FORCED_DEBUG, "%s", "prMbedTree_p after cluster %d has appended:", iClusterIndex);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1492 LogTree(*prMbedTree_p);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1493
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1494 if (0) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1495 char fname[] = "mbed-joined-tree.dnd";
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1496 FILE *pfOut;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1497 if (NULL == (pfOut = fopen(fname, "w"))) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1498 Log(&rLog, LOG_FATAL, "Couldn't open %s for writing", fname);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1499 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1500 MuscleTreeToFile(pfOut, *prMbedTree_p);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1501 Log(&rLog, LOG_FORCED_DEBUG, "Joined tree written to %s", fname);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1502 fclose(pfOut);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1503 Log(&rLog, LOG_FATAL, "DEBUG EXIT");
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1504 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1505 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1506
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1507 /* cleanup
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1508 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1509 FreeMuscleTree(prSubClusterTree);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1510 FreeSymMatrix(&prWithinClusterDistances);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1511 #if FULL_WITHIN_CLUSTER_DISTANCES
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1512 FreeMSeq(&prSubClusterMSeq);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1513 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1514 } /* end for each cluster */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1515 ProgressDone(prSubClusterDistanceProgress);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1516 FreeProgress(&prSubClusterDistanceProgress);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1517
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1518
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1519 if (NULL != pcGuidetreeOut) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1520 if (NULL == (pfOut = fopen(pcGuidetreeOut, "w"))) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1521 Log(&rLog, LOG_ERROR, "Couldn't open %s for writing", pcGuidetreeOut);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1522 } else {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1523 MuscleTreeToFile(pfOut, *prMbedTree_p);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1524 Log(&rLog, LOG_INFO, "Guide tree written to %s", pcGuidetreeOut);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1525 (void) fclose(pfOut);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1526 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1527 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1528
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1529
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1530 /* cleanup
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1531 *
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1532 */
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1533 #if MBED_TIMING
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1534 StopwatchStop(stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1535 StopwatchDisplay(stdout, "mBed time (without pairwise distance computation): ", stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1536 StopwatchFree(stopwatch);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1537 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1538
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1539 FreeKMeansResult(&prKMeansResult);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1540 FreeSymMatrix(&prPreClusterDistmat);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1541 for (iI=0; iI<prMSeq->nseqs; iI++) {
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1542 CKFREE(ppdSeqVec[iI]);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1543 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1544 CKFREE(ppdSeqVec);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1545 CKFREE(piClusterToTreeNode);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1546
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1547 #ifndef NDEBUG
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1548 TreeValidate(*prMbedTree_p);
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1549 #endif
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1550
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1551 return 0;
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1552 }
ff1768533a07 Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff changeset
1553 /*** end: Mbed() ***/