Mercurial > repos > clustalomega > clustalomega
annotate clustalomega/clustal-omega-0.2.0/src/clustal/mbed.c @ 0:ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author | clustalomega |
---|---|
date | Tue, 07 Jun 2011 17:04:25 -0400 |
parents | |
children |
rev | line source |
---|---|
0
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1 /* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
2 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
3 /********************************************************************* |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
4 * Clustal Omega - Multiple sequence alignment |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
5 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
6 * Copyright (C) 2010 University College Dublin |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
7 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
8 * Clustal-Omega is free software; you can redistribute it and/or |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
9 * modify it under the terms of the GNU General Public License as |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
10 * published by the Free Software Foundation; either version 2 of the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
11 * License, or (at your option) any later version. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
12 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
13 * This file is part of Clustal-Omega. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
14 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
15 ********************************************************************/ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
16 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
17 /* |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
18 * RCS $Id: mbed.c 242 2011-05-27 14:04:21Z andreas $ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
19 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
20 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
21 * Reimplementation from scratch of mBed: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
22 * Blackshields et al. (2010); PMID 20470396 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
23 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
24 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
25 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
26 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
27 #ifdef HAVE_CONFIG_H |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
28 #include "config.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
29 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
30 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
31 #include <assert.h> |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
32 #include <float.h> |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
33 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
34 #include "mbed.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
35 #include "pair_dist.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
36 #include "symmatrix.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
37 #include "ktuple_pair.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
38 #include "tree.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
39 #include "util.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
40 #include "progress.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
41 #include "queue.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
42 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
43 #include "log.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
44 #include "kmpp/KMeans.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
45 #include "mbed.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
46 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
47 #define TIMING 0 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
48 #if TIMING |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
49 #include "squid/stopwatch.h" |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
50 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
51 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
52 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
53 /* If FULL_WITHIN_CLUSTER_DISTANCES is not 0, distances within each |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
54 * bisecting kmeans subcluster are not estimated using the vectors, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
55 * but calculated normally (using ktuple or kimura). Surprisingly this |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
56 * results in 3% loss on a Homfam p24-h2010-08-09 subset (100-5000 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
57 * seqs in test, at least 5 ref seqs; MAX_SEQ 100 vs 10000; NUM_SEEDS |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
58 * log2 instead of log2^2). And of course it slows things down. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
59 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
60 #define FULL_WITHIN_CLUSTER_DISTANCES 1 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
61 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
62 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
63 /* Cluster size limits. Maximum is a soft limit, which might be |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
64 * exceeded if a K-Means split was unsuccesful, where unsuccesful |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
65 * might also mean that the minimum required number seqs. was not |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
66 * reached */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
67 #if FULL_WITHIN_CLUSTER_DISTANCES |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
68 static const int MAX_ALLOWED_SEQ_PER_PRECLUSTER = 100; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
69 static const int MIN_REQUIRED_SEQ_PER_PRECLUSTER = 1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
70 #else |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
71 static const int MAX_ALLOWED_SEQ_PER_PRECLUSTER = 10000; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
72 static const int MIN_REQUIRED_SEQ_PER_PRECLUSTER = 100; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
73 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
74 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
75 /* How many restarts per bisecting kmeans split. 10 give 0.5% increase |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
76 * in quality on original HOMFAM over just 1. It also increases kmeans |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
77 * time by a factor of ~3, but overall time is insignificant |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
78 * compared to pairdist/progressive alignment part. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
79 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
80 static const int RESTARTS_PER_SPLIT = 10; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
81 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
82 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
83 /* Use standard kmeans (lloyds) or kmeans++. Both are almost |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
84 * indistinguishable here, although kmeans++ is slightly ahead the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
85 * fewer seeds you pick (and it should be better in theory) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
86 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
87 #define USE_KMEANS_LLOYDS 0 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
88 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
89 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
90 #ifndef HAVE_LOG2 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
91 #define log2(x) (log(x) / 0.69314718055994530942) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
92 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
93 #define NUMBER_OF_SEEDS(n) pow(log2(((double)n)), 2) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
94 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
95 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
96 /* Seed selection method: SAMPLE_SEEDS_BY_LENGTH is the original mBed |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
97 * approach: Sample iSeeds sequence with constant stride from length-sorted X. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
98 * It might be better to pick the seeds randomly, because length sorting might |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
99 * be more prone to including outliers (e.g. very long and very short seqs). |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
100 * However, we could never observer such a thing. So just stick to the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
101 * original version as this also removes the random element. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
102 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
103 enum SEED_SELECTION_TYPE { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
104 SELECT_SEEDS_RANDOMLY, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
105 SELECT_SEEDS_BY_LENGTH |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
106 }; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
107 #define SEED_SELECTION SELECT_SEEDS_BY_LENGTH |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
108 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
109 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
110 /* Tests on BAliBase (RV11,12,20,30,40,50; 10 runs each) show there is |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
111 * no difference between mbed-trees created from cosine or euclidean |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
112 * distances (simple version, just using disparities). |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
113 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
114 #define USE_EUCLIDEAN_DISTANCE 1 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
115 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
116 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
117 /* print some mbed pre-cluster usage to screen */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
118 #define PRINT_CLUSTER_DISTRIBUTION 0 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
119 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
120 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
121 #define TRACE 0 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
122 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
123 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
124 typedef struct { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
125 /* Number of final clusters |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
126 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
127 int iNClusters; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
128 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
129 /* Coordinates (columns) for each cluster (rows) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
130 * valid indices: [0...iNClusters][0...dim-1] |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
131 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
132 double **ppdClusterCenters; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
133 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
134 /* Dimensionality of input data and cluster center coordinates |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
135 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
136 int iDim; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
137 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
138 /* Number of objects per cluster |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
139 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
140 int *piNObjsPerCluster; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
141 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
142 /* Objects (indices) for each cluster. [i][j] will point to (index |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
143 * of) object j in cluster i |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
144 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
145 int **ppiObjIndicesPerCluster; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
146 } bisecting_kmeans_result_t; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
147 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
148 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
149 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
150 /** |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
151 * @brief Free KMeans result structure. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
152 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
153 * @param[out] prResult_p |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
154 * K-Means result to free |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
155 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
156 * @see NewKMeansResult() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
157 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
158 void |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
159 FreeKMeansResult(bisecting_kmeans_result_t **prResult_p) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
160 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
161 int iAux; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
162 CKFREE((*prResult_p)->piNObjsPerCluster); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
163 for (iAux=0; iAux<(*prResult_p)->iNClusters; iAux++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
164 CKFREE((*prResult_p)->ppiObjIndicesPerCluster[iAux]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
165 CKFREE((*prResult_p)->ppdClusterCenters[iAux]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
166 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
167 CKFREE((*prResult_p)->ppiObjIndicesPerCluster); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
168 CKFREE((*prResult_p)->ppdClusterCenters); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
169 (*prResult_p)->iNClusters = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
170 (*prResult_p)->iDim = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
171 CKFREE(*prResult_p); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
172 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
173 /*** end: FreeKMeansResult() ***/ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
174 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
175 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
176 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
177 /** |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
178 * @brief Allocate new KMeans result |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
179 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
180 * @param[out] prKMeansResult_p |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
181 * K-Means result to free |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
182 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
183 * @see NewKMeansResult() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
184 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
185 void |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
186 NewKMeansResult(bisecting_kmeans_result_t **prKMeansResult_p) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
187 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
188 (*prKMeansResult_p) = (bisecting_kmeans_result_t *) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
189 CKCALLOC(1, sizeof(bisecting_kmeans_result_t)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
190 (*prKMeansResult_p)->iNClusters = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
191 (*prKMeansResult_p)->iDim = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
192 (*prKMeansResult_p)->ppdClusterCenters = NULL; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
193 (*prKMeansResult_p)->piNObjsPerCluster = NULL; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
194 (*prKMeansResult_p)->ppiObjIndicesPerCluster = NULL; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
195 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
196 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
197 /*** end: NewKMeansResult() ***/ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
198 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
199 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
200 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
201 /** |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
202 * @brief Calculate the euclidean distance between two vectors |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
203 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
204 * @param[in] v1 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
205 * First vector with dim dimensions |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
206 * @param[in] v2 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
207 * Second vector with dim dimensions |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
208 * @param[in] dim |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
209 * Dimensionality of v1 and v2 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
210 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
211 * @return euclidean distance as double |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
212 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
213 * @note Could probably be inlined. Also perfect for SSE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
214 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
215 double |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
216 EuclDist(const double *v1, const double *v2, const int dim) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
217 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
218 int i; /* aux */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
219 double dist; /* returned distance */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
220 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
221 assert(v1!=NULL); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
222 assert(v2!=NULL); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
223 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
224 dist = 0.0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
225 for (i=0; i<dim; i++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
226 dist += pow(v1[i]-v2[i], 2.0); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
227 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
228 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
229 return sqrt(dist); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
230 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
231 /*** end: EuclDist() ***/ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
232 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
233 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
234 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
235 /** |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
236 * @brief Calculate the cosine distance between two vectors |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
237 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
238 * @param[in] v1 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
239 * First vector with dim dimensions |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
240 * @param[in] v2 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
241 * Second vector with dim dimensions |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
242 * @param[in] dim |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
243 * Dimensionality of v1 and v2 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
244 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
245 * @return cosine distance as double |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
246 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
247 * @note Could probably be inlined. Also perfect for SSE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
248 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
249 double |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
250 CosDist(const double *v1, const double *v2, const int dim) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
251 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
252 int i; /* aux */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
253 double dist; /* returned distance */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
254 double s, sq1, sq2; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
255 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
256 assert(v1!=NULL); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
257 assert(v2!=NULL); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
258 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
259 s = 0.0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
260 sq1 = 0.0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
261 sq2 = 0.0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
262 for (i=0; i<dim; i++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
263 s += (v1[i]*v2[i]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
264 sq1 += pow(v1[i], 2.0); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
265 sq2 += pow(v2[i], 2.0); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
266 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
267 sq1 = sqrt(sq1); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
268 sq2 = sqrt(sq2); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
269 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
270 if ((sq1 * sq2) < DBL_EPSILON) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
271 dist = 1 - s / (sq1 * sq2); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
272 } else { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
273 /* if the denominator gets small, the fraction gets big, hence dist |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
274 gets small: */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
275 dist = 0.0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
276 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
277 return dist; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
278 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
279 /*** end: CosDist() ***/ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
280 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
281 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
282 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
283 /** |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
284 * @brief convert sequences into mbed-like (distance) vector |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
285 * representation. Seeds (prMSeq sequence indices) have to be picked before |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
286 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
287 * @param[out] ppdSeqVec |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
288 * Pointer to preallocated matrix of size nseqs x iSeeds |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
289 * @param[in] prMSeq |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
290 * Sequences which are to be converted |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
291 * @param[in] piSeeds |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
292 * Array of sequences in prMSeq which are to be used as seeds. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
293 * @param[in] iNumSeeds |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
294 * Number of seeds/elements in piSeeds |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
295 * @param[in] iPairDistType |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
296 * Type of pairwise distance comparison |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
297 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
298 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
299 int |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
300 SeqToVec(double **ppdSeqVec, mseq_t *prMSeq, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
301 int *piSeeds, const int iNumSeeds, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
302 const int iPairDistType) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
303 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
304 symmatrix_t *prDistmat = NULL; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
305 int iSeqIndex; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
306 int iSeedIndex; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
307 /* indices for restoring order */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
308 int *restore; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
309 /* sorted copy of piSeeds */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
310 int *piSortedSeeds; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
311 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
312 #if TIMING |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
313 Stopwatch_t *stopwatch = StopwatchCreate(); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
314 StopwatchZero(stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
315 StopwatchStart(stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
316 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
317 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
318 assert(NULL != ppdSeqVec); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
319 assert(iNumSeeds>0 && iNumSeeds<=prMSeq->nseqs); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
320 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
321 piSortedSeeds = CKMALLOC(iNumSeeds * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
322 memcpy(piSortedSeeds, piSeeds, iNumSeeds*sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
323 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
324 /* need them sorted, otherwise the swapping approach below might |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
325 * break |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
326 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
327 qsort(piSortedSeeds, iNumSeeds, sizeof(int), IntCmp); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
328 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
329 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
330 /* rearrange mseq order so that we can use ktuple_pairdist code as |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
331 * is. That is, swap seeds and non-seeds such that the seeds end |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
332 * up in front of mseq. This way we can use the KTuplePairDist() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
333 * code, without making a copy of mseq. Also, keep an array of |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
334 * indices which serves to restore the original sequence order |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
335 * after putting the seeds in front |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
336 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
337 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
338 restore = (int *) CKMALLOC(prMSeq->nseqs * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
339 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
340 restore[iSeqIndex] = iSeqIndex; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
341 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
342 for (iSeedIndex=0; iSeedIndex<iNumSeeds; iSeedIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
343 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
344 Log(&rLog, LOG_FORCED_DEBUG, "Swapping seqs %d and %u", piSortedSeeds[iSeedIndex], iSeedIndex); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
345 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
346 if (piSortedSeeds[iSeedIndex]!=iSeedIndex) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
347 int swap; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
348 SeqSwap(prMSeq, piSortedSeeds[iSeedIndex], iSeedIndex); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
349 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
350 swap = restore[iSeedIndex]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
351 restore[iSeedIndex] = restore[piSortedSeeds[iSeedIndex]]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
352 restore[piSortedSeeds[iSeedIndex]] = swap; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
353 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
354 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
355 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
356 printf("DEBUG(%s|%s():%d): restore indices =", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
357 __FILE__, __FUNCTION__, __LINE__); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
358 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
359 printf(" %u:%u", iSeqIndex, restore[iSeqIndex]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
360 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
361 printf("\n"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
362 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
363 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
364 Log(&rLog, LOG_FORCED_DEBUG, "swapped seq no %d now: seq = %s", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
365 iSeqIndex, prMSeq->sqinfo[iSeqIndex].name); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
366 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
367 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
368 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
369 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
370 /* convert sequences into vectors of distances by calling pairwise |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
371 * distance function for each seq/seed pair |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
372 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
373 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
374 if (0 != PairDistances(&prDistmat, prMSeq, iPairDistType, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
375 0, iNumSeeds, 0, prMSeq->nseqs, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
376 NULL, NULL)) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
377 Log(&rLog, LOG_ERROR, "Could not compute pairwise distances for mbed."); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
378 FreeSymMatrix(&prDistmat); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
379 CKFREE(piSortedSeeds); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
380 CKFREE(restore); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
381 return -1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
382 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
383 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
384 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
385 char **labels; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
386 labels = (char **) CKMALLOC(prMSeq->nseqs * sizeof(char *)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
387 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
388 labels[iSeqIndex] = prMSeq->sqinfo[iSeqIndex].name; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
389 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
390 SymMatrixPrint(prDistmat, labels, NULL); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
391 CKFREE(labels); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
392 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
393 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
394 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
395 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
396 /* update ppdSeqVec according to prDistmat. keep in mind that we |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
397 * changed mseq order |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
398 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
399 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
400 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
401 for (iSeedIndex=0; iSeedIndex<iNumSeeds; iSeedIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
402 ppdSeqVec[restore[iSeqIndex]][iSeedIndex] = |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
403 SymMatrixGetValue(prDistmat, iSeqIndex, iSeedIndex); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
404 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
405 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
406 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
407 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
408 /* restore mseq order by reverse swapping |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
409 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
410 * need reverse order now, so start from top index |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
411 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
412 iSeedIndex=iNumSeeds-1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
413 do { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
414 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
415 Log(&rLog, LOG_FORCED_DEBUG, "Swapping seqs %d and %d", piSortedSeeds[iSeedIndex], iSeedIndex); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
416 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
417 if (piSortedSeeds[iSeedIndex]!=iSeedIndex) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
418 int swap; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
419 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
420 SeqSwap(prMSeq, piSortedSeeds[iSeedIndex], iSeedIndex); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
421 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
422 swap = restore[iSeedIndex]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
423 restore[iSeedIndex] = restore[piSortedSeeds[iSeedIndex]]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
424 restore[piSortedSeeds[iSeedIndex]] = swap; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
425 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
426 } while (0 != iSeedIndex--); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
427 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
428 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
429 Log(&rLog, LOG_FORCED_DEBUG, "restored seq no %d: seq = %s %s", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
430 iSeqIndex, prMSeq->sqinfo[iSeqIndex].name, prMSeq->seq[iSeqIndex]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
431 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
432 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
433 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
434 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
435 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
436 printf("DEBUG: seq %-4u as distance vector =", iSeqIndex); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
437 for (iSeedIndex=0; iSeedIndex<iNumSeeds; iSeedIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
438 printf(" %f", ppdSeqVec[iSeqIndex][iSeedIndex]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
439 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
440 printf("\n"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
441 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
442 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
443 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
444 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
445 FreeSymMatrix(&prDistmat); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
446 CKFREE(restore); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
447 #if TIMING |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
448 StopwatchStop(stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
449 StopwatchDisplay(stdout, "Total time for SeqToVec(): ", stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
450 StopwatchFree(stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
451 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
452 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
453 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
454 return 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
455 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
456 /*** end: SeqToVec() ***/ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
457 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
458 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
459 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
460 /** |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
461 * @brief Select seeds to be used from an prMSeq |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
462 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
463 * @param[out] piSeeds |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
464 * Will store the indices of prMSeqs seqs used to be as seeds here. Must be preallocated. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
465 * @param[in] iNumSeeds |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
466 * Number of seeds to be picked |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
467 * @param[in] iSelectionMethod |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
468 * Seed selection method to be used |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
469 * @param[in] prMSeq |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
470 * The prMSeq structure to pick sequences from |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
471 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
472 * @return: Non-zero on failure |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
473 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
474 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
475 int |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
476 SeedSelection(int *piSeeds, int iNumSeeds, int iSelectionMethod, mseq_t *prMSeq) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
477 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
478 /* seed iterator */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
479 int iSeedIndex; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
480 /* sequence iterator */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
481 int iSeqIndex; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
482 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
483 if (SELECT_SEEDS_RANDOMLY == iSelectionMethod) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
484 int *piPermArray; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
485 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
486 Log(&rLog, LOG_INFO, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
487 "Using %d seeds (randomly chosen) for mBed (from a total of %d sequences)", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
488 iNumSeeds, prMSeq->nseqs); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
489 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
490 PermutationArray(&piPermArray, iNumSeeds); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
491 /* copy to piSeeds */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
492 for (iSeedIndex=0; iSeedIndex<iNumSeeds; iSeedIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
493 piSeeds[iSeedIndex] = piPermArray[iSeedIndex]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
494 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
495 CKFREE(piPermArray); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
496 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
497 } else if (SELECT_SEEDS_BY_LENGTH == iSelectionMethod) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
498 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
499 /* step size for picking with constant stride */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
500 int iStep; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
501 int *piSeqLen = CKMALLOC(prMSeq->nseqs * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
502 int *piOrder = CKMALLOC(prMSeq->nseqs * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
503 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
504 Log(&rLog, LOG_INFO, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
505 "Using %d seeds (chosen with constant stride from length sorted seqs) for mBed (from a total of %d sequences)", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
506 iNumSeeds, prMSeq->nseqs); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
507 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
508 iStep = prMSeq->nseqs / iNumSeeds; /* iStep will never get too big |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
509 * due to rounding */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
510 /* first get an array of seq indices order according to |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
511 corresponding sequence length: piOrder */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
512 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
513 piSeqLen[iSeqIndex] = prMSeq->sqinfo[iSeqIndex].len; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
514 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
515 QSortAndTrackIndex(piOrder, piSeqLen, prMSeq->nseqs, 'd', FALSE); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
516 #if 0 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
517 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
518 Log(&rLog, LOG_FORCED_DEBUG, "Descending order (no %d): seq %d has len %d)", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
519 iSeqIndex, piOrder[iSeqIndex], piSeqLen[piOrder[iSeqIndex]]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
520 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
521 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
522 CKFREE(piSeqLen); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
523 iSeedIndex = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
524 for (iSeqIndex=0; iSeedIndex<iNumSeeds; iSeqIndex+=iStep) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
525 piSeeds[iSeedIndex++] = piOrder[iSeqIndex]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
526 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
527 CKFREE(piOrder); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
528 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
529 } else { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
530 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
531 Log(&rLog, LOG_ERROR, "Internal error: unknown seed selection type"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
532 return -1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
533 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
534 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
535 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
536 #ifdef PICK_SEEDS_FROM_KMEANS_CLUSTERS |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
537 /* do initial mbedding and kmeans. then pick seeds from each cluster and |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
538 do full mbed with these seeds. idea is that those seeds represent the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
539 sequence space better than random seeds. however, this reduces the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
540 quality slightly. random is almost always better. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
541 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
542 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
543 if (1) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
544 /* seqs represented as distance vectors */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
545 double **ppdSeqVec; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
546 double dCost = -1.0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
547 /* assignments for each seq to the K newly created clusters */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
548 int *piKMeansClusterAssignments = CKMALLOC(prMSeq->nseqs * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
549 double *pdKMeansClusterCenters = CKCALLOC(iNumSeeds * iNumSeeds, sizeof(double)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
550 /* create a copy of ppdSeqVec suitable for KMeans */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
551 double *pdKMeansVectors = CKMALLOC(prMSeq->nseqs * iNumSeeds * sizeof(double)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
552 bool *pbClusterUsed = CKCALLOC(iNumSeeds, sizeof(bool)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
553 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
554 Log(&rLog, LOG_FORCED_DEBUG, "%s", "FIXME Experimental feature: K-means on first round embedding to get better seeds"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
555 Log(&rLog, LOG_FORCED_DEBUG, "%s", "FIXME Reuse seeds from clusters"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
556 Log(&rLog, LOG_FORCED_DEBUG, "%s", "FIXME Could try log(n) instead of log(n)^2 in first round"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
557 Log(&rLog, LOG_FORCED_DEBUG, "%s", "FIXME hardcoded iPairDistType PAIRDIST_KTUPLE"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
558 Log(&rLog, LOG_FORCED_DEBUG, "%s", "FIXME Show that this is fast *and* good"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
559 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
560 /* |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
561 Log(&rLog, LOG_FORCED_DEBUG, "%s", "Overriding iNumSeeds"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
562 iNumSeeds = (int) pow(log2((double)prMSeq->nseqs), 2); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
563 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
564 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
565 ppdSeqVec = (double **) CKMALLOC(prMSeq->nseqs * sizeof(double *)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
566 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
567 ppdSeqVec[iSeqIndex] = (double *) CKMALLOC(iNumSeeds * sizeof(double)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
568 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
569 if (0 != SeqToVec(ppdSeqVec, prMSeq, piSeeds, iNumSeeds, PAIRDIST_KTUPLE)) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
570 Log(&rLog, LOG_ERROR, "Could not convert sequences into vectors for mbed"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
571 return -1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
572 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
573 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
574 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
575 int iDim; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
576 for (iDim=0; iDim<iNumSeeds; ++iDim) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
577 pdKMeansVectors[iDim*iSeqIndex + iDim] = ppdSeqVec[iSeqIndex][iDim]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
578 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
579 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
580 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
581 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
582 Log(&rLog, LOG_FORCED_DEBUG, "%s\n", "FIXME hardcoded RESTARTS_PER_SPLIT"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
583 dCost = KMeans(prMSeq->nseqs, iNumSeeds, iNumSeeds, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
584 pdKMeansVectors, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
585 RESTARTS_PER_SPLIT, USE_KMEANS_LLOYDS, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
586 pdKMeansClusterCenters, piKMeansClusterAssignments); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
587 Log(&rLog, LOG_FORCED_DEBUG, "Best split cost = %f", dCost); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
588 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
589 Log(&rLog, LOG_FORCED_DEBUG, "%s", "FIXME Check for Nan in cluster centers"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
590 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
591 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
592 Log(&rLog, LOG_FORCED_DEBUG, "%s", "K-Means output:"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
593 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
594 Log(&rLog, LOG_FORCED_DEBUG, " Raw assignment: Seq %u (%s) to cluster %d", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
595 iSeqIndex, prMSeq->sqinfo[iSeqIndex].name, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
596 piKMeansClusterAssignments[iSeqIndex]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
597 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
598 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
599 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
600 Log(&rLog, LOG_FORCED_DEBUG, "FIXME %s", "proof of concept implementation: Pick first sequences from each clusters instead of reusing seeds"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
601 iSeedIndex = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
602 for (iSeqIndex=0; iSeqIndex<prMSeq->nseqs; iSeqIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
603 int iAssignedCluster = piKMeansClusterAssignments[iSeqIndex]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
604 if (pbClusterUsed[iAssignedCluster]) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
605 continue; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
606 } else { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
607 /*LOG_DEBUG("Picked seed %d from cluster %d", iSeqIndex,iAssignedCluster);*/ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
608 piSeeds[iSeedIndex++] = iSeqIndex; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
609 pbClusterUsed[iAssignedCluster] = TRUE; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
610 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
611 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
612 CKFREE(pbClusterUsed); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
613 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
614 CKFREE(pdKMeansVectors); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
615 CKFREE(pdKMeansClusterCenters); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
616 CKFREE(piKMeansClusterAssignments); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
617 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
618 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
619 /* if (1) */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
620 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
621 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
622 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
623 if (LOG_DEBUG <= rLog.iLogLevelEnabled) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
624 for (iSeedIndex=0; iSeedIndex<iNumSeeds; iSeedIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
625 Log(&rLog, LOG_DEBUG, "Picked sequence %d (%s) as seed no %d", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
626 piSeeds[iSeedIndex], prMSeq->sqinfo[piSeeds[iSeedIndex]].name, iSeedIndex); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
627 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
628 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
629 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
630 return 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
631 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
632 /* end of SeedSelection() */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
633 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
634 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
635 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
636 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
637 /** |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
638 * @brief Bisecting K-Means clustering. Repeatedly calls K-Means with |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
639 * a K of 2 until no cluster has more than iMaxAllowedObjsPerCluster. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
640 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
641 * @param[out] prKMeansResult_p |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
642 * Result of Bisecting KMeans. Will be allocated here. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
643 * Caller has to free. See @see FreeKMeansResult() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
644 * @param[in] iNObjs |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
645 * Number of objects/sequences to cluster |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
646 * @param[in] iDim |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
647 * Dimensionality of input data |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
648 * @param[in] ppdVectors |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
649 * each row holds iDim points for this object's coordinates |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
650 * @param[in] iMinRequiredObjsPerCluster |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
651 * Minimum number of objects per Cluster (inclusive)/ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
652 * @param[in] iMaxAllowedObjsPerCluster |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
653 * Maximum number of objects per Cluster (inclusive). Function returns once no |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
654 * cluster contains more then this number of objects. Soft limit! |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
655 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
656 * @note Convoluted code. Could use some restructuring. My apologies. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
657 * AW |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
658 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
659 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
660 void |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
661 BisectingKmeans(bisecting_kmeans_result_t **prKMeansResult_p, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
662 const int iNObjs, const int iDim, double **ppdVectors, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
663 const int iMinRequiredObjsPerCluster, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
664 const int iMaxAllowedObjsPerCluster) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
665 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
666 int iN, iD; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
667 /* cluster centers for each cluster created at each split */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
668 double *pdKClusterCenters; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
669 /* keep track of updated object indices per newly created |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
670 * cluster */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
671 int *piCurObjToUpdate; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
672 /* number of times we called 2-means */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
673 int iNRounds; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
674 /* flag for unsuccessful cluster split */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
675 bool bNaNDetected = FALSE; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
676 /* flag for detected small cluster after split */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
677 bool bSmallClusterDetected; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
678 /* queue of clusters which are to be split */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
679 int_queue_t rClusterSplitQueue; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
680 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
681 #if TIMING |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
682 Stopwatch_t *stopwatch = StopwatchCreate(); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
683 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
684 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
685 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
686 piCurObjToUpdate = (int *) CKMALLOC(2 * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
687 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
688 NewKMeansResult(prKMeansResult_p); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
689 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
690 /* new cluster centers created at each split/KMeans run |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
691 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
692 pdKClusterCenters = (double *) CKCALLOC(2 * iDim, sizeof(double)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
693 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
694 /* init results by setting a first cluster that contains all objects |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
695 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
696 (*prKMeansResult_p)->iNClusters = 1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
697 (*prKMeansResult_p)->iDim = iDim; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
698 /* fake center coordinates of first cluster */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
699 (*prKMeansResult_p)->ppdClusterCenters = |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
700 (double **) CKMALLOC(1 * sizeof(double *)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
701 (*prKMeansResult_p)->ppdClusterCenters[0] = |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
702 (double *) CKMALLOC(iDim * sizeof(double)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
703 /* objects per cluster */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
704 (*prKMeansResult_p)->piNObjsPerCluster = |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
705 (int *) CKMALLOC(1 * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
706 (*prKMeansResult_p)->piNObjsPerCluster[0] = iNObjs; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
707 /* object indices per cluster */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
708 (*prKMeansResult_p)->ppiObjIndicesPerCluster = |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
709 (int **) CKMALLOC(1 * sizeof(int *)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
710 (*prKMeansResult_p)->ppiObjIndicesPerCluster[0] = (int *) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
711 CKMALLOC(iNObjs * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
712 for (iN=0; iN<iNObjs; iN++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
713 (*prKMeansResult_p)->ppiObjIndicesPerCluster[0][iN] = iN; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
714 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
715 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
716 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
717 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
718 /* Starting with the first cluster that now contains all the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
719 * sequences keep splitting until no cluster contains more than |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
720 * iMaxAllowedObjsPerCluster |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
721 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
722 * Keep a queue of clusters (rClusterSplitQueue) to split |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
723 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
724 * At each split values/memory of the just split cluster will be |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
725 * reused and exactly one new only allocated. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
726 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
727 * Example: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
728 * Given the following cluster assignments |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
729 * 0 0 0 0 1 1 2 2 2 3 3 3 3 3 3 3 4 4 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
730 * and a K-Means split in cluster 3 at |: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
731 * 0 0 0 0 1 1 2 2 2 3 3 3 | 3 3 3 3 4 4 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
732 * The result should be this: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
733 * 0 0 0 0 1 1 2 2 2 3 3 3 | 5 5 5 5 4 4 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
734 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
735 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
736 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
737 INT_QUEUE_INIT(&rClusterSplitQueue); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
738 if (iNObjs>iMaxAllowedObjsPerCluster) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
739 /* pish fake first cluster index */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
740 INT_QUEUE_PUSH(&rClusterSplitQueue, 0); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
741 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
742 iNRounds = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
743 while (! INT_QUEUE_EMPTY(&rClusterSplitQueue)) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
744 /* assignments for each seq to the K newly created clusters */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
745 int *piKClusterAssignments; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
746 /* number of objects in cluster that is to be split */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
747 int iNObjsInClusterToSplit; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
748 /* coordinates of points in cluster that is to be split |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
749 * array of size n*d where [d*i + j] gives coordinate j of point i |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
750 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
751 double *pdVectorsInClusterToSplit; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
752 /* copy of object indices in split cluster */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
753 int *piObjIndicesOfSplitCluster; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
754 /* best cost of kmeans rounds */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
755 double dCost = -1.0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
756 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
757 /* indices for the two created clusters |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
758 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
759 /* index of cluster to split */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
760 int iClusterToSplot; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
761 /* index of newly created cluster at each round. data for the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
762 other created cluster goes to the one just split, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
763 i.e. (*piClusterToSplot) */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
764 int iNewClusterIdx; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
765 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
766 #if TIMING |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
767 StopwatchZero(stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
768 StopwatchStart(stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
769 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
770 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
771 INT_QUEUE_POP(&rClusterSplitQueue, &iClusterToSplot); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
772 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
773 iNObjsInClusterToSplit = (*prKMeansResult_p)->piNObjsPerCluster[iClusterToSplot]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
774 piKClusterAssignments = (int *) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
775 CKMALLOC(iNObjsInClusterToSplit * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
776 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
777 pdVectorsInClusterToSplit = (double *) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
778 CKMALLOC(iNObjsInClusterToSplit * iDim * sizeof(double)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
779 for (iN=0; iN<iNObjsInClusterToSplit; iN++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
780 for (iD=0; iD<iDim; ++iD) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
781 int iThisObjIdx = |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
782 (*prKMeansResult_p)->ppiObjIndicesPerCluster[iClusterToSplot][iN]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
783 pdVectorsInClusterToSplit[iDim*iN + iD] = ppdVectors[iThisObjIdx][iD]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
784 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
785 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
786 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
787 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
788 Log(&rLog, LOG_FOCRED_DEBUG, "Round %d: Will split cluster %d which has %d objects", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
789 iNRounds, iClusterToSplot, iNObjsInClusterToSplit); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
790 fprintf(stderr, "DEBUG(%s|%s():%d): Object indices in cluster to split are:", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
791 __FILE__, __FUNCTION__, __LINE__); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
792 for (iN=0; iN<iNObjsInClusterToSplit; iN++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
793 fprintf(stderr, " %u", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
794 (*prKMeansResult_p)->ppiObjIndicesPerCluster[iClusterToSplot][iN]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
795 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
796 fprintf(stderr, "\n"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
797 (void) fflush(stderr); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
798 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
799 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
800 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
801 for (iN=0; iN<iNObjsInClusterToSplit; iN++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
802 fprintf(stderr, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
803 "DEBUG(%s|%s():%d): Coordinate of object %u (real index %u) in cluster to split:", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
804 __FILE__, __FUNCTION__, __LINE__, iN, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
805 (*prKMeansResult_p)->ppiObjIndicesPerCluster[iClusterToSplot][iN]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
806 for (iD=0; iD<iDim; iD++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
807 fprintf(stderr, " %f", pdVectorsInClusterToSplit[iDim*iN + iD]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
808 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
809 fprintf(stderr, "\n"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
810 (void) fflush(stderr); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
811 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
812 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
813 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
814 /* KMeans(1 "The number of points in the data set", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
815 * 2 "The number of clusters to look for", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
816 * 3 "The number of dimensions that the data set lives in", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
817 * 4 "points: An array of size n*d where points[d*i + j] gives coordinate j of point i", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
818 * 5 "attempts: The number of times to independently run k-means", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
819 * 6 "use_lloyds_method: uses kmpp if false, otherwise lloyds method", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
820 * 7 "centers: This can either be null or an array of size k*d. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
821 * In the latter case, centers[d*i + j] will give coordinate j of center i. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
822 * If the cluster is unused, it will contain NaN instead.", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
823 * 8 "assignments: This can either be null or an array of size n. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
824 * In the latter case, it will be filled with the cluster that each point is assigned to |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
825 * (an integer between 0 and k-1 inclusive)."); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
826 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
827 dCost = KMeans(iNObjsInClusterToSplit, 2, iDim, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
828 pdVectorsInClusterToSplit, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
829 RESTARTS_PER_SPLIT, USE_KMEANS_LLOYDS, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
830 pdKClusterCenters, piKClusterAssignments); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
831 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
832 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
833 Log(&rLog, LOG_FORCED_DEBUG, "%s", "Raw K-Means output:"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
834 for (iN=0; iN<2; iN++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
835 fprintf(stderr, "DEBUG(%s|%s():%d): Cluster Center %u =", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
836 __FILE__, __FUNCTION__, __LINE__, iN); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
837 for (iD=0; iD<iDim; iD++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
838 fprintf(stderr, " %f", pdKClusterCenters[iN*iDim+iD]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
839 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
840 fprintf(stderr, "\n"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
841 (void) fflush(stderr); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
842 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
843 for (iN=0; iN<iNObjsInClusterToSplit; iN++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
844 Log(&rLog, LOG_FORCED_DEBUG, " Raw assignment: Seq %u to cluster %d (of #%u)", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
845 iN, piKClusterAssignments[iN], 2); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
846 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
847 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
848 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
849 /* real index of one of the newly created clusters. the other |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
850 * one is iClusterToSplot */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
851 iNewClusterIdx = (*prKMeansResult_p)->iNClusters; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
852 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
853 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
854 /* We don't want Clusters which are too small. Check here if a |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
855 * split created a small cluster and if yes, discard the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
856 * solution. Because the cluster has already been removed from |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
857 * the queue, we can just continue. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
858 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
859 bSmallClusterDetected = FALSE; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
860 if (iMinRequiredObjsPerCluster>1) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
861 int iNObjsCluster[2]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
862 iNObjsCluster[0] = 0; /* first cluster */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
863 iNObjsCluster[1] = 0; /* second cluster */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
864 for (iN=0; iN<iNObjsInClusterToSplit; iN++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
865 iNObjsCluster[piKClusterAssignments[iN]]+=1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
866 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
867 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
868 if (iNObjsCluster[0]<iMinRequiredObjsPerCluster |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
869 || |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
870 iNObjsCluster[1]<iMinRequiredObjsPerCluster) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
871 bSmallClusterDetected = TRUE; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
872 Log(&rLog, LOG_FORCED_DEBUG, "Skipping this split because objs in 1st/2nd cluster = %d/%d < %d", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
873 iNObjsCluster[0], iNObjsCluster[1], iMinRequiredObjsPerCluster); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
874 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
875 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
876 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
877 /* Same logic as for small clusters applies if KMeans couldn't |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
878 * split the cluster. In this case one of its center |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
879 * coordinates will be NaN, which we check for in the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
880 * following. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
881 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
882 if (! bSmallClusterDetected) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
883 for (iN=0; iN<2; iN++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
884 bNaNDetected = FALSE; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
885 for (iD=0; iD<iDim; iD++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
886 if (0 != isnan(pdKClusterCenters[iN*iDim+iN])) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
887 /* Got NaN as coordinate after splitting */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
888 Log(&rLog, LOG_WARN, "%s(): Can't split cluster no. %d which has %d objects any further. %s", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
889 __FUNCTION__, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
890 iClusterToSplot, iNObjsInClusterToSplit, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
891 "Hope it's not too big and doesn't slow things down." |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
892 ); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
893 bNaNDetected = TRUE; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
894 break; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
895 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
896 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
897 if (bNaNDetected) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
898 break; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
899 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
900 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
901 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
902 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
903 /* Discarding split of this cluster. It has been removed from the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
904 * queue already. so just continue |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
905 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
906 if (bNaNDetected || bSmallClusterDetected) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
907 CKFREE(piKClusterAssignments); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
908 CKFREE(pdVectorsInClusterToSplit); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
909 continue; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
910 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
911 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
912 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
913 /* update cluster centers: pdClusterCenters |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
914 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
915 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
916 /* reuse memory of existing/old/split cluster |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
917 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
918 for (iN=0; iN<iDim; iN++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
919 double dCoord = pdKClusterCenters[0*iDim+iN]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
920 (*prKMeansResult_p)->ppdClusterCenters[iClusterToSplot][iN] = dCoord; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
921 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
922 /* realloc and set new one |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
923 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
924 (*prKMeansResult_p)->ppdClusterCenters = (double **) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
925 CKREALLOC((*prKMeansResult_p)->ppdClusterCenters, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
926 ((*prKMeansResult_p)->iNClusters+1) * sizeof(double *)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
927 (*prKMeansResult_p)->ppdClusterCenters[iNewClusterIdx] = (double *) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
928 CKMALLOC(iDim * sizeof(double)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
929 for (iD=0; iD<iDim; iD++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
930 double dCoord = pdKClusterCenters[1*iDim+iD]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
931 (*prKMeansResult_p)->ppdClusterCenters[iNewClusterIdx][iD] = dCoord; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
932 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
933 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
934 Log(&rLog, LOG_FORCED_DEBUG, "%s", "* Update of cluster centers done. Cluster centers so far:"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
935 for (iN=0; iN<(*prKMeansResult_p)->iNClusters+1; iN++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
936 fprintf(stderr, "DEBUG(%s|%s():%d): Center %u =", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
937 __FILE__, __FUNCTION__, __LINE__, iN); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
938 for (iD=0; iD<iDim; iD++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
939 fprintf(stderr, " %f", (*prKMeansResult_p)->ppdClusterCenters[iN][iD]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
940 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
941 fprintf(stderr, "\n"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
942 (void) fflush(stderr); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
943 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
944 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
945 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
946 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
947 /* update #seqs per cluster: piNObjsPerCluster |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
948 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
949 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
950 (*prKMeansResult_p)->piNObjsPerCluster = (int *) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
951 CKREALLOC((*prKMeansResult_p)->piNObjsPerCluster, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
952 ((*prKMeansResult_p)->iNClusters+1) * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
953 /* init new and old one to zero */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
954 (*prKMeansResult_p)->piNObjsPerCluster[iClusterToSplot] = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
955 (*prKMeansResult_p)->piNObjsPerCluster[iNewClusterIdx] = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
956 /* now update values */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
957 for (iN=0; iN<iNObjsInClusterToSplit; iN++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
958 if (0 == piKClusterAssignments[iN]) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
959 (*prKMeansResult_p)->piNObjsPerCluster[iClusterToSplot] += 1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
960 } else if (1 == piKClusterAssignments[iN]) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
961 (*prKMeansResult_p)->piNObjsPerCluster[iNewClusterIdx] += 1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
962 } else { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
963 /* there used to be code for iK>=2 in r101 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
964 Log(&rLog, LOG_FATAL, "Internal error: split into more than two clusters (got assignment %d)", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
965 piKClusterAssignments[iN]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
966 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
967 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
968 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
969 Log(&rLog, LOG_FORCED_DEBUG, "%s", "* Update of NObjs per cluster done:"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
970 for (iN=0; iN<(*prKMeansResult_p)->iNClusters+1; iN++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
971 Log(&rLog, LOG_FORCED_DEBUG, "Cluster %d contains %d sequences", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
972 iN, (*prKMeansResult_p)->piNObjsPerCluster[iN]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
973 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
974 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
975 /* queue clusters if still they are still too big |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
976 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
977 if ((*prKMeansResult_p)->piNObjsPerCluster[iClusterToSplot] > iMaxAllowedObjsPerCluster) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
978 INT_QUEUE_PUSH(&rClusterSplitQueue, iClusterToSplot); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
979 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
980 if ((*prKMeansResult_p)->piNObjsPerCluster[iNewClusterIdx] > iMaxAllowedObjsPerCluster) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
981 INT_QUEUE_PUSH(&rClusterSplitQueue, iNewClusterIdx); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
982 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
983 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
984 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
985 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
986 /* update which objects belong to which cluster: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
987 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
988 * note: piNObjsPerCluster needs to be already updated |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
989 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
990 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
991 /* create a copy of the object indices in the split cluster |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
992 * (original will be overwritten) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
993 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
994 piObjIndicesOfSplitCluster = (int *) CKMALLOC(iNObjsInClusterToSplit * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
995 memcpy(piObjIndicesOfSplitCluster, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
996 (*prKMeansResult_p)->ppiObjIndicesPerCluster[iClusterToSplot], |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
997 iNObjsInClusterToSplit * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
998 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
999 (*prKMeansResult_p)->ppiObjIndicesPerCluster = (int **) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1000 CKREALLOC((*prKMeansResult_p)->ppiObjIndicesPerCluster, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1001 ((*prKMeansResult_p)->iNClusters+1) * sizeof(int *)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1002 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1003 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1004 (*prKMeansResult_p)->ppiObjIndicesPerCluster[iClusterToSplot] = |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1005 (int *) CKREALLOC((*prKMeansResult_p)->ppiObjIndicesPerCluster[iClusterToSplot], |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1006 (*prKMeansResult_p)->piNObjsPerCluster[iClusterToSplot] * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1007 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1008 (*prKMeansResult_p)->ppiObjIndicesPerCluster[iNewClusterIdx] = |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1009 (int *) CKMALLOC((*prKMeansResult_p)->piNObjsPerCluster[iNewClusterIdx] * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1010 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1011 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1012 /* now reassign the object indices to the assigned cluster |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1013 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1014 piCurObjToUpdate[0] = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1015 piCurObjToUpdate[1] = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1016 for (iN=0; iN<iNObjsInClusterToSplit; iN++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1017 int iThisObjIdx = piObjIndicesOfSplitCluster[iN]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1018 int iThisClusterAssignment = piKClusterAssignments[iN]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1019 int iThisOffset = piCurObjToUpdate[iThisClusterAssignment]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1020 int iThisClusterIndex = 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1021 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1022 if (0 == iThisClusterAssignment) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1023 iThisClusterIndex = iClusterToSplot; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1024 } else if (1 == iThisClusterAssignment) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1025 iThisClusterIndex = iNewClusterIdx; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1026 } else { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1027 /* there used to be code for iK>=2 in r101 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1028 Log(&rLog, LOG_FATAL, "Internal error: split into more than two clusters (got assignment %d)", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1029 piKClusterAssignments[iN]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1030 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1031 #if 0 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1032 Log(&rLog, LOG_FORCED_DEBUG, "Setting (*prKMeansResult_p)->ppiObjIndicesPerCluster[%d][%d] = %d", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1033 iThisClusterIndex, iThisOffset, iThisObjIdx); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1034 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1035 (*prKMeansResult_p)->ppiObjIndicesPerCluster[iThisClusterIndex][iThisOffset] = iThisObjIdx; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1036 piCurObjToUpdate[iThisClusterAssignment]+=1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1037 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1038 CKFREE(piObjIndicesOfSplitCluster); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1039 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1040 for (iN=0; iN<(*prKMeansResult_p)->iNClusters+1; iN++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1041 int iObj; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1042 fprintf(stderr, "DEBUG(%s|%s():%d): Objects in cluster %u: ", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1043 __FILE__, __FUNCTION__, __LINE__, iN); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1044 for (iObj=0; iObj<(*prKMeansResult_p)->piNObjsPerCluster[iN]; iObj++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1045 fprintf(stderr, " %u", (*prKMeansResult_p)->ppiObjIndicesPerCluster[iN][iObj]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1046 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1047 fprintf(stderr, "\n"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1048 (void) fflush(stderr); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1049 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1050 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1051 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1052 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1053 #if TIMING |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1054 StopwatchStop(stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1055 StopwatchDisplay(stdout, "Total time after next round in Bisecting-KMeans: ", stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1056 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1057 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1058 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1059 /* finally: increase number of clusters |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1060 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1061 (*prKMeansResult_p)->iNClusters += 1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1062 iNRounds += 1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1063 CKFREE(piKClusterAssignments); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1064 CKFREE(pdVectorsInClusterToSplit); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1065 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1066 } /* while */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1067 INT_QUEUE_DESTROY(&rClusterSplitQueue); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1068 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1069 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1070 Log(&rLog, LOG_DEBUG, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1071 "Bisecting K-means finished after %d rounds (no more clusters to split)", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1072 iNRounds); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1073 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1074 #if TIMING |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1075 StopwatchFree(stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1076 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1077 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1078 /* @note could use progress/timer */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1079 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1080 CKFREE(pdKClusterCenters); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1081 CKFREE(piCurObjToUpdate); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1082 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1083 return; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1084 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1085 /*** end: BisectingKmeans() ***/ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1086 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1087 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1088 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1089 /** |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1090 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1091 * @brief From scratch reimplementation of mBed: Blackshields et al. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1092 * (2010); PMID 20470396. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1093 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1094 * Idea is a follows: |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1095 * - convert sequences into vectors of distances |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1096 * - cluster the vectors using k-means |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1097 * - cluster each of the k clusters using upgma (used cached distances |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1098 * from above?) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1099 * - join the sub-clusters to create on tree (use UPGMA on k-means |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1100 * medoids) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1101 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1102 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1103 * @param[out] prMbedTree_p |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1104 * Created upgma tree. will be allocated here. use FreeMuscleTree() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1105 * to free |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1106 * @param[in] prMSeq |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1107 * Multiple sequences |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1108 * @param[in] iPairDistType |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1109 * Distance measure for pairwise alignments |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1110 * @param[in] pcGuidetreeOut |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1111 * Passed down to GuideTreeUpgma() |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1112 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1113 * @return Zero on success, non-zero on error |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1114 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1115 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1116 int |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1117 Mbed(tree_t **prMbedTree_p, mseq_t *prMSeq, const int iPairDistType, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1118 const char *pcGuidetreeOut) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1119 { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1120 /* number of seeds */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1121 int iNumSeeds; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1122 /* seed indices matching prMSeq */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1123 int *piSeeds; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1124 /* seqs represented as distance vectors */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1125 double **ppdSeqVec; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1126 /* kmeans result */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1127 bisecting_kmeans_result_t *prKMeansResult = NULL; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1128 /* distance matrix of kmeans (pre-)cluster centers */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1129 symmatrix_t *prPreClusterDistmat = NULL; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1130 /* auxiliary for symmetric matrix output; tree routines etc */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1131 char **ppcLabels; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1132 int iNodeIndex; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1133 /* mapping of cluster-center tree node indices to corresponding |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1134 cluster */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1135 int *piClusterToTreeNode; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1136 int iClusterIndex; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1137 int iI, uJ; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1138 FILE *pfOut; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1139 progress_t *prSubClusterDistanceProgress; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1140 bool bPrintCR = (LOG_VERBOSE<=rLog.iLogLevelEnabled) ? FALSE : TRUE; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1141 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1142 #if FULL_WITHIN_CLUSTER_DISTANCES |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1143 Log(&rLog, LOG_WARN, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1144 "FULL_WITHIN_CLUSTER_DISTANCES (using min %d / max %d seqs per subcluster) is a quick-hack. Brace yourself...", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1145 MIN_REQUIRED_SEQ_PER_PRECLUSTER, MAX_ALLOWED_SEQ_PER_PRECLUSTER); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1146 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1147 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1148 #if MBED_TIMING |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1149 Stopwatch_t *stopwatch = StopwatchCreate(); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1150 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1151 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1152 assert(NULL != prMbedTree_p); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1153 assert(NULL != prMSeq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1154 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1155 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1156 iNumSeeds = (int) NUMBER_OF_SEEDS(prMSeq->nseqs); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1157 if (iNumSeeds >= prMSeq->nseqs) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1158 /* -1 is condition for RandomUniqueIntArray */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1159 iNumSeeds = prMSeq->nseqs-1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1160 Log(&rLog, LOG_DEBUG, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1161 "Automatically determined number of seeds is bigger (or equal) the number of sequences. Will set it to %d", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1162 iNumSeeds); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1163 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1164 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1165 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1166 /* Turn sequences into vectors of distances to the seeds |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1167 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1168 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1169 piSeeds = (int *) CKMALLOC(iNumSeeds * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1170 if (0 != SeedSelection(piSeeds, iNumSeeds, SEED_SELECTION, prMSeq)) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1171 Log(&rLog, LOG_ERROR, "Something went wrong during seed selection for mbed"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1172 return -1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1173 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1174 ppdSeqVec = (double **) CKMALLOC(prMSeq->nseqs * sizeof(double *)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1175 for (iI=0; iI<prMSeq->nseqs; iI++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1176 ppdSeqVec[iI] = (double *) CKMALLOC(iNumSeeds * sizeof(double)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1177 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1178 if (0 != SeqToVec(ppdSeqVec, prMSeq, piSeeds, iNumSeeds, iPairDistType)) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1179 Log(&rLog, LOG_ERROR, "Could not convert sequences into vectors for mbed"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1180 return -1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1181 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1182 CKFREE(piSeeds); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1183 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1184 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1185 /* Calculate (pre-)clusters of sequence vectors by applying |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1186 * bisecting kmeans |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1187 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1188 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1189 #if MBED_TIMING |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1190 /* start clock only here, to make sure we don't include pairwise |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1191 * distance computation */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1192 StopwatchZero(stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1193 StopwatchStart(stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1194 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1195 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1196 BisectingKmeans(&prKMeansResult, prMSeq->nseqs, iNumSeeds, ppdSeqVec, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1197 MIN_REQUIRED_SEQ_PER_PRECLUSTER, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1198 MAX_ALLOWED_SEQ_PER_PRECLUSTER); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1199 Log(&rLog, LOG_INFO, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1200 "mBed created %u cluster/s (with a minimum of %d and a soft maximum of %d sequences each)", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1201 prKMeansResult->iNClusters, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1202 MIN_REQUIRED_SEQ_PER_PRECLUSTER, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1203 MAX_ALLOWED_SEQ_PER_PRECLUSTER); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1204 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1205 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1206 #if PRINT_CLUSTER_DISTRIBUTION |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1207 Log(&rLog, LOG_FORCED_DEBUG, "Bisecting Kmeans returned %d clusters", prKMeansResult->iNClusters); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1208 for (iI=0; iI<prKMeansResult->iNClusters; iI++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1209 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1210 int iD; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1211 Log(&rLog, LOG_FORCED_DEBUG, "Diagnostic output for cluster %d follows:", iI); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1212 fprintf(stderr, "DEBUG(%s|%s():%d): center coordinates =", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1213 __FILE__, __FUNCTION__, __LINE__); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1214 for (iD=0; iD<iNumSeeds; iD++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1215 fprintf(stderr, " %f", prKMeansResult->ppdClusterCenters[iI][iD]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1216 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1217 fprintf(stderr, "\n"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1218 fflush(stderr); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1219 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1220 Log(&rLog, LOG_FORCED_DEBUG, "Cluster %d has %d objects assigned", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1221 iI, prKMeansResult->piNObjsPerCluster[iI]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1222 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1223 for (uJ=0; uJ<prKMeansResult->piNObjsPerCluster[iI]; uJ++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1224 int iRealIndex = prKMeansResult->ppiObjIndicesPerCluster[iI][uJ]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1225 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1226 Log(&rLog, LOG_FORCED_DEBUG, "Cluster %u: object %u has index %u (= seq %s)", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1227 iI, uJ, iRealIndex, prMSeq->sqinfo[iRealIndex].name); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1228 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1229 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1230 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1231 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1232 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1233 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1234 /* Cluster pre-clusters produced by k-means. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1235 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1236 * Do this by calculating the vector distances of the cluster |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1237 * centers and applying UPGMA. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1238 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1239 * @note could try to force-balance the tree here |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1240 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1241 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1242 if (0 != NewSymMatrix(&prPreClusterDistmat, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1243 prKMeansResult->iNClusters, prKMeansResult->iNClusters)) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1244 Log(&rLog, LOG_FATAL, "%s", "Memory allocation for pre-cluster distance-matrix failed"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1245 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1246 for (iI=0; iI<prKMeansResult->iNClusters; iI++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1247 for (uJ=iI+1; uJ<prKMeansResult->iNClusters; uJ++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1248 double dDist; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1249 dDist = EuclDist(prKMeansResult->ppdClusterCenters[iI], |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1250 prKMeansResult->ppdClusterCenters[uJ], |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1251 iNumSeeds); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1252 SymMatrixSetValue(prPreClusterDistmat, iI, uJ, dDist); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1253 /* Log(&rLog, LOG_FORCED_DEBUG, "Euclidean distance between clusters %d and %d = %f", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1254 iI, uJ, dDist); */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1255 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1256 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1257 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1258 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1259 /* labels needed for the guide tree building routine only */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1260 ppcLabels = (char **) CKMALLOC(prKMeansResult->iNClusters * sizeof(char*)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1261 for (iI=0; iI<prKMeansResult->iNClusters; iI++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1262 ppcLabels[iI] = (char *) CKMALLOC(32 * sizeof(char)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1263 (void) snprintf(ppcLabels[iI], 32, "Subcluster-%u", iI); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1264 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1265 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1266 Log(&rLog, LOG_FORCED_DEBUG, "%s", "Distance matrix for pre-cluster centers:"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1267 SymMatrixPrint(prPreClusterDistmat, ppcLabels, NULL); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1268 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1269 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1270 GuideTreeUpgma(prMbedTree_p, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1271 ppcLabels, prPreClusterDistmat, NULL); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1272 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1273 for (iI=0; iI<prKMeansResult->iNClusters; iI++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1274 CKFREE(ppcLabels[iI]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1275 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1276 CKFREE(ppcLabels); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1277 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1278 Log(&rLog, LOG_FORCED_DEBUG, "%s", "Cluster-center guide-tree:"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1279 LogTree(*prMbedTree_p); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1280 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1281 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1282 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1283 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1284 /* Now replace each leaf in the pre-cluster-center tree |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1285 * appropriately, i.e. with the corresponding sub-cluster. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1286 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1287 * For each leaf/sub-cluster, create a distance matrix for the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1288 * corresponding sequences. Use distances between vectors as |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1289 * approximated distances between sequences. Then create a |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1290 * guide-tree by applying UPGMA. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1291 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1292 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1293 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1294 /* Get a mapping of (pre)cluster number and leaf-node indices in |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1295 * the cluster-center tree. We can add trees to prMbedTree_p |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1296 * because AppendTrees() guarantees that no other than the node to |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1297 * append to changes. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1298 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1299 piClusterToTreeNode = (int*) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1300 CKMALLOC(prKMeansResult->iNClusters * sizeof(int)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1301 iNodeIndex = FirstDepthFirstNode(*prMbedTree_p); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1302 do { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1303 if (IsLeaf(iNodeIndex, *prMbedTree_p)) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1304 int iLeafId = GetLeafId(iNodeIndex, *prMbedTree_p); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1305 piClusterToTreeNode[iLeafId] = iNodeIndex; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1306 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1307 iNodeIndex = NextDepthFirstNode(iNodeIndex, *prMbedTree_p); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1308 } while (NULL_NEIGHBOR != iNodeIndex); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1309 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1310 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1311 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1312 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1313 /* Now step through all the leafs and replace them with the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1314 * corresponding sub-trees |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1315 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1316 NewProgress(&prSubClusterDistanceProgress, LogGetFP(&rLog, LOG_INFO), |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1317 "Distance calculation within sub-clusters", bPrintCR); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1318 /* for each cluster */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1319 for (iClusterIndex=0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1320 iClusterIndex < prKMeansResult->iNClusters; iClusterIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1321 /* distance matrix for the sub-cluster */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1322 symmatrix_t *prWithinClusterDistances = NULL; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1323 int iNSeqInCluster; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1324 tree_t *prSubClusterTree = NULL; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1325 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1326 ProgressLog(prSubClusterDistanceProgress, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1327 iClusterIndex, prKMeansResult->iNClusters, FALSE); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1328 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1329 #if FULL_WITHIN_CLUSTER_DISTANCES |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1330 mseq_t *prSubClusterMSeq; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1331 int iPairDistType; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1332 int iSeqIndex; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1333 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1334 Log(&rLog, LOG_DEBUG, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1335 "%s\n", "Calling new Mbed use makes only sense if nseq>MAX_ALLOWED_SEQ_PER_PRECLUSTER"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1336 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1337 if (TRUE == prMSeq->aligned) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1338 iPairDistType = PAIRDIST_SQUIDID_KIMURA; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1339 } else { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1340 iPairDistType = PAIRDIST_KTUPLE; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1341 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1342 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1343 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1344 iNSeqInCluster = prKMeansResult->piNObjsPerCluster[iClusterIndex]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1345 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1346 Log(&rLog, LOG_FORCED_DEBUG, "#seqs in subcluster no %d = %d", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1347 iClusterIndex, iNSeqInCluster); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1348 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1349 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1350 #if FULL_WITHIN_CLUSTER_DISTANCES |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1351 /* create an mseq structure for sequences in this cluster |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1352 * don't need most of the members (e.g. orig_seq) but copy |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1353 * them anyway for the sake of completeness |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1354 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1355 NewMSeq(&prSubClusterMSeq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1356 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1357 prSubClusterMSeq->nseqs = iNSeqInCluster; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1358 prSubClusterMSeq->seqtype = prMSeq->seqtype; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1359 if (NULL!=prMSeq->filename) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1360 prSubClusterMSeq->filename = CkStrdup(prMSeq->filename); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1361 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1362 prSubClusterMSeq->seq = (char **) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1363 CKMALLOC(prSubClusterMSeq->nseqs * sizeof(char *)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1364 prSubClusterMSeq->orig_seq = (char **) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1365 CKMALLOC(prSubClusterMSeq->nseqs * sizeof(char *)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1366 prSubClusterMSeq->sqinfo = (SQINFO *) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1367 CKMALLOC(prSubClusterMSeq->nseqs * sizeof(SQINFO)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1368 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1369 for (iSeqIndex=0; iSeqIndex<iNSeqInCluster; iSeqIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1370 int iRealSeqIndex = prKMeansResult->ppiObjIndicesPerCluster[iClusterIndex][iSeqIndex]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1371 prSubClusterMSeq->seq[iSeqIndex] = CkStrdup(prMSeq->seq[iRealSeqIndex]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1372 prSubClusterMSeq->orig_seq[iSeqIndex] = CkStrdup(prMSeq->orig_seq[iRealSeqIndex]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1373 SeqinfoCopy(&prSubClusterMSeq->sqinfo[iSeqIndex], &prMSeq->sqinfo[iRealSeqIndex]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1374 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1375 Log(&rLog, LOG_DEBUG, "seq no %d in cluster %d is %s (real index = %d)", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1376 iSeqIndex, iClusterIndex, prSubClusterMSeq->sqinfo[iSeqIndex].name, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1377 iRealSeqIndex); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1378 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1379 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1380 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1381 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1382 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1383 /* Create a distance matrix for this sub-cluster |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1384 * (prWithinClusterDistances) by using the vector distances or |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1385 * ktuple distances. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1386 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1387 * Then apply UPGMA to get a subcluster tree |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1388 * (prSubClusterTree) and append created tree to the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1389 * pre-cluster-tree (prMbedTree_p) |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1390 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1391 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1392 #if FULL_WITHIN_CLUSTER_DISTANCES |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1393 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1394 /* compute distances, but be quiet */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1395 if (PairDistances(&prWithinClusterDistances, prSubClusterMSeq, iPairDistType, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1396 0, prSubClusterMSeq->nseqs, 0, prSubClusterMSeq->nseqs, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1397 NULL, NULL)) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1398 Log(&rLog, LOG_ERROR, "Couldn't compute pair distances"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1399 return -1; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1400 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1401 #else |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1402 if (NewSymMatrix(&prWithinClusterDistances, iNSeqInCluster, iNSeqInCluster)!=0) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1403 Log(&rLog, LOG_FATAL, "%s", "Memory allocation for disparity matrix failed"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1404 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1405 for (iI=0; iI<iNSeqInCluster; iI++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1406 int iRealIndexI = prKMeansResult->ppiObjIndicesPerCluster[iClusterIndex][iI]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1407 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1408 for (uJ=iI+1; uJ<iNSeqInCluster; uJ++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1409 int iRealIndexJ = prKMeansResult->ppiObjIndicesPerCluster[iClusterIndex][uJ]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1410 double dist; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1411 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1412 /* Log(&rLog, LOG_FORCED_DEBUG, "Cluster %d: compute distance between %d:%s and %d:%s", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1413 iClusterIndex, i, prMSeq->sqinfo[iRealIndexI].name, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1414 uJ, prMSeq->sqinfo[iRealIndexJ].name); */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1415 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1416 if (1 == USE_EUCLIDEAN_DISTANCE) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1417 dist = EuclDist(ppdSeqVec[iRealIndexI], |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1418 ppdSeqVec[iRealIndexJ], iNumSeeds); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1419 } else { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1420 dist = CosDist(ppdSeqVec[iRealIndexI], |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1421 ppdSeqVec[iRealIndexJ], iNumSeeds); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1422 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1423 SymMatrixSetValue(prWithinClusterDistances, iI, uJ, dist); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1424 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1425 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1426 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1427 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1428 /* labels needed for the guide tree building routine only */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1429 ppcLabels = (char**) CKMALLOC(iNSeqInCluster * sizeof(char*)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1430 for (iI=0; iI<iNSeqInCluster; iI++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1431 #if FULL_WITHIN_CLUSTER_DISTANCES |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1432 ppcLabels[iI] = prSubClusterMSeq->sqinfo[iI].name; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1433 #else |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1434 int iRealIndex = prKMeansResult->ppiObjIndicesPerCluster[iClusterIndex][iI]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1435 ppcLabels[iI] = prMSeq->sqinfo[iRealIndex].name; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1436 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1437 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1438 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1439 Log(&rLog, LOG_FORCED_DEBUG, "Distance matrix for seqs within sub cluster %d/%d", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1440 iClusterIndex, prKMeansResult->iNClusters); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1441 SymMatrixPrint(prWithinClusterDistances, ppcLabels, NULL); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1442 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1443 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1444 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1445 GuideTreeUpgma(&prSubClusterTree, ppcLabels, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1446 prWithinClusterDistances, NULL); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1447 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1448 CKFREE(ppcLabels); /* don't free members, they just point */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1449 #if 0 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1450 Log(&rLog, LOG_FORCED_DEBUG, "Cluster %d guide-tree:", iClusterIndex); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1451 LogTree(prSubClusterTree); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1452 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1453 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1454 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1455 /* The guide tree id's (that point to the sequences) now start |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1456 * from 0, i.e. the association with the prMSeq numbering is |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1457 * broken and fixed in the following |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1458 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1459 for (iNodeIndex = 0; iNodeIndex < (int)GetNodeCount(prSubClusterTree); iNodeIndex++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1460 if (IsLeaf(iNodeIndex, prSubClusterTree)) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1461 int iLeafId = GetLeafId(iNodeIndex, prSubClusterTree); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1462 int iRealId = prKMeansResult->ppiObjIndicesPerCluster[iClusterIndex][iLeafId]; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1463 #if 0 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1464 Log(&rLog, LOG_FORCED_DEBUG, "Correcting leaf node %d which has (wrong) id %d and name %s to id %d (prMSeq name %s)", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1465 iNodeIndex, iLeafId, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1466 GetLeafName(iNodeIndex, prSubClusterTree), |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1467 iRealId, prMSeq->sqinfo[iRealId].name); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1468 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1469 SetLeafId(prSubClusterTree, iNodeIndex, iRealId); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1470 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1471 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1472 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1473 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1474 /* Append the newly created tree (prSubClusterTree) to the |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1475 * corresponding node index of prMbedTree_p. |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1476 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1477 #if TRACE |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1478 Log(&rLog, LOG_FORCED_DEBUG, "Will join trees at leaf node %d = %s", |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1479 piClusterToTreeNode[iClusterIndex], |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1480 GetLeafName(piClusterToTreeNode[iClusterIndex], *prMbedTree_p)); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1481 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1482 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1483 AppendTree(*prMbedTree_p, |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1484 piClusterToTreeNode[iClusterIndex], |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1485 prSubClusterTree); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1486 /* Note: piClusterToTreeNode is still valid, because |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1487 * AppendTrees() guarantees that no other than the node to |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1488 * append to changes. */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1489 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1490 #if 0 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1491 Log(&rLog, LOG_FORCED_DEBUG, "%s", "prMbedTree_p after cluster %d has appended:", iClusterIndex); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1492 LogTree(*prMbedTree_p); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1493 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1494 if (0) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1495 char fname[] = "mbed-joined-tree.dnd"; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1496 FILE *pfOut; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1497 if (NULL == (pfOut = fopen(fname, "w"))) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1498 Log(&rLog, LOG_FATAL, "Couldn't open %s for writing", fname); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1499 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1500 MuscleTreeToFile(pfOut, *prMbedTree_p); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1501 Log(&rLog, LOG_FORCED_DEBUG, "Joined tree written to %s", fname); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1502 fclose(pfOut); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1503 Log(&rLog, LOG_FATAL, "DEBUG EXIT"); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1504 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1505 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1506 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1507 /* cleanup |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1508 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1509 FreeMuscleTree(prSubClusterTree); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1510 FreeSymMatrix(&prWithinClusterDistances); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1511 #if FULL_WITHIN_CLUSTER_DISTANCES |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1512 FreeMSeq(&prSubClusterMSeq); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1513 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1514 } /* end for each cluster */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1515 ProgressDone(prSubClusterDistanceProgress); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1516 FreeProgress(&prSubClusterDistanceProgress); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1517 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1518 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1519 if (NULL != pcGuidetreeOut) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1520 if (NULL == (pfOut = fopen(pcGuidetreeOut, "w"))) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1521 Log(&rLog, LOG_ERROR, "Couldn't open %s for writing", pcGuidetreeOut); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1522 } else { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1523 MuscleTreeToFile(pfOut, *prMbedTree_p); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1524 Log(&rLog, LOG_INFO, "Guide tree written to %s", pcGuidetreeOut); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1525 (void) fclose(pfOut); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1526 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1527 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1528 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1529 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1530 /* cleanup |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1531 * |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1532 */ |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1533 #if MBED_TIMING |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1534 StopwatchStop(stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1535 StopwatchDisplay(stdout, "mBed time (without pairwise distance computation): ", stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1536 StopwatchFree(stopwatch); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1537 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1538 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1539 FreeKMeansResult(&prKMeansResult); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1540 FreeSymMatrix(&prPreClusterDistmat); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1541 for (iI=0; iI<prMSeq->nseqs; iI++) { |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1542 CKFREE(ppdSeqVec[iI]); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1543 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1544 CKFREE(ppdSeqVec); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1545 CKFREE(piClusterToTreeNode); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1546 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1547 #ifndef NDEBUG |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1548 TreeValidate(*prMbedTree_p); |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1549 #endif |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1550 |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1551 return 0; |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1552 } |
ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
clustalomega
parents:
diff
changeset
|
1553 /*** end: Mbed() ***/ |