Mercurial > repos > clustalomega > clustalomega
comparison clustalomega/clustal-omega-0.2.0/src/squid/sre_math.c @ 0:ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author | clustalomega |
---|---|
date | Tue, 07 Jun 2011 17:04:25 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ff1768533a07 |
---|---|
1 /***************************************************************** | |
2 * SQUID - a library of functions for biological sequence analysis | |
3 * Copyright (C) 1992-2002 Washington University School of Medicine | |
4 * | |
5 * This source code is freely distributed under the terms of the | |
6 * GNU General Public License. See the files COPYRIGHT and LICENSE | |
7 * for details. | |
8 *****************************************************************/ | |
9 | |
10 /* sre_math.c | |
11 * | |
12 * Portability for and extensions to C math library. | |
13 * RCS $Id: sre_math.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: sre_math.c,v 1.12 2002/10/09 14:26:09 eddy Exp) | |
14 */ | |
15 | |
16 #include <stdio.h> | |
17 #include <stdlib.h> | |
18 #include <math.h> | |
19 #include "squid.h" | |
20 | |
21 | |
22 /* Function: Linefit() | |
23 * | |
24 * Purpose: Given points x[0..N-1] and y[0..N-1], fit to | |
25 * a straight line y = a + bx. | |
26 * a, b, and the linear correlation coefficient r | |
27 * are filled in for return. | |
28 * | |
29 * Args: x - x values of data | |
30 * y - y values of data | |
31 * N - number of data points | |
32 * ret_a - RETURN: intercept | |
33 * ret_b - RETURN: slope | |
34 * ret_r - RETURN: correlation coefficient | |
35 * | |
36 * Return: 1 on success, 0 on failure. | |
37 */ | |
38 int | |
39 Linefit(float *x, float *y, int N, float *ret_a, float *ret_b, float *ret_r) | |
40 { | |
41 float xavg, yavg; | |
42 float sxx, syy, sxy; | |
43 int i; | |
44 | |
45 /* Calculate averages, xavg and yavg | |
46 */ | |
47 xavg = yavg = 0.0; | |
48 for (i = 0; i < N; i++) | |
49 { | |
50 xavg += x[i]; | |
51 yavg += y[i]; | |
52 } | |
53 xavg /= (float) N; | |
54 yavg /= (float) N; | |
55 | |
56 sxx = syy = sxy = 0.0; | |
57 for (i = 0; i < N; i++) | |
58 { | |
59 sxx += (x[i] - xavg) * (x[i] - xavg); | |
60 syy += (y[i] - yavg) * (y[i] - xavg); | |
61 sxy += (x[i] - xavg) * (y[i] - yavg); | |
62 } | |
63 *ret_b = sxy / sxx; | |
64 *ret_a = yavg - xavg*(*ret_b); | |
65 *ret_r = sxy / (sqrt(sxx) * sqrt(syy)); | |
66 return 1; | |
67 } | |
68 | |
69 | |
70 /* Function: WeightedLinefit() | |
71 * | |
72 * Purpose: Given points x[0..N-1] and y[0..N-1] with | |
73 * variances (measurement errors) var[0..N-1], | |
74 * fit to a straight line y = mx + b. | |
75 * | |
76 * Method: Algorithm from Numerical Recipes in C, [Press88]. | |
77 * | |
78 * Return: (void) | |
79 * ret_m contains slope; ret_b contains intercept | |
80 */ | |
81 void | |
82 WeightedLinefit(float *x, float *y, float *var, int N, float *ret_m, float *ret_b) | |
83 { | |
84 int i; | |
85 double s; | |
86 double sx, sy; | |
87 double sxx, sxy; | |
88 double delta; | |
89 double m, b; | |
90 | |
91 s = sx = sy = sxx = sxy = 0.; | |
92 for (i = 0; i < N; i++) | |
93 { | |
94 s += 1./var[i]; | |
95 sx += x[i] / var[i]; | |
96 sy += y[i] / var[i]; | |
97 sxx += x[i] * x[i] / var[i]; | |
98 sxy += x[i] * y[i] / var[i]; | |
99 } | |
100 | |
101 delta = s * sxx - (sx * sx); | |
102 b = (sxx * sy - sx * sxy) / delta; | |
103 m = (s * sxy - sx * sy) / delta; | |
104 | |
105 *ret_m = m; | |
106 *ret_b = b; | |
107 } | |
108 | |
109 | |
110 /* Function: Gammln() | |
111 * | |
112 * Returns the natural log of the gamma function of x. | |
113 * x is > 0.0. | |
114 * | |
115 * Adapted from a public domain implementation in the | |
116 * NCBI core math library. Thanks to John Spouge and | |
117 * the NCBI. (According to the NCBI, that's Dr. John | |
118 * "Gammas Galore" Spouge to you, pal.) | |
119 */ | |
120 double | |
121 Gammln(double x) | |
122 { | |
123 int i; | |
124 double xx, tx; | |
125 double tmp, value; | |
126 static double cof[11] = { | |
127 4.694580336184385e+04, | |
128 -1.560605207784446e+05, | |
129 2.065049568014106e+05, | |
130 -1.388934775095388e+05, | |
131 5.031796415085709e+04, | |
132 -9.601592329182778e+03, | |
133 8.785855930895250e+02, | |
134 -3.155153906098611e+01, | |
135 2.908143421162229e-01, | |
136 -2.319827630494973e-04, | |
137 1.251639670050933e-10 | |
138 }; | |
139 | |
140 /* Protect against x=0. We see this in Dirichlet code, | |
141 * for terms alpha = 0. This is a severe hack but it is effective | |
142 * and (we think?) safe. (due to GJM) | |
143 */ | |
144 if (x <= 0.0) return 999999.; | |
145 | |
146 xx = x - 1.0; | |
147 tx = tmp = xx + 11.0; | |
148 value = 1.0; | |
149 for (i = 10; i >= 0; i--) /* sum least significant terms first */ | |
150 { | |
151 value += cof[i] / tmp; | |
152 tmp -= 1.0; | |
153 } | |
154 value = log(value); | |
155 tx += 0.5; | |
156 value += 0.918938533 + (xx+0.5)*log(tx) - tx; | |
157 return value; | |
158 } | |
159 | |
160 | |
161 /* 2D matrix operations | |
162 */ | |
163 float ** | |
164 FMX2Alloc(int rows, int cols) | |
165 { | |
166 float **mx; | |
167 int r; | |
168 | |
169 mx = (float **) MallocOrDie(sizeof(float *) * rows); | |
170 mx[0] = (float *) MallocOrDie(sizeof(float) * rows * cols); | |
171 for (r = 1; r < rows; r++) | |
172 mx[r] = mx[0] + r*cols; | |
173 return mx; | |
174 } | |
175 void | |
176 FMX2Free(float **mx) | |
177 { | |
178 free(mx[0]); | |
179 free(mx); | |
180 } | |
181 double ** | |
182 DMX2Alloc(int rows, int cols) | |
183 { | |
184 double **mx; | |
185 int r; | |
186 | |
187 mx = (double **) MallocOrDie(sizeof(double *) * rows); | |
188 mx[0] = (double *) MallocOrDie(sizeof(double) * rows * cols); | |
189 for (r = 1; r < rows; r++) | |
190 mx[r] = mx[0] + r*cols; | |
191 return mx; | |
192 } | |
193 void | |
194 DMX2Free(double **mx) | |
195 { | |
196 free(mx[0]); | |
197 free(mx); | |
198 } | |
199 /* Function: FMX2Multiply() | |
200 * | |
201 * Purpose: Matrix multiplication. | |
202 * Multiply an m x p matrix A by a p x n matrix B, | |
203 * giving an m x n matrix C. | |
204 * Matrix C must be a preallocated matrix of the right | |
205 * size. | |
206 */ | |
207 void | |
208 FMX2Multiply(float **A, float **B, float **C, int m, int p, int n) | |
209 { | |
210 int i, j, k; | |
211 | |
212 for (i = 0; i < m; i++) | |
213 for (j = 0; j < n; j++) | |
214 { | |
215 C[i][j] = 0.; | |
216 for (k = 0; k < p; k++) | |
217 C[i][j] += A[i][p] * B[p][j]; | |
218 } | |
219 } | |
220 | |
221 | |
222 /* Function: IncompleteGamma() | |
223 * | |
224 * Purpose: Returns 1 - P(a,x) where: | |
225 * P(a,x) = \frac{1}{\Gamma(a)} \int_{0}^{x} t^{a-1} e^{-t} dt | |
226 * = \frac{\gamma(a,x)}{\Gamma(a)} | |
227 * = 1 - \frac{\Gamma(a,x)}{\Gamma(a)} | |
228 * | |
229 * Used in a chi-squared test: for a X^2 statistic x | |
230 * with v degrees of freedom, call: | |
231 * p = IncompleteGamma(v/2., x/2.) | |
232 * to get the probability p that a chi-squared value | |
233 * greater than x could be obtained by chance even for | |
234 * a correct model. (i.e. p should be large, say | |
235 * 0.95 or more). | |
236 * | |
237 * Method: Based on ideas from Numerical Recipes in C, Press et al., | |
238 * Cambridge University Press, 1988. | |
239 * | |
240 * Args: a - for instance, degrees of freedom / 2 [a > 0] | |
241 * x - for instance, chi-squared statistic / 2 [x >= 0] | |
242 * | |
243 * Return: 1 - P(a,x). | |
244 */ | |
245 double | |
246 IncompleteGamma(double a, double x) | |
247 { | |
248 int iter; /* iteration counter */ | |
249 | |
250 if (a <= 0.) Die("IncompleteGamma(): a must be > 0"); | |
251 if (x < 0.) Die("IncompleteGamma(): x must be >= 0"); | |
252 | |
253 /* For x > a + 1 the following gives rapid convergence; | |
254 * calculate 1 - P(a,x) = \frac{\Gamma(a,x)}{\Gamma(a)}: | |
255 * use a continued fraction development for \Gamma(a,x). | |
256 */ | |
257 if (x > a+1) | |
258 { | |
259 double oldp; /* previous value of p */ | |
260 double nu0, nu1; /* numerators for continued fraction calc */ | |
261 double de0, de1; /* denominators for continued fraction calc */ | |
262 | |
263 nu0 = 0.; /* A_0 = 0 */ | |
264 de0 = 1.; /* B_0 = 1 */ | |
265 nu1 = 1.; /* A_1 = 1 */ | |
266 de1 = x; /* B_1 = x */ | |
267 | |
268 oldp = nu1; | |
269 for (iter = 1; iter < 100; iter++) | |
270 { | |
271 /* Continued fraction development: | |
272 * set A_j = b_j A_j-1 + a_j A_j-2 | |
273 * B_j = b_j B_j-1 + a_j B_j-2 | |
274 * We start with A_2, B_2. | |
275 */ | |
276 /* j = even: a_j = iter-a, b_j = 1 */ | |
277 /* A,B_j-2 are in nu0, de0; A,B_j-1 are in nu1,de1 */ | |
278 nu0 = nu1 + ((double)iter - a) * nu0; | |
279 de0 = de1 + ((double)iter - a) * de0; | |
280 | |
281 /* j = odd: a_j = iter, b_j = x */ | |
282 /* A,B_j-2 are in nu1, de1; A,B_j-1 in nu0,de0 */ | |
283 nu1 = x * nu0 + (double) iter * nu1; | |
284 de1 = x * de0 + (double) iter * de1; | |
285 | |
286 /* rescale */ | |
287 if (de1 != 0.) | |
288 { | |
289 nu0 /= de1; | |
290 de0 /= de1; | |
291 nu1 /= de1; | |
292 de1 = 1.; | |
293 } | |
294 /* check for convergence */ | |
295 if (fabs((nu1-oldp)/nu1) < 1.e-7) | |
296 return nu1 * exp(a * log(x) - x - Gammln(a)); | |
297 | |
298 oldp = nu1; | |
299 } | |
300 Die("IncompleteGamma(): failed to converge using continued fraction approx"); | |
301 } | |
302 else /* x <= a+1 */ | |
303 { | |
304 double p; /* current sum */ | |
305 double val; /* current value used in sum */ | |
306 | |
307 /* For x <= a+1 we use a convergent series instead: | |
308 * P(a,x) = \frac{\gamma(a,x)}{\Gamma(a)}, | |
309 * where | |
310 * \gamma(a,x) = e^{-x}x^a \sum_{n=0}{\infty} \frac{\Gamma{a}}{\Gamma{a+1+n}} x^n | |
311 * which looks appalling but the sum is in fact rearrangeable to | |
312 * a simple series without the \Gamma functions: | |
313 * = \frac{1}{a} + \frac{x}{a(a+1)} + \frac{x^2}{a(a+1)(a+2)} ... | |
314 * and it's obvious that this should converge nicely for x <= a+1. | |
315 */ | |
316 | |
317 p = val = 1. / a; | |
318 for (iter = 1; iter < 10000; iter++) | |
319 { | |
320 val *= x / (a+(double)iter); | |
321 p += val; | |
322 | |
323 if (fabs(val/p) < 1.e-7) | |
324 return 1. - p * exp(a * log(x) - x - Gammln(a)); | |
325 } | |
326 Die("IncompleteGamma(): failed to converge using series approx"); | |
327 } | |
328 /*NOTREACHED*/ | |
329 return 0.; | |
330 } | |
331 |