clustalomega/clustal-omega-0.2.0/src/squid/sre_math.c @ 0:ff1768533a07

Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author clustalomega
date Tue, 07 Jun 2011 17:04:25 -0400
/*****************************************************************
 * SQUID - a library of functions for biological sequence analysis
 * Copyright (C) 1992-2002 Washington University School of Medicine
 *
 * This source code is freely distributed under the terms of the
 * GNU General Public License. See the files COPYRIGHT and LICENSE
 * for details.
 *****************************************************************/

/* sre_math.c
 *
 * Portability for and extensions to C math library.
 * RCS $Id: sre_math.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: sre_math.c,v 1.12 2002/10/09 14:26:09 eddy Exp)
 */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "squid.h"


/* Function: Linefit()
 *
 * Purpose:  Given points x[0..N-1] and y[0..N-1], fit to
 *           a straight line y = a + bx.
 *           a, b, and the linear correlation coefficient r
 *           are filled in for return.
 *
 * Args:     x     - x values of data
 *           y     - y values of data
 *           N     - number of data points
 *           ret_a - RETURN: intercept
 *           ret_b - RETURN: slope
 *           ret_r - RETURN: correlation coefficient
 *
 * Return:   1 on success, 0 on failure.
 */
int
Linefit(float *x, float *y, int N, float *ret_a, float *ret_b, float *ret_r)
{
  float xavg, yavg;
  float sxx, syy, sxy;
  int   i;

  /* Calculate averages, xavg and yavg
   */
  xavg = yavg = 0.0;
  for (i = 0; i < N; i++)
    {
      xavg += x[i];
      yavg += y[i];
    }
  xavg /= (float) N;
  yavg /= (float) N;

  sxx = syy = sxy = 0.0;
  for (i = 0; i < N; i++)
    {
      sxx += (x[i] - xavg) * (x[i] - xavg);
      syy += (y[i] - yavg) * (y[i] - yavg);
      sxy += (x[i] - xavg) * (y[i] - yavg);
    }
  *ret_b = sxy / sxx;
  *ret_a = yavg - xavg*(*ret_b);
  *ret_r = sxy / (sqrt(sxx) * sqrt(syy));
  return 1;
}
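
/* Example (editor's sketch, not part of the original squid source):
 * a minimal call to Linefit() on five illustrative points lying near
 * the line y = 2x. The data values and the name example_linefit are
 * hypothetical; only Linefit() itself comes from this file.
 */
#if 0
static void
example_linefit(void)
{
  float x[5] = { 1.0, 2.0, 3.0, 4.0, 5.0 };
  float y[5] = { 2.1, 3.9, 6.2, 7.8, 10.1 };
  float a, b, r;

  if (Linefit(x, y, 5, &a, &b, &r))
    printf("y = %.3f + %.3f x   (r = %.3f)\n", a, b, r);
}
#endif /* example */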


/* Function: WeightedLinefit()
 *
 * Purpose:  Given points x[0..N-1] and y[0..N-1] with
 *           variances (measurement errors) var[0..N-1],
 *           fit to a straight line y = mx + b.
 *
 * Method:   Algorithm from Numerical Recipes in C, [Press88].
 *
 * Return:   (void)
 *           ret_m contains slope; ret_b contains intercept
 */
void
WeightedLinefit(float *x, float *y, float *var, int N, float *ret_m, float *ret_b)
{
  int    i;
  double s;
  double sx, sy;
  double sxx, sxy;
  double delta;
  double m, b;

  s = sx = sy = sxx = sxy = 0.;
  for (i = 0; i < N; i++)
    {
      s   += 1./var[i];
      sx  += x[i] / var[i];
      sy  += y[i] / var[i];
      sxx += x[i] * x[i] / var[i];
      sxy += x[i] * y[i] / var[i];
    }

  delta = s * sxx - (sx * sx);
  b = (sxx * sy - sx * sxy) / delta;
  m = (s * sxy - sx * sy) / delta;

  *ret_m = m;
  *ret_b = b;
}
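
/* Example (editor's sketch, not part of the original squid source):
 * a weighted fit where each point carries a measurement variance;
 * points with small var[] values pull the fit harder. The data and the
 * name example_weighted_linefit are hypothetical.
 */
#if 0
static void
example_weighted_linefit(void)
{
  float x[4]   = { 1.0, 2.0, 3.0, 4.0 };
  float y[4]   = { 2.0, 4.1, 5.9, 8.2 };
  float var[4] = { 0.1, 0.1, 1.0, 1.0 };  /* first two points trusted more */
  float m, b;

  WeightedLinefit(x, y, var, 4, &m, &b);
  printf("y = %.3f x + %.3f\n", m, b);
}
#endif /* example */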


/* Function: Gammln()
 *
 * Returns the natural log of the gamma function of x.
 * x is > 0.0.
 *
 * Adapted from a public domain implementation in the
 * NCBI core math library. Thanks to John Spouge and
 * the NCBI. (According to the NCBI, that's Dr. John
 * "Gammas Galore" Spouge to you, pal.)
 */
double
Gammln(double x)
{
  int    i;
  double xx, tx;
  double tmp, value;
  static double cof[11] = {
     4.694580336184385e+04,
    -1.560605207784446e+05,
     2.065049568014106e+05,
    -1.388934775095388e+05,
     5.031796415085709e+04,
    -9.601592329182778e+03,
     8.785855930895250e+02,
    -3.155153906098611e+01,
     2.908143421162229e-01,
    -2.319827630494973e-04,
     1.251639670050933e-10
  };

  /* Protect against x=0. We see this in Dirichlet code,
   * for terms alpha = 0. This is a severe hack but it is effective
   * and (we think?) safe. (due to GJM)
   */
  if (x <= 0.0) return 999999.;

  xx    = x - 1.0;
  tx    = tmp = xx + 11.0;
  value = 1.0;
  for (i = 10; i >= 0; i--)     /* sum least significant terms first */
    {
      value += cof[i] / tmp;
      tmp   -= 1.0;
    }
  value  = log(value);
  tx    += 0.5;
  value += 0.918938533 + (xx+0.5)*log(tx) - tx;
  return value;
}
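
/* Example (editor's sketch, not part of the original squid source):
 * for integer n, Gammln(n) equals log((n-1)!), so Gammln(6.0) should be
 * approximately log(120) = 4.7875. The name example_gammln is hypothetical.
 */
#if 0
static void
example_gammln(void)
{
  printf("Gammln(6.0) = %.4f   log(120) = %.4f\n", Gammln(6.0), log(120.0));
}
#endif /* example */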


/* 2D matrix operations
 */
float **
FMX2Alloc(int rows, int cols)
{
  float **mx;
  int     r;

  mx    = (float **) MallocOrDie(sizeof(float *) * rows);
  mx[0] = (float *)  MallocOrDie(sizeof(float) * rows * cols);
  for (r = 1; r < rows; r++)
    mx[r] = mx[0] + r*cols;
  return mx;
}
void
FMX2Free(float **mx)
{
  free(mx[0]);
  free(mx);
}
double **
DMX2Alloc(int rows, int cols)
{
  double **mx;
  int      r;

  mx    = (double **) MallocOrDie(sizeof(double *) * rows);
  mx[0] = (double *)  MallocOrDie(sizeof(double) * rows * cols);
  for (r = 1; r < rows; r++)
    mx[r] = mx[0] + r*cols;
  return mx;
}
void
DMX2Free(double **mx)
{
  free(mx[0]);
  free(mx);
}
/* Function: FMX2Multiply()
 *
 * Purpose:  Matrix multiplication.
 *           Multiply an m x p matrix A by a p x n matrix B,
 *           giving an m x n matrix C.
 *           Matrix C must be a preallocated matrix of the right
 *           size.
 */
void
FMX2Multiply(float **A, float **B, float **C, int m, int p, int n)
{
  int i, j, k;

  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      {
        C[i][j] = 0.;
        for (k = 0; k < p; k++)
          C[i][j] += A[i][k] * B[k][j];
      }
}
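
/* Example (editor's sketch, not part of the original squid source):
 * allocate a 2x3 matrix A and a 3x2 matrix B with FMX2Alloc(), multiply
 * them into a preallocated 2x2 matrix C, then free all three. The fill
 * values and the name example_fmx2 are hypothetical.
 */
#if 0
static void
example_fmx2(void)
{
  float **A = FMX2Alloc(2, 3);
  float **B = FMX2Alloc(3, 2);
  float **C = FMX2Alloc(2, 2);
  int     i, j;

  for (i = 0; i < 2; i++)
    for (j = 0; j < 3; j++)
      {
        A[i][j] = (float) (i + j);
        B[j][i] = (float) (i * j);
      }

  FMX2Multiply(A, B, C, 2, 3, 2);  /* C = A * B, a 2x2 result */

  FMX2Free(A);
  FMX2Free(B);
  FMX2Free(C);
}
#endif /* example */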


/* Function: IncompleteGamma()
 *
 * Purpose:  Returns 1 - P(a,x) where:
 *             P(a,x) = \frac{1}{\Gamma(a)} \int_{0}^{x} t^{a-1} e^{-t} dt
 *                    = \frac{\gamma(a,x)}{\Gamma(a)}
 *                    = 1 - \frac{\Gamma(a,x)}{\Gamma(a)}
 *
 *           Used in a chi-squared test: for a X^2 statistic x
 *           with v degrees of freedom, call:
 *                   p = IncompleteGamma(v/2., x/2.)
 *           to get the probability p that a chi-squared value
 *           greater than x could be obtained by chance even for
 *           a correct model. (i.e. p should be large, say
 *           0.95 or more).
 *
 * Method:   Based on ideas from Numerical Recipes in C, Press et al.,
 *           Cambridge University Press, 1988.
 *
 * Args:     a  - for instance, degrees of freedom / 2     [a > 0]
 *           x  - for instance, chi-squared statistic / 2  [x >= 0]
 *
 * Return:   1 - P(a,x).
 */
double
IncompleteGamma(double a, double x)
{
  int iter;                     /* iteration counter */

  if (a <= 0.) Die("IncompleteGamma(): a must be > 0");
  if (x <  0.) Die("IncompleteGamma(): x must be >= 0");

  /* For x > a + 1 the following gives rapid convergence;
   * calculate 1 - P(a,x) = \frac{\Gamma(a,x)}{\Gamma(a)}:
   * use a continued fraction development for \Gamma(a,x).
   */
  if (x > a+1)
    {
      double oldp;              /* previous value of p                      */
      double nu0, nu1;          /* numerators for continued fraction calc   */
      double de0, de1;          /* denominators for continued fraction calc */

      nu0 = 0.;                 /* A_0 = 0 */
      de0 = 1.;                 /* B_0 = 1 */
      nu1 = 1.;                 /* A_1 = 1 */
      de1 = x;                  /* B_1 = x */

      oldp = nu1;
      for (iter = 1; iter < 100; iter++)
        {
          /* Continued fraction development:
           * set A_j = b_j A_j-1 + a_j A_j-2
           *     B_j = b_j B_j-1 + a_j B_j-2
           * We start with A_2, B_2.
           */
          /* j = even: a_j = iter-a, b_j = 1 */
          /* A,B_j-2 are in nu0, de0; A,B_j-1 are in nu1,de1 */
          nu0 = nu1 + ((double)iter - a) * nu0;
          de0 = de1 + ((double)iter - a) * de0;

          /* j = odd: a_j = iter, b_j = x */
          /* A,B_j-2 are in nu1, de1; A,B_j-1 in nu0,de0 */
          nu1 = x * nu0 + (double) iter * nu1;
          de1 = x * de0 + (double) iter * de1;

          /* rescale */
          if (de1 != 0.)
            {
              nu0 /= de1;
              de0 /= de1;
              nu1 /= de1;
              de1 = 1.;
            }
          /* check for convergence */
          if (fabs((nu1-oldp)/nu1) < 1.e-7)
            return nu1 * exp(a * log(x) - x - Gammln(a));

          oldp = nu1;
        }
      Die("IncompleteGamma(): failed to converge using continued fraction approx");
    }
  else /* x <= a+1 */
    {
      double p;                 /* current sum               */
      double val;               /* current value used in sum */

      /* For x <= a+1 we use a convergent series instead:
       *   P(a,x) = \frac{\gamma(a,x)}{\Gamma(a)},
       * where
       *   \gamma(a,x) = e^{-x} x^a \sum_{n=0}^{\infty} \frac{\Gamma(a)}{\Gamma(a+1+n)} x^n
       * which looks appalling but the sum is in fact rearrangeable to
       * a simple series without the \Gamma functions:
       *   = \frac{1}{a} + \frac{x}{a(a+1)} + \frac{x^2}{a(a+1)(a+2)} + ...
       * and it's obvious that this should converge nicely for x <= a+1.
       */

      p = val = 1. / a;
      for (iter = 1; iter < 10000; iter++)
        {
          val *= x / (a+(double)iter);
          p   += val;

          if (fabs(val/p) < 1.e-7)
            return 1. - p * exp(a * log(x) - x - Gammln(a));
        }
      Die("IncompleteGamma(): failed to converge using series approx");
    }
  /*NOTREACHED*/
  return 0.;
}
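
/* Example (editor's sketch, not part of the original squid source):
 * the chi-squared usage described in the IncompleteGamma() comment above.
 * For a chi-squared statistic chisq with df degrees of freedom,
 * IncompleteGamma(df/2, chisq/2) is the probability of a value at least
 * that large arising by chance under a correct model. The values and the
 * name example_chisq_test are hypothetical.
 */
#if 0
static void
example_chisq_test(void)
{
  double chisq = 7.3;   /* observed chi-squared statistic */
  double df    = 5.0;   /* degrees of freedom             */
  double p     = IncompleteGamma(df / 2.0, chisq / 2.0);

  printf("P(chi-squared >= %.2f | df = %.0f) = %.3f\n", chisq, df, p);
}
#endif /* example */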