Mercurial > repos > clustalomega > clustalomega
diff clustalomega/clustal-omega-0.2.0/src/squid/sre_math.c @ 0:ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author | clustalomega |
---|---|
date | Tue, 07 Jun 2011 17:04:25 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clustalomega/clustal-omega-0.2.0/src/squid/sre_math.c Tue Jun 07 17:04:25 2011 -0400 @@ -0,0 +1,331 @@ +/***************************************************************** + * SQUID - a library of functions for biological sequence analysis + * Copyright (C) 1992-2002 Washington University School of Medicine + * + * This source code is freely distributed under the terms of the + * GNU General Public License. See the files COPYRIGHT and LICENSE + * for details. + *****************************************************************/ + +/* sre_math.c + * + * Portability for and extensions to C math library. + * RCS $Id: sre_math.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: sre_math.c,v 1.12 2002/10/09 14:26:09 eddy Exp) + */ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include "squid.h" + + +/* Function: Linefit() + * + * Purpose: Given points x[0..N-1] and y[0..N-1], fit to + * a straight line y = a + bx. + * a, b, and the linear correlation coefficient r + * are filled in for return. + * + * Args: x - x values of data + * y - y values of data + * N - number of data points + * ret_a - RETURN: intercept + * ret_b - RETURN: slope + * ret_r - RETURN: correlation coefficient + * + * Return: 1 on success, 0 on failure. + */ +int +Linefit(float *x, float *y, int N, float *ret_a, float *ret_b, float *ret_r) +{ + float xavg, yavg; + float sxx, syy, sxy; + int i; + + /* Calculate averages, xavg and yavg + */ + xavg = yavg = 0.0; + for (i = 0; i < N; i++) + { + xavg += x[i]; + yavg += y[i]; + } + xavg /= (float) N; + yavg /= (float) N; + + sxx = syy = sxy = 0.0; + for (i = 0; i < N; i++) + { + sxx += (x[i] - xavg) * (x[i] - xavg); + syy += (y[i] - yavg) * (y[i] - xavg); + sxy += (x[i] - xavg) * (y[i] - yavg); + } + *ret_b = sxy / sxx; + *ret_a = yavg - xavg*(*ret_b); + *ret_r = sxy / (sqrt(sxx) * sqrt(syy)); + return 1; +} + + +/* Function: WeightedLinefit() + * + * Purpose: Given points x[0..N-1] and y[0..N-1] with + * variances (measurement errors) var[0..N-1], + * fit to a straight line y = mx + b. + * + * Method: Algorithm from Numerical Recipes in C, [Press88]. + * + * Return: (void) + * ret_m contains slope; ret_b contains intercept + */ +void +WeightedLinefit(float *x, float *y, float *var, int N, float *ret_m, float *ret_b) +{ + int i; + double s; + double sx, sy; + double sxx, sxy; + double delta; + double m, b; + + s = sx = sy = sxx = sxy = 0.; + for (i = 0; i < N; i++) + { + s += 1./var[i]; + sx += x[i] / var[i]; + sy += y[i] / var[i]; + sxx += x[i] * x[i] / var[i]; + sxy += x[i] * y[i] / var[i]; + } + + delta = s * sxx - (sx * sx); + b = (sxx * sy - sx * sxy) / delta; + m = (s * sxy - sx * sy) / delta; + + *ret_m = m; + *ret_b = b; +} + + +/* Function: Gammln() + * + * Returns the natural log of the gamma function of x. + * x is > 0.0. + * + * Adapted from a public domain implementation in the + * NCBI core math library. Thanks to John Spouge and + * the NCBI. (According to the NCBI, that's Dr. John + * "Gammas Galore" Spouge to you, pal.) + */ +double +Gammln(double x) +{ + int i; + double xx, tx; + double tmp, value; + static double cof[11] = { + 4.694580336184385e+04, + -1.560605207784446e+05, + 2.065049568014106e+05, + -1.388934775095388e+05, + 5.031796415085709e+04, + -9.601592329182778e+03, + 8.785855930895250e+02, + -3.155153906098611e+01, + 2.908143421162229e-01, + -2.319827630494973e-04, + 1.251639670050933e-10 + }; + + /* Protect against x=0. We see this in Dirichlet code, + * for terms alpha = 0. This is a severe hack but it is effective + * and (we think?) safe. (due to GJM) + */ + if (x <= 0.0) return 999999.; + + xx = x - 1.0; + tx = tmp = xx + 11.0; + value = 1.0; + for (i = 10; i >= 0; i--) /* sum least significant terms first */ + { + value += cof[i] / tmp; + tmp -= 1.0; + } + value = log(value); + tx += 0.5; + value += 0.918938533 + (xx+0.5)*log(tx) - tx; + return value; +} + + +/* 2D matrix operations + */ +float ** +FMX2Alloc(int rows, int cols) +{ + float **mx; + int r; + + mx = (float **) MallocOrDie(sizeof(float *) * rows); + mx[0] = (float *) MallocOrDie(sizeof(float) * rows * cols); + for (r = 1; r < rows; r++) + mx[r] = mx[0] + r*cols; + return mx; +} +void +FMX2Free(float **mx) +{ + free(mx[0]); + free(mx); +} +double ** +DMX2Alloc(int rows, int cols) +{ + double **mx; + int r; + + mx = (double **) MallocOrDie(sizeof(double *) * rows); + mx[0] = (double *) MallocOrDie(sizeof(double) * rows * cols); + for (r = 1; r < rows; r++) + mx[r] = mx[0] + r*cols; + return mx; +} +void +DMX2Free(double **mx) +{ + free(mx[0]); + free(mx); +} +/* Function: FMX2Multiply() + * + * Purpose: Matrix multiplication. + * Multiply an m x p matrix A by a p x n matrix B, + * giving an m x n matrix C. + * Matrix C must be a preallocated matrix of the right + * size. + */ +void +FMX2Multiply(float **A, float **B, float **C, int m, int p, int n) +{ + int i, j, k; + + for (i = 0; i < m; i++) + for (j = 0; j < n; j++) + { + C[i][j] = 0.; + for (k = 0; k < p; k++) + C[i][j] += A[i][p] * B[p][j]; + } +} + + +/* Function: IncompleteGamma() + * + * Purpose: Returns 1 - P(a,x) where: + * P(a,x) = \frac{1}{\Gamma(a)} \int_{0}^{x} t^{a-1} e^{-t} dt + * = \frac{\gamma(a,x)}{\Gamma(a)} + * = 1 - \frac{\Gamma(a,x)}{\Gamma(a)} + * + * Used in a chi-squared test: for a X^2 statistic x + * with v degrees of freedom, call: + * p = IncompleteGamma(v/2., x/2.) + * to get the probability p that a chi-squared value + * greater than x could be obtained by chance even for + * a correct model. (i.e. p should be large, say + * 0.95 or more). + * + * Method: Based on ideas from Numerical Recipes in C, Press et al., + * Cambridge University Press, 1988. + * + * Args: a - for instance, degrees of freedom / 2 [a > 0] + * x - for instance, chi-squared statistic / 2 [x >= 0] + * + * Return: 1 - P(a,x). + */ +double +IncompleteGamma(double a, double x) +{ + int iter; /* iteration counter */ + + if (a <= 0.) Die("IncompleteGamma(): a must be > 0"); + if (x < 0.) Die("IncompleteGamma(): x must be >= 0"); + + /* For x > a + 1 the following gives rapid convergence; + * calculate 1 - P(a,x) = \frac{\Gamma(a,x)}{\Gamma(a)}: + * use a continued fraction development for \Gamma(a,x). + */ + if (x > a+1) + { + double oldp; /* previous value of p */ + double nu0, nu1; /* numerators for continued fraction calc */ + double de0, de1; /* denominators for continued fraction calc */ + + nu0 = 0.; /* A_0 = 0 */ + de0 = 1.; /* B_0 = 1 */ + nu1 = 1.; /* A_1 = 1 */ + de1 = x; /* B_1 = x */ + + oldp = nu1; + for (iter = 1; iter < 100; iter++) + { + /* Continued fraction development: + * set A_j = b_j A_j-1 + a_j A_j-2 + * B_j = b_j B_j-1 + a_j B_j-2 + * We start with A_2, B_2. + */ + /* j = even: a_j = iter-a, b_j = 1 */ + /* A,B_j-2 are in nu0, de0; A,B_j-1 are in nu1,de1 */ + nu0 = nu1 + ((double)iter - a) * nu0; + de0 = de1 + ((double)iter - a) * de0; + + /* j = odd: a_j = iter, b_j = x */ + /* A,B_j-2 are in nu1, de1; A,B_j-1 in nu0,de0 */ + nu1 = x * nu0 + (double) iter * nu1; + de1 = x * de0 + (double) iter * de1; + + /* rescale */ + if (de1 != 0.) + { + nu0 /= de1; + de0 /= de1; + nu1 /= de1; + de1 = 1.; + } + /* check for convergence */ + if (fabs((nu1-oldp)/nu1) < 1.e-7) + return nu1 * exp(a * log(x) - x - Gammln(a)); + + oldp = nu1; + } + Die("IncompleteGamma(): failed to converge using continued fraction approx"); + } + else /* x <= a+1 */ + { + double p; /* current sum */ + double val; /* current value used in sum */ + + /* For x <= a+1 we use a convergent series instead: + * P(a,x) = \frac{\gamma(a,x)}{\Gamma(a)}, + * where + * \gamma(a,x) = e^{-x}x^a \sum_{n=0}{\infty} \frac{\Gamma{a}}{\Gamma{a+1+n}} x^n + * which looks appalling but the sum is in fact rearrangeable to + * a simple series without the \Gamma functions: + * = \frac{1}{a} + \frac{x}{a(a+1)} + \frac{x^2}{a(a+1)(a+2)} ... + * and it's obvious that this should converge nicely for x <= a+1. + */ + + p = val = 1. / a; + for (iter = 1; iter < 10000; iter++) + { + val *= x / (a+(double)iter); + p += val; + + if (fabs(val/p) < 1.e-7) + return 1. - p * exp(a * log(x) - x - Gammln(a)); + } + Die("IncompleteGamma(): failed to converge using series approx"); + } + /*NOTREACHED*/ + return 0.; +} +