diff clustalomega/clustal-omega-0.2.0/src/squid/sre_math.c @ 0:ff1768533a07

Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author clustalomega
date Tue, 07 Jun 2011 17:04:25 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/clustalomega/clustal-omega-0.2.0/src/squid/sre_math.c	Tue Jun 07 17:04:25 2011 -0400
@@ -0,0 +1,331 @@
+/*****************************************************************
+ * SQUID - a library of functions for biological sequence analysis
+ * Copyright (C) 1992-2002 Washington University School of Medicine
+ * 
+ *     This source code is freely distributed under the terms of the
+ *     GNU General Public License. See the files COPYRIGHT and LICENSE
+ *     for details.
+ *****************************************************************/
+
+/* sre_math.c
+ * 
+ * Portability for and extensions to C math library.
+ * RCS $Id: sre_math.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: sre_math.c,v 1.12 2002/10/09 14:26:09 eddy Exp)
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "squid.h"
+
+  
+/* Function: Linefit()
+ * 
+ * Purpose:  Given points x[0..N-1] and y[0..N-1], fit to
+ *           a straight line y = a + bx.
+ *           a, b, and the linear correlation coefficient r
+ *           are filled in for return.
+ *           
+ * Args:     x     - x values of data
+ *           y     - y values of data               
+ *           N     - number of data points
+ *           ret_a - RETURN: intercept
+ *           ret_b - RETURN: slope
+ *           ret_r - RETURN: correlation coefficient  
+ *           
+ * Return:   1 on success, 0 on failure.
+ */
+int          
+Linefit(float *x, float *y, int N, float *ret_a, float *ret_b, float *ret_r) 
+{				
+  float xavg, yavg;
+  float sxx, syy, sxy;
+  int   i;
+  
+  /* Calculate averages, xavg and yavg
+   */
+  xavg = yavg = 0.0;
+  for (i = 0; i < N; i++)
+    {
+      xavg += x[i];
+      yavg += y[i];
+    }
+  xavg /= (float) N;
+  yavg /= (float) N;
+
+  sxx = syy = sxy = 0.0;
+  for (i = 0; i < N; i++)
+    {
+      sxx    += (x[i] - xavg) * (x[i] - xavg);
+      syy    += (y[i] - yavg) * (y[i] - xavg);
+      sxy    += (x[i] - xavg) * (y[i] - yavg);
+    }
+  *ret_b = sxy / sxx;
+  *ret_a = yavg - xavg*(*ret_b);
+  *ret_r = sxy / (sqrt(sxx) * sqrt(syy));
+  return 1;
+}
+
+
+/* Function: WeightedLinefit()
+ * 
+ * Purpose:  Given points x[0..N-1] and y[0..N-1] with
+ *           variances (measurement errors) var[0..N-1],  
+ *           fit to a straight line y = mx + b.
+ *           
+ * Method:   Algorithm from Numerical Recipes in C, [Press88].
+ *           
+ * Return:   (void)
+ *           ret_m contains slope; ret_b contains intercept 
+ */                
+void
+WeightedLinefit(float *x, float *y, float *var, int N, float *ret_m, float *ret_b) 
+{
+  int    i;
+  double s;
+  double sx, sy;
+  double sxx, sxy;
+  double delta;
+  double m, b;
+  
+  s = sx = sy = sxx = sxy = 0.;
+  for (i = 0; i < N; i++)
+    {
+      s   += 1./var[i];
+      sx  += x[i] / var[i];
+      sy  += y[i] / var[i];
+      sxx += x[i] * x[i] / var[i];
+      sxy += x[i] * y[i] / var[i];
+    }
+
+  delta = s * sxx - (sx * sx);
+  b = (sxx * sy - sx * sxy) / delta;
+  m = (s * sxy - sx * sy) / delta;
+
+  *ret_m = m;
+  *ret_b = b;
+}
+  
+
+/* Function: Gammln()
+ *
+ * Returns the natural log of the gamma function of x.
+ * x is > 0.0.  
+ *
+ * Adapted from a public domain implementation in the
+ * NCBI core math library. Thanks to John Spouge and
+ * the NCBI. (According to the NCBI, that's Dr. John
+ * "Gammas Galore" Spouge to you, pal.)
+ */
+double
+Gammln(double x)
+{
+  int i;
+  double xx, tx;
+  double tmp, value;
+  static double cof[11] = {
+    4.694580336184385e+04,
+    -1.560605207784446e+05,
+    2.065049568014106e+05,
+    -1.388934775095388e+05,
+    5.031796415085709e+04,
+    -9.601592329182778e+03,
+    8.785855930895250e+02,
+    -3.155153906098611e+01,
+    2.908143421162229e-01,
+    -2.319827630494973e-04,
+    1.251639670050933e-10
+  };
+  
+  /* Protect against x=0. We see this in Dirichlet code,
+   * for terms alpha = 0. This is a severe hack but it is effective
+   * and (we think?) safe. (due to GJM)
+   */ 
+  if (x <= 0.0) return 999999.; 
+
+  xx       = x - 1.0;
+  tx = tmp = xx + 11.0;
+  value    = 1.0;
+  for (i = 10; i >= 0; i--)	/* sum least significant terms first */
+    {
+      value += cof[i] / tmp;
+      tmp   -= 1.0;
+    }
+  value  = log(value);
+  tx    += 0.5;
+  value += 0.918938533 + (xx+0.5)*log(tx) - tx;
+  return value;
+}
+
+
+/* 2D matrix operations
+ */
+float **
+FMX2Alloc(int rows, int cols)
+{
+  float **mx;
+  int     r;
+  
+  mx    = (float **) MallocOrDie(sizeof(float *) * rows);
+  mx[0] = (float *)  MallocOrDie(sizeof(float) * rows * cols);
+  for (r = 1; r < rows; r++)
+    mx[r] = mx[0] + r*cols;
+  return mx;
+}
+void
+FMX2Free(float **mx)
+{
+  free(mx[0]);
+  free(mx);
+}
+double **
+DMX2Alloc(int rows, int cols)
+{
+  double **mx;
+  int      r;
+  
+  mx    = (double **) MallocOrDie(sizeof(double *) * rows);
+  mx[0] = (double *)  MallocOrDie(sizeof(double) * rows * cols);
+  for (r = 1; r < rows; r++)
+    mx[r] = mx[0] + r*cols;
+  return mx;
+}
+void
+DMX2Free(double **mx)
+{
+  free(mx[0]);
+  free(mx);
+}
+/* Function: FMX2Multiply()
+ * 
+ * Purpose:  Matrix multiplication.
+ *           Multiply an m x p matrix A by a p x n matrix B,
+ *           giving an m x n matrix C.
+ *           Matrix C must be a preallocated matrix of the right
+ *           size.
+ */
+void
+FMX2Multiply(float **A, float **B, float **C, int m, int p, int n)
+{
+  int i, j, k;
+
+  for (i = 0; i < m; i++)
+    for (j = 0; j < n; j++)
+      {
+	C[i][j] = 0.;
+	for (k = 0; k < p; k++)
+	  C[i][j] += A[i][p] * B[p][j];
+      }
+}
+
+
+/* Function: IncompleteGamma()
+ * 
+ * Purpose:  Returns 1 - P(a,x) where:
+ *           P(a,x) = \frac{1}{\Gamma(a)} \int_{0}^{x} t^{a-1} e^{-t} dt
+ *                  = \frac{\gamma(a,x)}{\Gamma(a)}
+ *                  = 1 - \frac{\Gamma(a,x)}{\Gamma(a)}
+ *                  
+ *           Used in a chi-squared test: for a X^2 statistic x
+ *           with v degrees of freedom, call:
+ *                  p = IncompleteGamma(v/2., x/2.) 
+ *           to get the probability p that a chi-squared value
+ *           greater than x could be obtained by chance even for
+ *           a correct model. (i.e. p should be large, say 
+ *           0.95 or more).
+ *           
+ * Method:   Based on ideas from Numerical Recipes in C, Press et al.,
+ *           Cambridge University Press, 1988. 
+ *           
+ * Args:     a  - for instance, degrees of freedom / 2     [a > 0]
+ *           x  - for instance, chi-squared statistic / 2  [x >= 0] 
+ *           
+ * Return:   1 - P(a,x).
+ */          
+double
+IncompleteGamma(double a, double x)
+{
+  int iter;			/* iteration counter */
+
+  if (a <= 0.) Die("IncompleteGamma(): a must be > 0");
+  if (x <  0.) Die("IncompleteGamma(): x must be >= 0");
+
+  /* For x > a + 1 the following gives rapid convergence;
+   * calculate 1 - P(a,x) = \frac{\Gamma(a,x)}{\Gamma(a)}:
+   *     use a continued fraction development for \Gamma(a,x).
+   */
+  if (x > a+1) 
+    {
+      double oldp;		/* previous value of p    */
+      double nu0, nu1;		/* numerators for continued fraction calc   */
+      double de0, de1;		/* denominators for continued fraction calc */
+
+      nu0 = 0.;			/* A_0 = 0       */
+      de0 = 1.;			/* B_0 = 1       */
+      nu1 = 1.;			/* A_1 = 1       */
+      de1 = x;			/* B_1 = x       */
+
+      oldp = nu1;
+      for (iter = 1; iter < 100; iter++)
+	{
+	  /* Continued fraction development:
+	   * set A_j = b_j A_j-1 + a_j A_j-2
+	   *     B_j = b_j B_j-1 + a_j B_j-2
+           * We start with A_2, B_2.
+	   */
+				/* j = even: a_j = iter-a, b_j = 1 */
+				/* A,B_j-2 are in nu0, de0; A,B_j-1 are in nu1,de1 */
+	  nu0 = nu1 + ((double)iter - a) * nu0;
+	  de0 = de1 + ((double)iter - a) * de0;
+
+				/* j = odd: a_j = iter, b_j = x */
+				/* A,B_j-2 are in nu1, de1; A,B_j-1 in nu0,de0 */
+	  nu1 = x * nu0 + (double) iter * nu1;
+	  de1 = x * de0 + (double) iter * de1;
+
+				/* rescale */
+	  if (de1 != 0.) 
+	    { 
+	      nu0 /= de1; 
+	      de0 /= de1;
+	      nu1 /= de1;
+	      de1 =  1.;
+	    }
+				/* check for convergence */
+	  if (fabs((nu1-oldp)/nu1) < 1.e-7)
+	    return nu1 * exp(a * log(x) - x - Gammln(a));
+
+	  oldp = nu1;
+	}
+      Die("IncompleteGamma(): failed to converge using continued fraction approx");
+    }
+  else /* x <= a+1 */
+    {
+      double p;			/* current sum               */
+      double val;		/* current value used in sum */
+
+      /* For x <= a+1 we use a convergent series instead:
+       *   P(a,x) = \frac{\gamma(a,x)}{\Gamma(a)},
+       * where
+       *   \gamma(a,x) = e^{-x}x^a \sum_{n=0}{\infty} \frac{\Gamma{a}}{\Gamma{a+1+n}} x^n
+       * which looks appalling but the sum is in fact rearrangeable to
+       * a simple series without the \Gamma functions:
+       *   = \frac{1}{a} + \frac{x}{a(a+1)} + \frac{x^2}{a(a+1)(a+2)} ...
+       * and it's obvious that this should converge nicely for x <= a+1.
+       */
+      
+      p = val = 1. / a;
+      for (iter = 1; iter < 10000; iter++)
+	{
+	  val *= x / (a+(double)iter);
+	  p   += val;
+	  
+	  if (fabs(val/p) < 1.e-7)
+	    return 1. - p * exp(a * log(x) - x - Gammln(a));
+	}
+      Die("IncompleteGamma(): failed to converge using series approx");
+    }
+  /*NOTREACHED*/
+  return 0.;
+}
+