Mercurial > repos > youngkim > ezbamqc
diff ezBAMQC/src/htslib/kfunc.c @ 0:dfa3745e5fd8
Uploaded
author | youngkim |
---|---|
date | Thu, 24 Mar 2016 17:12:52 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/kfunc.c Thu Mar 24 17:12:52 2016 -0400 @@ -0,0 +1,280 @@ +/* The MIT License + + Copyright (C) 2010, 2013 Genome Research Ltd. + Copyright (C) 2011 Attractive Chaos <attractor@live.co.uk> + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#include <math.h> +#include <stdlib.h> +#include "htslib/kfunc.h" + +/* Log gamma function + * \log{\Gamma(z)} + * AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245 + */ +double kf_lgamma(double z) +{ + double x = 0; + x += 0.1659470187408462e-06 / (z+7); + x += 0.9934937113930748e-05 / (z+6); + x -= 0.1385710331296526 / (z+5); + x += 12.50734324009056 / (z+4); + x -= 176.6150291498386 / (z+3); + x += 771.3234287757674 / (z+2); + x -= 1259.139216722289 / (z+1); + x += 676.5203681218835 / z; + x += 0.9999999999995183; + return log(x) - 5.58106146679532777 - z + (z-0.5) * log(z+6.5); +} + +/* complementary error function + * \frac{2}{\sqrt{\pi}} \int_x^{\infty} e^{-t^2} dt + * AS66, 2nd algorithm, http://lib.stat.cmu.edu/apstat/66 + */ +double kf_erfc(double x) +{ + const double p0 = 220.2068679123761; + const double p1 = 221.2135961699311; + const double p2 = 112.0792914978709; + const double p3 = 33.912866078383; + const double p4 = 6.37396220353165; + const double p5 = .7003830644436881; + const double p6 = .03526249659989109; + const double q0 = 440.4137358247522; + const double q1 = 793.8265125199484; + const double q2 = 637.3336333788311; + const double q3 = 296.5642487796737; + const double q4 = 86.78073220294608; + const double q5 = 16.06417757920695; + const double q6 = 1.755667163182642; + const double q7 = .08838834764831844; + double expntl, z, p; + z = fabs(x) * M_SQRT2; + if (z > 37.) return x > 0.? 0. : 2.; + expntl = exp(z * z * - .5); + if (z < 10. / M_SQRT2) // for small z + p = expntl * ((((((p6 * z + p5) * z + p4) * z + p3) * z + p2) * z + p1) * z + p0) + / (((((((q7 * z + q6) * z + q5) * z + q4) * z + q3) * z + q2) * z + q1) * z + q0); + else p = expntl / 2.506628274631001 / (z + 1. / (z + 2. / (z + 3. / (z + 4. / (z + .65))))); + return x > 0.? 2. * p : 2. * (1. - p); +} + +/* The following computes regularized incomplete gamma functions. + * Formulas are taken from Wiki, with additional input from Numerical + * Recipes in C (for modified Lentz's algorithm) and AS245 + * (http://lib.stat.cmu.edu/apstat/245). + * + * A good online calculator is available at: + * + * http://www.danielsoper.com/statcalc/calc23.aspx + * + * It calculates upper incomplete gamma function, which equals + * kf_gammaq(s,z)*tgamma(s). + */ + +#define KF_GAMMA_EPS 1e-14 +#define KF_TINY 1e-290 + +// regularized lower incomplete gamma function, by series expansion +static double _kf_gammap(double s, double z) +{ + double sum, x; + int k; + for (k = 1, sum = x = 1.; k < 100; ++k) { + sum += (x *= z / (s + k)); + if (x / sum < KF_GAMMA_EPS) break; + } + return exp(s * log(z) - z - kf_lgamma(s + 1.) + log(sum)); +} +// regularized upper incomplete gamma function, by continued fraction +static double _kf_gammaq(double s, double z) +{ + int j; + double C, D, f; + f = 1. + z - s; C = f; D = 0.; + // Modified Lentz's algorithm for computing continued fraction + // See Numerical Recipes in C, 2nd edition, section 5.2 + for (j = 1; j < 100; ++j) { + double a = j * (s - j), b = (j<<1) + 1 + z - s, d; + D = b + a * D; + if (D < KF_TINY) D = KF_TINY; + C = b + a / C; + if (C < KF_TINY) C = KF_TINY; + D = 1. / D; + d = C * D; + f *= d; + if (fabs(d - 1.) < KF_GAMMA_EPS) break; + } + return exp(s * log(z) - z - kf_lgamma(s) - log(f)); +} + +double kf_gammap(double s, double z) +{ + return z <= 1. || z < s? _kf_gammap(s, z) : 1. - _kf_gammaq(s, z); +} + +double kf_gammaq(double s, double z) +{ + return z <= 1. || z < s? 1. - _kf_gammap(s, z) : _kf_gammaq(s, z); +} + +/* Regularized incomplete beta function. The method is taken from + * Numerical Recipe in C, 2nd edition, section 6.4. The following web + * page calculates the incomplete beta function, which equals + * kf_betai(a,b,x) * gamma(a) * gamma(b) / gamma(a+b): + * + * http://www.danielsoper.com/statcalc/calc36.aspx + */ +static double kf_betai_aux(double a, double b, double x) +{ + double C, D, f; + int j; + if (x == 0.) return 0.; + if (x == 1.) return 1.; + f = 1.; C = f; D = 0.; + // Modified Lentz's algorithm for computing continued fraction + for (j = 1; j < 200; ++j) { + double aa, d; + int m = j>>1; + aa = (j&1)? -(a + m) * (a + b + m) * x / ((a + 2*m) * (a + 2*m + 1)) + : m * (b - m) * x / ((a + 2*m - 1) * (a + 2*m)); + D = 1. + aa * D; + if (D < KF_TINY) D = KF_TINY; + C = 1. + aa / C; + if (C < KF_TINY) C = KF_TINY; + D = 1. / D; + d = C * D; + f *= d; + if (fabs(d - 1.) < KF_GAMMA_EPS) break; + } + return exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b) + a * log(x) + b * log(1.-x)) / a / f; +} +double kf_betai(double a, double b, double x) +{ + return x < (a + 1.) / (a + b + 2.)? kf_betai_aux(a, b, x) : 1. - kf_betai_aux(b, a, 1. - x); +} + +#ifdef KF_MAIN +#include <stdio.h> +int main(int argc, char *argv[]) +{ + double x = 5.5, y = 3; + double a, b; + printf("erfc(%lg): %lg, %lg\n", x, erfc(x), kf_erfc(x)); + printf("upper-gamma(%lg,%lg): %lg\n", x, y, kf_gammaq(y, x)*tgamma(y)); + a = 2; b = 2; x = 0.5; + printf("incomplete-beta(%lg,%lg,%lg): %lg\n", a, b, x, kf_betai(a, b, x) / exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b))); + return 0; +} +#endif + + +// log\binom{n}{k} +static double lbinom(int n, int k) +{ + if (k == 0 || n == k) return 0; + return lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1); +} + +// n11 n12 | n1_ +// n21 n22 | n2_ +//-----------+---- +// n_1 n_2 | n + +// hypergeometric distribution +static double hypergeo(int n11, int n1_, int n_1, int n) +{ + return exp(lbinom(n1_, n11) + lbinom(n-n1_, n_1-n11) - lbinom(n, n_1)); +} + +typedef struct { + int n11, n1_, n_1, n; + double p; +} hgacc_t; + +// incremental version of hypergenometric distribution +static double hypergeo_acc(int n11, int n1_, int n_1, int n, hgacc_t *aux) +{ + if (n1_ || n_1 || n) { + aux->n11 = n11; aux->n1_ = n1_; aux->n_1 = n_1; aux->n = n; + } else { // then only n11 changed; the rest fixed + if (n11%11 && n11 + aux->n - aux->n1_ - aux->n_1) { + if (n11 == aux->n11 + 1) { // incremental + aux->p *= (double)(aux->n1_ - aux->n11) / n11 + * (aux->n_1 - aux->n11) / (n11 + aux->n - aux->n1_ - aux->n_1); + aux->n11 = n11; + return aux->p; + } + if (n11 == aux->n11 - 1) { // incremental + aux->p *= (double)aux->n11 / (aux->n1_ - n11) + * (aux->n11 + aux->n - aux->n1_ - aux->n_1) / (aux->n_1 - n11); + aux->n11 = n11; + return aux->p; + } + } + aux->n11 = n11; + } + aux->p = hypergeo(aux->n11, aux->n1_, aux->n_1, aux->n); + return aux->p; +} + +double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two) +{ + int i, j, max, min; + double p, q, left, right; + hgacc_t aux; + int n1_, n_1, n; + + n1_ = n11 + n12; n_1 = n11 + n21; n = n11 + n12 + n21 + n22; // calculate n1_, n_1 and n + max = (n_1 < n1_) ? n_1 : n1_; // max n11, for right tail + min = n1_ + n_1 - n; // not sure why n11-n22 is used instead of min(n_1,n1_) + if (min < 0) min = 0; // min n11, for left tail + *two = *_left = *_right = 1.; + if (min == max) return 1.; // no need to do test + q = hypergeo_acc(n11, n1_, n_1, n, &aux); // the probability of the current table + // left tail + p = hypergeo_acc(min, 0, 0, 0, &aux); + for (left = 0., i = min + 1; p < 0.99999999 * q && i<=max; ++i) // loop until underflow + left += p, p = hypergeo_acc(i, 0, 0, 0, &aux); + --i; + if (p < 1.00000001 * q) left += p; + else --i; + // right tail + p = hypergeo_acc(max, 0, 0, 0, &aux); + for (right = 0., j = max - 1; p < 0.99999999 * q && j>=0; --j) // loop until underflow + right += p, p = hypergeo_acc(j, 0, 0, 0, &aux); + ++j; + if (p < 1.00000001 * q) right += p; + else ++j; + // two-tail + *two = left + right; + if (*two > 1.) *two = 1.; + // adjust left and right + if (abs(i - n11) < abs(j - n11)) right = 1. - left + q; + else left = 1.0 - right + q; + *_left = left; *_right = right; + return q; +} + + +