Mercurial > repos > youngkim > ezbamqc
comparison ezBAMQC/src/htslib/kfunc.c @ 0:dfa3745e5fd8
Uploaded
| author | youngkim |
|---|---|
| date | Thu, 24 Mar 2016 17:12:52 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfa3745e5fd8 |
|---|---|
| 1 /* The MIT License | |
| 2 | |
| 3 Copyright (C) 2010, 2013 Genome Research Ltd. | |
| 4 Copyright (C) 2011 Attractive Chaos <attractor@live.co.uk> | |
| 5 | |
| 6 Permission is hereby granted, free of charge, to any person obtaining | |
| 7 a copy of this software and associated documentation files (the | |
| 8 "Software"), to deal in the Software without restriction, including | |
| 9 without limitation the rights to use, copy, modify, merge, publish, | |
| 10 distribute, sublicense, and/or sell copies of the Software, and to | |
| 11 permit persons to whom the Software is furnished to do so, subject to | |
| 12 the following conditions: | |
| 13 | |
| 14 The above copyright notice and this permission notice shall be | |
| 15 included in all copies or substantial portions of the Software. | |
| 16 | |
| 17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
| 18 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
| 19 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
| 20 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
| 21 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
| 22 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
| 23 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
| 24 SOFTWARE. | |
| 25 */ | |
| 26 | |
| 27 #include <math.h> | |
| 28 #include <stdlib.h> | |
| 29 #include "htslib/kfunc.h" | |
| 30 | |
| 31 /* Log gamma function | |
| 32 * \log{\Gamma(z)} | |
| 33 * AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245 | |
| 34 */ | |
| 35 double kf_lgamma(double z) | |
| 36 { | |
| 37 double x = 0; | |
| 38 x += 0.1659470187408462e-06 / (z+7); | |
| 39 x += 0.9934937113930748e-05 / (z+6); | |
| 40 x -= 0.1385710331296526 / (z+5); | |
| 41 x += 12.50734324009056 / (z+4); | |
| 42 x -= 176.6150291498386 / (z+3); | |
| 43 x += 771.3234287757674 / (z+2); | |
| 44 x -= 1259.139216722289 / (z+1); | |
| 45 x += 676.5203681218835 / z; | |
| 46 x += 0.9999999999995183; | |
| 47 return log(x) - 5.58106146679532777 - z + (z-0.5) * log(z+6.5); | |
| 48 } | |
| 49 | |
| 50 /* complementary error function | |
| 51 * \frac{2}{\sqrt{\pi}} \int_x^{\infty} e^{-t^2} dt | |
| 52 * AS66, 2nd algorithm, http://lib.stat.cmu.edu/apstat/66 | |
| 53 */ | |
| 54 double kf_erfc(double x) | |
| 55 { | |
| 56 const double p0 = 220.2068679123761; | |
| 57 const double p1 = 221.2135961699311; | |
| 58 const double p2 = 112.0792914978709; | |
| 59 const double p3 = 33.912866078383; | |
| 60 const double p4 = 6.37396220353165; | |
| 61 const double p5 = .7003830644436881; | |
| 62 const double p6 = .03526249659989109; | |
| 63 const double q0 = 440.4137358247522; | |
| 64 const double q1 = 793.8265125199484; | |
| 65 const double q2 = 637.3336333788311; | |
| 66 const double q3 = 296.5642487796737; | |
| 67 const double q4 = 86.78073220294608; | |
| 68 const double q5 = 16.06417757920695; | |
| 69 const double q6 = 1.755667163182642; | |
| 70 const double q7 = .08838834764831844; | |
| 71 double expntl, z, p; | |
| 72 z = fabs(x) * M_SQRT2; | |
| 73 if (z > 37.) return x > 0.? 0. : 2.; | |
| 74 expntl = exp(z * z * - .5); | |
| 75 if (z < 10. / M_SQRT2) // for small z | |
| 76 p = expntl * ((((((p6 * z + p5) * z + p4) * z + p3) * z + p2) * z + p1) * z + p0) | |
| 77 / (((((((q7 * z + q6) * z + q5) * z + q4) * z + q3) * z + q2) * z + q1) * z + q0); | |
| 78 else p = expntl / 2.506628274631001 / (z + 1. / (z + 2. / (z + 3. / (z + 4. / (z + .65))))); | |
| 79 return x > 0.? 2. * p : 2. * (1. - p); | |
| 80 } | |
| 81 | |
| 82 /* The following computes regularized incomplete gamma functions. | |
| 83 * Formulas are taken from Wiki, with additional input from Numerical | |
| 84 * Recipes in C (for modified Lentz's algorithm) and AS245 | |
| 85 * (http://lib.stat.cmu.edu/apstat/245). | |
| 86 * | |
| 87 * A good online calculator is available at: | |
| 88 * | |
| 89 * http://www.danielsoper.com/statcalc/calc23.aspx | |
| 90 * | |
| 91 * It calculates upper incomplete gamma function, which equals | |
| 92 * kf_gammaq(s,z)*tgamma(s). | |
| 93 */ | |
| 94 | |
| 95 #define KF_GAMMA_EPS 1e-14 | |
| 96 #define KF_TINY 1e-290 | |
| 97 | |
| 98 // regularized lower incomplete gamma function, by series expansion | |
| 99 static double _kf_gammap(double s, double z) | |
| 100 { | |
| 101 double sum, x; | |
| 102 int k; | |
| 103 for (k = 1, sum = x = 1.; k < 100; ++k) { | |
| 104 sum += (x *= z / (s + k)); | |
| 105 if (x / sum < KF_GAMMA_EPS) break; | |
| 106 } | |
| 107 return exp(s * log(z) - z - kf_lgamma(s + 1.) + log(sum)); | |
| 108 } | |
| 109 // regularized upper incomplete gamma function, by continued fraction | |
| 110 static double _kf_gammaq(double s, double z) | |
| 111 { | |
| 112 int j; | |
| 113 double C, D, f; | |
| 114 f = 1. + z - s; C = f; D = 0.; | |
| 115 // Modified Lentz's algorithm for computing continued fraction | |
| 116 // See Numerical Recipes in C, 2nd edition, section 5.2 | |
| 117 for (j = 1; j < 100; ++j) { | |
| 118 double a = j * (s - j), b = (j<<1) + 1 + z - s, d; | |
| 119 D = b + a * D; | |
| 120 if (D < KF_TINY) D = KF_TINY; | |
| 121 C = b + a / C; | |
| 122 if (C < KF_TINY) C = KF_TINY; | |
| 123 D = 1. / D; | |
| 124 d = C * D; | |
| 125 f *= d; | |
| 126 if (fabs(d - 1.) < KF_GAMMA_EPS) break; | |
| 127 } | |
| 128 return exp(s * log(z) - z - kf_lgamma(s) - log(f)); | |
| 129 } | |
| 130 | |
| 131 double kf_gammap(double s, double z) | |
| 132 { | |
| 133 return z <= 1. || z < s? _kf_gammap(s, z) : 1. - _kf_gammaq(s, z); | |
| 134 } | |
| 135 | |
| 136 double kf_gammaq(double s, double z) | |
| 137 { | |
| 138 return z <= 1. || z < s? 1. - _kf_gammap(s, z) : _kf_gammaq(s, z); | |
| 139 } | |
| 140 | |
| 141 /* Regularized incomplete beta function. The method is taken from | |
| 142 * Numerical Recipe in C, 2nd edition, section 6.4. The following web | |
| 143 * page calculates the incomplete beta function, which equals | |
| 144 * kf_betai(a,b,x) * gamma(a) * gamma(b) / gamma(a+b): | |
| 145 * | |
| 146 * http://www.danielsoper.com/statcalc/calc36.aspx | |
| 147 */ | |
| 148 static double kf_betai_aux(double a, double b, double x) | |
| 149 { | |
| 150 double C, D, f; | |
| 151 int j; | |
| 152 if (x == 0.) return 0.; | |
| 153 if (x == 1.) return 1.; | |
| 154 f = 1.; C = f; D = 0.; | |
| 155 // Modified Lentz's algorithm for computing continued fraction | |
| 156 for (j = 1; j < 200; ++j) { | |
| 157 double aa, d; | |
| 158 int m = j>>1; | |
| 159 aa = (j&1)? -(a + m) * (a + b + m) * x / ((a + 2*m) * (a + 2*m + 1)) | |
| 160 : m * (b - m) * x / ((a + 2*m - 1) * (a + 2*m)); | |
| 161 D = 1. + aa * D; | |
| 162 if (D < KF_TINY) D = KF_TINY; | |
| 163 C = 1. + aa / C; | |
| 164 if (C < KF_TINY) C = KF_TINY; | |
| 165 D = 1. / D; | |
| 166 d = C * D; | |
| 167 f *= d; | |
| 168 if (fabs(d - 1.) < KF_GAMMA_EPS) break; | |
| 169 } | |
| 170 return exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b) + a * log(x) + b * log(1.-x)) / a / f; | |
| 171 } | |
| 172 double kf_betai(double a, double b, double x) | |
| 173 { | |
| 174 return x < (a + 1.) / (a + b + 2.)? kf_betai_aux(a, b, x) : 1. - kf_betai_aux(b, a, 1. - x); | |
| 175 } | |
| 176 | |
| 177 #ifdef KF_MAIN | |
| 178 #include <stdio.h> | |
| 179 int main(int argc, char *argv[]) | |
| 180 { | |
| 181 double x = 5.5, y = 3; | |
| 182 double a, b; | |
| 183 printf("erfc(%lg): %lg, %lg\n", x, erfc(x), kf_erfc(x)); | |
| 184 printf("upper-gamma(%lg,%lg): %lg\n", x, y, kf_gammaq(y, x)*tgamma(y)); | |
| 185 a = 2; b = 2; x = 0.5; | |
| 186 printf("incomplete-beta(%lg,%lg,%lg): %lg\n", a, b, x, kf_betai(a, b, x) / exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b))); | |
| 187 return 0; | |
| 188 } | |
| 189 #endif | |
| 190 | |
| 191 | |
| 192 // log\binom{n}{k} | |
| 193 static double lbinom(int n, int k) | |
| 194 { | |
| 195 if (k == 0 || n == k) return 0; | |
| 196 return lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1); | |
| 197 } | |
| 198 | |
| 199 // n11 n12 | n1_ | |
| 200 // n21 n22 | n2_ | |
| 201 //-----------+---- | |
| 202 // n_1 n_2 | n | |
| 203 | |
| 204 // hypergeometric distribution | |
| 205 static double hypergeo(int n11, int n1_, int n_1, int n) | |
| 206 { | |
| 207 return exp(lbinom(n1_, n11) + lbinom(n-n1_, n_1-n11) - lbinom(n, n_1)); | |
| 208 } | |
| 209 | |
| 210 typedef struct { | |
| 211 int n11, n1_, n_1, n; | |
| 212 double p; | |
| 213 } hgacc_t; | |
| 214 | |
| 215 // incremental version of hypergenometric distribution | |
| 216 static double hypergeo_acc(int n11, int n1_, int n_1, int n, hgacc_t *aux) | |
| 217 { | |
| 218 if (n1_ || n_1 || n) { | |
| 219 aux->n11 = n11; aux->n1_ = n1_; aux->n_1 = n_1; aux->n = n; | |
| 220 } else { // then only n11 changed; the rest fixed | |
| 221 if (n11%11 && n11 + aux->n - aux->n1_ - aux->n_1) { | |
| 222 if (n11 == aux->n11 + 1) { // incremental | |
| 223 aux->p *= (double)(aux->n1_ - aux->n11) / n11 | |
| 224 * (aux->n_1 - aux->n11) / (n11 + aux->n - aux->n1_ - aux->n_1); | |
| 225 aux->n11 = n11; | |
| 226 return aux->p; | |
| 227 } | |
| 228 if (n11 == aux->n11 - 1) { // incremental | |
| 229 aux->p *= (double)aux->n11 / (aux->n1_ - n11) | |
| 230 * (aux->n11 + aux->n - aux->n1_ - aux->n_1) / (aux->n_1 - n11); | |
| 231 aux->n11 = n11; | |
| 232 return aux->p; | |
| 233 } | |
| 234 } | |
| 235 aux->n11 = n11; | |
| 236 } | |
| 237 aux->p = hypergeo(aux->n11, aux->n1_, aux->n_1, aux->n); | |
| 238 return aux->p; | |
| 239 } | |
| 240 | |
| 241 double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two) | |
| 242 { | |
| 243 int i, j, max, min; | |
| 244 double p, q, left, right; | |
| 245 hgacc_t aux; | |
| 246 int n1_, n_1, n; | |
| 247 | |
| 248 n1_ = n11 + n12; n_1 = n11 + n21; n = n11 + n12 + n21 + n22; // calculate n1_, n_1 and n | |
| 249 max = (n_1 < n1_) ? n_1 : n1_; // max n11, for right tail | |
| 250 min = n1_ + n_1 - n; // not sure why n11-n22 is used instead of min(n_1,n1_) | |
| 251 if (min < 0) min = 0; // min n11, for left tail | |
| 252 *two = *_left = *_right = 1.; | |
| 253 if (min == max) return 1.; // no need to do test | |
| 254 q = hypergeo_acc(n11, n1_, n_1, n, &aux); // the probability of the current table | |
| 255 // left tail | |
| 256 p = hypergeo_acc(min, 0, 0, 0, &aux); | |
| 257 for (left = 0., i = min + 1; p < 0.99999999 * q && i<=max; ++i) // loop until underflow | |
| 258 left += p, p = hypergeo_acc(i, 0, 0, 0, &aux); | |
| 259 --i; | |
| 260 if (p < 1.00000001 * q) left += p; | |
| 261 else --i; | |
| 262 // right tail | |
| 263 p = hypergeo_acc(max, 0, 0, 0, &aux); | |
| 264 for (right = 0., j = max - 1; p < 0.99999999 * q && j>=0; --j) // loop until underflow | |
| 265 right += p, p = hypergeo_acc(j, 0, 0, 0, &aux); | |
| 266 ++j; | |
| 267 if (p < 1.00000001 * q) right += p; | |
| 268 else ++j; | |
| 269 // two-tail | |
| 270 *two = left + right; | |
| 271 if (*two > 1.) *two = 1.; | |
| 272 // adjust left and right | |
| 273 if (abs(i - n11) < abs(j - n11)) right = 1. - left + q; | |
| 274 else left = 1.0 - right + q; | |
| 275 *_left = left; *_right = right; | |
| 276 return q; | |
| 277 } | |
| 278 | |
| 279 | |
| 280 |
