Mercurial > repos > davidmurphy > codonlogo
comparison corebio/moremath.py @ 0:c55bdc2fb9fa
Uploaded
author | davidmurphy |
---|---|
date | Thu, 27 Oct 2011 12:09:09 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c55bdc2fb9fa |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 # Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com> | |
4 # | |
5 # This software is distributed under the MIT Open Source License. | |
6 # <http://www.opensource.org/licenses/mit-license.html> | |
7 # | |
8 # Permission is hereby granted, free of charge, to any person obtaining a | |
9 # copy of this software and associated documentation files (the "Software"), | |
10 # to deal in the Software without restriction, including without limitation | |
11 # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
12 # and/or sell copies of the Software, and to permit persons to whom the | |
13 # Software is furnished to do so, subject to the following conditions: | |
14 # | |
15 # The above copyright notice and this permission notice shall be included | |
16 # in all copies or substantial portions of the Software. | |
17 # | |
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
24 # THE SOFTWARE. | |
25 # | |
26 | |
27 | |
28 """ Various bits of useful math not in the standard python library. | |
29 | |
30 Constants : | |
31 | |
32 - euler_gamma = 0.577215... | |
33 - catalan = 0.915965... | |
34 - golden_ratio = 1.618033... | |
35 - bits_per_nat = log2(e) = 1/log(2) | |
36 - sqrt_2pi = 2.50662... | |
37 | |
38 Special Functions : | |
39 | |
40 - gamma() -- Gamma function. | |
41 - lngamma() -- Logarithm of the gamma function | |
42 - factorial() -- The factorial function. | |
43 - digamma() -- Digamma function (logarithmic derivative of gamma). | |
44 - trigamma() -- Trigamma function (derivative of digamma). | |
45 - entropy() -- The entropy of a probability vector | |
46 - incomplete_gamma() -- The 'upper' incomplete gamma function. | |
47 - normalized_incomplete_gamma() -- | |
48 - lg() -- Base 2 logarithms. | |
49 | |
50 | |
51 Vector Operations : | |
52 | |
53 - rmsd() -- Root mean squared deviation of two point vectors | |
54 - minimize_rmsd() -- Find the rigid transformation that minimized the | |
55 RMSD between two vectors of points. | |
56 | |
57 Minimization : | |
58 | |
59 - find_root() -- 1d root finding | |
60 | |
61 Probability Distributions : | |
62 - Gamma | |
63 - Dirichlet | |
64 - Multinomial | |
65 - Gaussian | |
66 | |
67 """ | |
68 | |
69 | |
70 | |
71 __all__ = ('euler_gamma', 'catalan', 'golden_ratio', 'bits_per_nat', 'sqrt_2pi', | |
72 'gamma', 'lngamma', 'factorial', 'digamma', 'trigamma', | |
73 'entropy', 'log2', | |
74 'incomplete_gamma', 'normalized_incomplete_gamma', | |
75 # 'integrate', | |
76 # 'rmsd', 'minimize_rmsd', 'find_root', | |
77 # 'Gamma', 'Dirichlet', | |
78 # 'decompose_log_odds_array', | |
79 'argmax', 'argmin' | |
80 ) | |
81 | |
82 from math import * | |
83 import random | |
84 from itertools import izip, count | |
85 | |
86 # Some mathematical constants | |
87 euler_gamma = 0.57721566490153286060651 | |
88 catalan = 0.91596559417721901505460 | |
89 golden_ratio = 1.6180339887498948482046 | |
90 bits_per_nat = 1.44269504088896340735992468100 # = log_2(e) = 1/log(2) | |
91 sqrt_2pi = 2.5066282746310005024157652848110 | |
92 | |
93 | |
94 | |
95 | |
96 | |
97 # The Lanczos approximation for the gamma function is | |
98 # | |
99 # -(z + g + 1/2) (z + 1/2) | |
100 # Gamma(z+1) = e * (z + g + 1/2) * Sqrt(2Pi) * C | |
101 # | |
102 # | |
103 # c[1] c[2] c[3] | |
104 # C = [c[0] + ----- + ----- + ----- + ... ] | |
105 # z + 1 z + 2 z + 3 | |
106 # | |
107 # | |
108 # To calculate digamma and trigamma functions we take an analytic derivative | |
109 # of the Lanczos approximation. | |
110 # | |
111 # Gamma(z) = Gamma(z+1)/z | |
112 # Digamma(z) = D ln Gamma(z) | |
113 # Trigamma(z) = D Digamma(z) | |
114 | |
115 # These Lanczos constants are from | |
116 # "A note on the computation of the convergent | |
117 # Lanczos complex Gamma approximation." Paul Godfrey (2001) | |
118 # http://my.fit.edu/~gabdo/gamma.txt | |
119 | |
120 | |
121 __lanczos_gamma = 607./128. | |
122 __lanczos_coefficients = ( | |
123 0.99999999999999709182, | |
124 57.156235665862923517, | |
125 -59.597960355475491248, | |
126 14.136097974741747174, | |
127 -0.49191381609762019978, | |
128 .33994649984811888699e-4, | |
129 .46523628927048575665e-4, | |
130 -.98374475304879564677e-4, | |
131 .15808870322491248884e-3, | |
132 -.21026444172410488319e-3, | |
133 .21743961811521264320e-3, | |
134 -.16431810653676389022e-3, | |
135 .84418223983852743293e-4, | |
136 -.26190838401581408670e-4, | |
137 .36899182659531622704e-5) | |
138 | |
139 __factorial =( | |
140 1., | |
141 1., | |
142 2., | |
143 6., | |
144 24., | |
145 120., | |
146 720., | |
147 5040., | |
148 40320., | |
149 362880., | |
150 3628800., | |
151 39916800., | |
152 479001600., | |
153 6227020800., | |
154 87178291200., | |
155 1307674368000., | |
156 20922789888000., | |
157 355687428096000., | |
158 6402373705728000., | |
159 121645100408832000., | |
160 2432902008176640000., | |
161 51090942171709440000., | |
162 1124000727777607680000., | |
163 25852016738884976640000., | |
164 620448401733239439360000., | |
165 15511210043330985984000000., | |
166 403291461126605635584000000., | |
167 10888869450418352160768000000., | |
168 304888344611713860501504000000., | |
169 8841761993739701954543616000000., | |
170 265252859812191058636308480000000., | |
171 8222838654177922817725562880000000., | |
172 263130836933693530167218012160000000. ) | |
173 | |
174 def gamma(z) : | |
175 """The gamma function. Returns exact results for small integers. Will | |
176 overflow for modest sized arguments. Use lngamma(z) instead. | |
177 | |
178 See: Eric W. Weisstein. "Gamma Function." From MathWorld, A Wolfram Web Resource. | |
179 http://mathworld.wolfram.com/GammaFunction.html | |
180 | |
181 """ | |
182 | |
183 n = floor(z) | |
184 if n == z : | |
185 if z <= 0 : | |
186 return 1.0/0.0 # Infinity | |
187 elif n <= len(__factorial) : | |
188 return __factorial[int(n)-1] | |
189 | |
190 zz = z | |
191 if z < 0.5 : | |
192 zz = 1-z | |
193 | |
194 | |
195 g = __lanczos_gamma | |
196 c = __lanczos_coefficients | |
197 | |
198 zz = zz - 1. | |
199 zh = zz + 0.5 | |
200 zgh = zh + g | |
201 zp = zgh** (zh*0.5) # trick for avoiding FP overflow above z=141 | |
202 | |
203 ss = 0.0 | |
204 for k in range(len(c)-1,0,-1): | |
205 ss += c[k]/(zz+k) | |
206 | |
207 f = (sqrt_2pi*(c[0]+ss)) * (( zp*exp(-zgh)) *zp) | |
208 | |
209 if z<0.5 : | |
210 f = pi /( sin(pi*z) *f) | |
211 | |
212 return f | |
213 | |
214 | |
215 def lngamma(z) : | |
216 """The logarithm of the gamma function. | |
217 """ | |
218 | |
219 # common case optimization | |
220 | |
221 n = floor(z) | |
222 if n == z : | |
223 if z <= 0 : | |
224 return 1.0/0.0 # Infinity | |
225 elif n <= len(__factorial) : | |
226 return __factorial[int(n)-1] | |
227 | |
228 zz = z | |
229 if z < 0.5 : | |
230 zz = 1-z | |
231 | |
232 | |
233 g = __lanczos_gamma | |
234 c = __lanczos_coefficients | |
235 | |
236 zz = zz - 1. | |
237 zh = zz + 0.5 | |
238 zgh = zh + g | |
239 zp = zgh** (zh*0.5) # trick for avoiding FP overflow above z=141 | |
240 | |
241 ss = 0.0 | |
242 for k in range(len(c)-1,0,-1): | |
243 ss += c[k]/(zz+k) | |
244 | |
245 f = (sqrt_2pi*(c[0]+ss)) * (( zp*exp(-zgh)) *zp) | |
246 | |
247 if z<0.5 : | |
248 f = pi /( sin(pi*z) *f) | |
249 | |
250 return log(f) | |
251 | |
252 | |
253 def factorial(z) : | |
254 """ The factorial function. | |
255 factorial(z) == gamma(z+1) | |
256 """ | |
257 return gamma(z+1) | |
258 | |
259 | |
260 def digamma(z) : | |
261 """The digamma function, the logarithmic derivative of the gamma function. | |
262 digamma(z) = d/dz ln( gamma(z) ) | |
263 | |
264 See: Eric W. Weisstein. "Digamma Function." From MathWorld-- | |
265 A Wolfram Web Resource. http://mathworld.wolfram.com/DigammaFunction.html | |
266 """ | |
267 | |
268 g = __lanczos_gamma | |
269 c = __lanczos_coefficients | |
270 | |
271 zz = z | |
272 if z < 0.5 : | |
273 zz = 1 -z | |
274 | |
275 n=0. | |
276 d=0. | |
277 for k in range(len(c)-1,0,-1): | |
278 dz =1./(zz+(k+1)-2); | |
279 dd =c[k] * dz | |
280 d = d + dd | |
281 n = n - dd * dz | |
282 | |
283 d = d + c[0] | |
284 gg = zz + g - 0.5 | |
285 f = log(gg) + (n/d - g/gg) | |
286 | |
287 if z<0.5 : | |
288 f -= pi / tan( pi * z) | |
289 | |
290 return f | |
291 | |
292 | |
293 def trigamma(z) : | |
294 """The trigamma function, the derivative of the digamma function. | |
295 trigamma(z) = d/dz digamma(z) = d/dz d/dz ln( gamma(z) ) | |
296 | |
297 See: Eric W. Weisstein. "Digamma Function." From MathWorld-- | |
298 A Wolfram Web Resource. http://mathworld.wolfram.com/TrigammaFunction.html | |
299 """ | |
300 | |
301 g = __lanczos_gamma | |
302 c = __lanczos_coefficients | |
303 | |
304 t1=0. | |
305 t2=0. | |
306 t3=0. | |
307 for k in range(len(c)-1,0,-1): | |
308 dz =1./(z+k); | |
309 dd1 = c[k]* dz | |
310 t1 += dd1 | |
311 dd2 = dd1 * dz | |
312 t2 += dd2 | |
313 t3 += dd2 * dz | |
314 | |
315 t1 += c[0] | |
316 c = - (t2*t2)/(t1*t1) +2*t3/t1 | |
317 | |
318 result = 1./(z*z) | |
319 gg = z + g + 0.5 | |
320 result += - (z+0.5)/ (gg*gg) | |
321 result += 2./gg | |
322 | |
323 result += c | |
324 | |
325 return result | |
326 | |
327 def incomplete_gamma(a,x) : | |
328 """The 'upper' incomplete gamma function: | |
329 | |
330 oo | |
331 - | |
332 | -t a-1 | |
333 incomplete_gamma(a,x) = | e t dt. | |
334 | | |
335 - | |
336 x | |
337 | |
338 In Mathematica, Gamma[a,x]. | |
339 | |
340 Note that, very confusingly, the phrase 'incomplete gamma fucntion' | |
341 can also refer to the same integral between 0 and x, (the 'lower' | |
342 incomplete gamma function) or to the normalized versions, | |
343 normalized_incomplete_gamma() ) | |
344 | |
345 | |
346 See: Eric W. Weisstein. "Gamma Function." From MathWorld, A Wolfram Web Resource. | |
347 http://mathworld.wolfram.com/IncompleteGammaFunction.html | |
348 | |
349 Bugs : | |
350 This implentation is not very accurate for some arguments. | |
351 """ | |
352 return normalized_incomplete_gamma(a,x) * gamma(a) | |
353 | |
354 | |
355 def normalized_incomplete_gamma(a,x) : | |
356 """The upper, incomplete gamma function normalized so that the limiting | |
357 values are zero and one. | |
358 | |
359 Q(a,x) = incomplete_gamma(a,x) / gamma(a) | |
360 | |
361 See: | |
362 incomplete_gamma() | |
363 Bugs : | |
364 This implentation is not very accurate for some arguments. | |
365 """ | |
366 maxiter = 100 | |
367 epsilon = 1.48e-8 | |
368 small = 1e-30 | |
369 | |
370 | |
371 if a<=0 or x<0 : | |
372 raise ValueError("Invalid arguments") | |
373 if x == 0.0 : return 1.0 | |
374 | |
375 if x<= a+1 : | |
376 # Use the series representation | |
377 term = 1./a | |
378 total = term | |
379 for n in range(1,maxiter) : | |
380 term *= x/(a+n) | |
381 total += term | |
382 if abs(term/total) < epsilon : | |
383 return 1. - total * exp(-x+a*log(x) - lngamma(a) ) | |
384 raise RuntimeError( | |
385 "Failed to converge after %d iterations." % (maxiter) ) | |
386 else : | |
387 # Use the continued fraction representation | |
388 total = 1.0 | |
389 b = x + 1. -a | |
390 c = 1./small | |
391 d = 1./b | |
392 h = d | |
393 for i in range(1, maxiter) : | |
394 an = -i * (i-a) | |
395 b = b+2. | |
396 d = an * d + b | |
397 if abs(d) < small : d = small | |
398 c = b + an /c | |
399 if abs(c) < small : c= small | |
400 d = 1./d | |
401 term = d * c | |
402 h = h * term | |
403 if abs( term-1.) < epsilon : | |
404 return h * exp(-x+a*log(x) - lngamma(a) ) | |
405 raise RuntimeError( | |
406 "Failed to converge after %d iterations." % (maxiter) ) | |
407 | |
408 | |
409 | |
410 def log2( x) : | |
411 """ Return the base 2 logarithm of x """ | |
412 return log(x,2) | |
413 | |
414 | |
415 def entropy( pvec, base= exp(1) ) : | |
416 """ The entropy S = -Sum_i p_i ln p_i | |
417 pvec is a frequency vector, not necessarily normalized. | |
418 """ | |
419 # TODO: Optimize | |
420 if len(pvec) ==0 : | |
421 raise ValueError("Zero length vector") | |
422 | |
423 | |
424 total = 0.0 | |
425 ent = 0.0 | |
426 for p in pvec: | |
427 if p>0 : # 0 log(0) =0 | |
428 total += p | |
429 ent += - log(float(p)) *p | |
430 elif p<0: | |
431 raise ValueError("Negative probability") | |
432 | |
433 | |
434 ent = (ent/total) + log(total) | |
435 ent /= log(base) | |
436 | |
437 return ent | |
438 | |
439 | |
440 | |
441 | |
442 | |
443 def argmax( alist) : | |
444 """Return the index of the last occurance of the maximum value in the list.""" | |
445 return max(izip(alist, count() ))[1] | |
446 | |
447 def argmin( alist) : | |
448 """Return the index of the first occurance of the minimum value in the list.""" | |
449 return min(izip(alist, count() ))[1] | |
450 | |
451 | |
452 | |
453 | |
454 | |
455 |