comparison corebio/moremath.py @ 7:8d676bbd1f2d

Uploaded
author davidmurphy
date Mon, 16 Jan 2012 07:03:36 -0500
parents c55bdc2fb9fa
children
comparison
equal deleted inserted replaced
6:4a4aca3d57c9 7:8d676bbd1f2d
1 #!/usr/bin/env python
2
3 # Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com>
4 #
5 # This software is distributed under the MIT Open Source License.
6 # <http://www.opensource.org/licenses/mit-license.html>
7 #
8 # Permission is hereby granted, free of charge, to any person obtaining a
9 # copy of this software and associated documentation files (the "Software"),
10 # to deal in the Software without restriction, including without limitation
11 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 # and/or sell copies of the Software, and to permit persons to whom the
13 # Software is furnished to do so, subject to the following conditions:
14 #
15 # The above copyright notice and this permission notice shall be included
16 # in all copies or substantial portions of the Software.
17 #
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 # THE SOFTWARE.
25 #
26
27
28 """ Various bits of useful math not in the standard python library.
29
30 Constants :
31
32 - euler_gamma = 0.577215...
33 - catalan = 0.915965...
34 - golden_ratio = 1.618033...
35 - bits_per_nat = log2(e) = 1/log(2)
36 - sqrt_2pi = 2.50662...
37
38 Special Functions :
39
40 - gamma() -- Gamma function.
41 - lngamma() -- Logarithm of the gamma function
42 - factorial() -- The factorial function.
43 - digamma() -- Digamma function (logarithmic derivative of gamma).
44 - trigamma() -- Trigamma function (derivative of digamma).
45 - entropy() -- The entropy of a probability vector
46 - incomplete_gamma() -- The 'upper' incomplete gamma function.
47 - normalized_incomplete_gamma() --
48 - lg() -- Base 2 logarithms.
49
50
51 Vector Operations :
52
53 - rmsd() -- Root mean squared deviation of two point vectors
54 - minimize_rmsd() -- Find the rigid transformation that minimized the
55 RMSD between two vectors of points.
56
57 Minimization :
58
59 - find_root() -- 1d root finding
60
61 Probability Distributions :
62 - Gamma
63 - Dirichlet
64 - Multinomial
65 - Gaussian
66
67 """
68
69
70
71 __all__ = ('euler_gamma', 'catalan', 'golden_ratio', 'bits_per_nat', 'sqrt_2pi',
72 'gamma', 'lngamma', 'factorial', 'digamma', 'trigamma',
73 'entropy', 'log2',
74 'incomplete_gamma', 'normalized_incomplete_gamma',
75 # 'integrate',
76 # 'rmsd', 'minimize_rmsd', 'find_root',
77 # 'Gamma', 'Dirichlet',
78 # 'decompose_log_odds_array',
79 'argmax', 'argmin'
80 )
81
82 from math import *
83 import random
84 from itertools import izip, count
85
86 # Some mathematical constants
87 euler_gamma = 0.57721566490153286060651
88 catalan = 0.91596559417721901505460
89 golden_ratio = 1.6180339887498948482046
90 bits_per_nat = 1.44269504088896340735992468100 # = log_2(e) = 1/log(2)
91 sqrt_2pi = 2.5066282746310005024157652848110
92
93
94
95
96
97 # The Lanczos approximation for the gamma function is
98 #
99 # -(z + g + 1/2) (z + 1/2)
100 # Gamma(z+1) = e * (z + g + 1/2) * Sqrt(2Pi) * C
101 #
102 #
103 # c[1] c[2] c[3]
104 # C = [c[0] + ----- + ----- + ----- + ... ]
105 # z + 1 z + 2 z + 3
106 #
107 #
108 # To calculate digamma and trigamma functions we take an analytic derivative
109 # of the Lanczos approximation.
110 #
111 # Gamma(z) = Gamma(z+1)/z
112 # Digamma(z) = D ln Gamma(z)
113 # Trigamma(z) = D Digamma(z)
114
115 # These Lanczos constants are from
116 # "A note on the computation of the convergent
117 # Lanczos complex Gamma approximation." Paul Godfrey (2001)
118 # http://my.fit.edu/~gabdo/gamma.txt
119
120
121 __lanczos_gamma = 607./128.
122 __lanczos_coefficients = (
123 0.99999999999999709182,
124 57.156235665862923517,
125 -59.597960355475491248,
126 14.136097974741747174,
127 -0.49191381609762019978,
128 .33994649984811888699e-4,
129 .46523628927048575665e-4,
130 -.98374475304879564677e-4,
131 .15808870322491248884e-3,
132 -.21026444172410488319e-3,
133 .21743961811521264320e-3,
134 -.16431810653676389022e-3,
135 .84418223983852743293e-4,
136 -.26190838401581408670e-4,
137 .36899182659531622704e-5)
138
139 __factorial =(
140 1.,
141 1.,
142 2.,
143 6.,
144 24.,
145 120.,
146 720.,
147 5040.,
148 40320.,
149 362880.,
150 3628800.,
151 39916800.,
152 479001600.,
153 6227020800.,
154 87178291200.,
155 1307674368000.,
156 20922789888000.,
157 355687428096000.,
158 6402373705728000.,
159 121645100408832000.,
160 2432902008176640000.,
161 51090942171709440000.,
162 1124000727777607680000.,
163 25852016738884976640000.,
164 620448401733239439360000.,
165 15511210043330985984000000.,
166 403291461126605635584000000.,
167 10888869450418352160768000000.,
168 304888344611713860501504000000.,
169 8841761993739701954543616000000.,
170 265252859812191058636308480000000.,
171 8222838654177922817725562880000000.,
172 263130836933693530167218012160000000. )
173
174 def gamma(z) :
175 """The gamma function. Returns exact results for small integers. Will
176 overflow for modest sized arguments. Use lngamma(z) instead.
177
178 See: Eric W. Weisstein. "Gamma Function." From MathWorld, A Wolfram Web Resource.
179 http://mathworld.wolfram.com/GammaFunction.html
180
181 """
182
183 n = floor(z)
184 if n == z :
185 if z <= 0 :
186 return 1.0/0.0 # Infinity
187 elif n <= len(__factorial) :
188 return __factorial[int(n)-1]
189
190 zz = z
191 if z < 0.5 :
192 zz = 1-z
193
194
195 g = __lanczos_gamma
196 c = __lanczos_coefficients
197
198 zz = zz - 1.
199 zh = zz + 0.5
200 zgh = zh + g
201 zp = zgh** (zh*0.5) # trick for avoiding FP overflow above z=141
202
203 ss = 0.0
204 for k in range(len(c)-1,0,-1):
205 ss += c[k]/(zz+k)
206
207 f = (sqrt_2pi*(c[0]+ss)) * (( zp*exp(-zgh)) *zp)
208
209 if z<0.5 :
210 f = pi /( sin(pi*z) *f)
211
212 return f
213
214
215 def lngamma(z) :
216 """The logarithm of the gamma function.
217 """
218
219 # common case optimization
220
221 n = floor(z)
222 if n == z :
223 if z <= 0 :
224 return 1.0/0.0 # Infinity
225 elif n <= len(__factorial) :
226 return __factorial[int(n)-1]
227
228 zz = z
229 if z < 0.5 :
230 zz = 1-z
231
232
233 g = __lanczos_gamma
234 c = __lanczos_coefficients
235
236 zz = zz - 1.
237 zh = zz + 0.5
238 zgh = zh + g
239 zp = zgh** (zh*0.5) # trick for avoiding FP overflow above z=141
240
241 ss = 0.0
242 for k in range(len(c)-1,0,-1):
243 ss += c[k]/(zz+k)
244
245 f = (sqrt_2pi*(c[0]+ss)) * (( zp*exp(-zgh)) *zp)
246
247 if z<0.5 :
248 f = pi /( sin(pi*z) *f)
249
250 return log(f)
251
252
253 def factorial(z) :
254 """ The factorial function.
255 factorial(z) == gamma(z+1)
256 """
257 return gamma(z+1)
258
259
260 def digamma(z) :
261 """The digamma function, the logarithmic derivative of the gamma function.
262 digamma(z) = d/dz ln( gamma(z) )
263
264 See: Eric W. Weisstein. "Digamma Function." From MathWorld--
265 A Wolfram Web Resource. http://mathworld.wolfram.com/DigammaFunction.html
266 """
267
268 g = __lanczos_gamma
269 c = __lanczos_coefficients
270
271 zz = z
272 if z < 0.5 :
273 zz = 1 -z
274
275 n=0.
276 d=0.
277 for k in range(len(c)-1,0,-1):
278 dz =1./(zz+(k+1)-2);
279 dd =c[k] * dz
280 d = d + dd
281 n = n - dd * dz
282
283 d = d + c[0]
284 gg = zz + g - 0.5
285 f = log(gg) + (n/d - g/gg)
286
287 if z<0.5 :
288 f -= pi / tan( pi * z)
289
290 return f
291
292
293 def trigamma(z) :
294 """The trigamma function, the derivative of the digamma function.
295 trigamma(z) = d/dz digamma(z) = d/dz d/dz ln( gamma(z) )
296
297 See: Eric W. Weisstein. "Digamma Function." From MathWorld--
298 A Wolfram Web Resource. http://mathworld.wolfram.com/TrigammaFunction.html
299 """
300
301 g = __lanczos_gamma
302 c = __lanczos_coefficients
303
304 t1=0.
305 t2=0.
306 t3=0.
307 for k in range(len(c)-1,0,-1):
308 dz =1./(z+k);
309 dd1 = c[k]* dz
310 t1 += dd1
311 dd2 = dd1 * dz
312 t2 += dd2
313 t3 += dd2 * dz
314
315 t1 += c[0]
316 c = - (t2*t2)/(t1*t1) +2*t3/t1
317
318 result = 1./(z*z)
319 gg = z + g + 0.5
320 result += - (z+0.5)/ (gg*gg)
321 result += 2./gg
322
323 result += c
324
325 return result
326
327 def incomplete_gamma(a,x) :
328 """The 'upper' incomplete gamma function:
329
330 oo
331 -
332 | -t a-1
333 incomplete_gamma(a,x) = | e t dt.
334 |
335 -
336 x
337
338 In Mathematica, Gamma[a,x].
339
340 Note that, very confusingly, the phrase 'incomplete gamma fucntion'
341 can also refer to the same integral between 0 and x, (the 'lower'
342 incomplete gamma function) or to the normalized versions,
343 normalized_incomplete_gamma() )
344
345
346 See: Eric W. Weisstein. "Gamma Function." From MathWorld, A Wolfram Web Resource.
347 http://mathworld.wolfram.com/IncompleteGammaFunction.html
348
349 Bugs :
350 This implentation is not very accurate for some arguments.
351 """
352 return normalized_incomplete_gamma(a,x) * gamma(a)
353
354
355 def normalized_incomplete_gamma(a,x) :
356 """The upper, incomplete gamma function normalized so that the limiting
357 values are zero and one.
358
359 Q(a,x) = incomplete_gamma(a,x) / gamma(a)
360
361 See:
362 incomplete_gamma()
363 Bugs :
364 This implentation is not very accurate for some arguments.
365 """
366 maxiter = 100
367 epsilon = 1.48e-8
368 small = 1e-30
369
370
371 if a<=0 or x<0 :
372 raise ValueError("Invalid arguments")
373 if x == 0.0 : return 1.0
374
375 if x<= a+1 :
376 # Use the series representation
377 term = 1./a
378 total = term
379 for n in range(1,maxiter) :
380 term *= x/(a+n)
381 total += term
382 if abs(term/total) < epsilon :
383 return 1. - total * exp(-x+a*log(x) - lngamma(a) )
384 raise RuntimeError(
385 "Failed to converge after %d iterations." % (maxiter) )
386 else :
387 # Use the continued fraction representation
388 total = 1.0
389 b = x + 1. -a
390 c = 1./small
391 d = 1./b
392 h = d
393 for i in range(1, maxiter) :
394 an = -i * (i-a)
395 b = b+2.
396 d = an * d + b
397 if abs(d) < small : d = small
398 c = b + an /c
399 if abs(c) < small : c= small
400 d = 1./d
401 term = d * c
402 h = h * term
403 if abs( term-1.) < epsilon :
404 return h * exp(-x+a*log(x) - lngamma(a) )
405 raise RuntimeError(
406 "Failed to converge after %d iterations." % (maxiter) )
407
408
409
410 def log2( x) :
411 """ Return the base 2 logarithm of x """
412 return log(x,2)
413
414
415 def entropy( pvec, base= exp(1) ) :
416 """ The entropy S = -Sum_i p_i ln p_i
417 pvec is a frequency vector, not necessarily normalized.
418 """
419 # TODO: Optimize
420 if len(pvec) ==0 :
421 raise ValueError("Zero length vector")
422
423
424 total = 0.0
425 ent = 0.0
426 for p in pvec:
427 if p>0 : # 0 log(0) =0
428 total += p
429 ent += - log(float(p)) *p
430 elif p<0:
431 raise ValueError("Negative probability")
432
433
434 ent = (ent/total) + log(total)
435 ent /= log(base)
436
437 return ent
438
439
440
441
442
443 def argmax( alist) :
444 """Return the index of the last occurance of the maximum value in the list."""
445 return max(izip(alist, count() ))[1]
446
447 def argmin( alist) :
448 """Return the index of the first occurance of the minimum value in the list."""
449 return min(izip(alist, count() ))[1]
450
451
452
453
454
455