0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 # Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com>
|
|
4 #
|
|
5 # This software is distributed under the MIT Open Source License.
|
|
6 # <http://www.opensource.org/licenses/mit-license.html>
|
|
7 #
|
|
8 # Permission is hereby granted, free of charge, to any person obtaining a
|
|
9 # copy of this software and associated documentation files (the "Software"),
|
|
10 # to deal in the Software without restriction, including without limitation
|
|
11 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
12 # and/or sell copies of the Software, and to permit persons to whom the
|
|
13 # Software is furnished to do so, subject to the following conditions:
|
|
14 #
|
|
15 # The above copyright notice and this permission notice shall be included
|
|
16 # in all copies or substantial portions of the Software.
|
|
17 #
|
|
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
24 # THE SOFTWARE.
|
|
25 #
|
|
26
|
|
27
|
|
28 """ Various bits of useful math not in the standard python library.
|
|
29
|
|
30 Constants :
|
|
31
|
|
32 - euler_gamma = 0.577215...
|
|
33 - catalan = 0.915965...
|
|
34 - golden_ratio = 1.618033...
|
|
35 - bits_per_nat = log2(e) = 1/log(2)
|
|
36 - sqrt_2pi = 2.50662...
|
|
37
|
|
38 Special Functions :
|
|
39
|
|
40 - gamma() -- Gamma function.
|
|
41 - lngamma() -- Logarithm of the gamma function
|
|
42 - factorial() -- The factorial function.
|
|
43 - digamma() -- Digamma function (logarithmic derivative of gamma).
|
|
44 - trigamma() -- Trigamma function (derivative of digamma).
|
|
45 - entropy() -- The entropy of a probability vector
|
|
46 - incomplete_gamma() -- The 'upper' incomplete gamma function.
|
|
47 - normalized_incomplete_gamma() --
|
|
48 - lg() -- Base 2 logarithms.
|
|
49
|
|
50
|
|
51 Vector Operations :
|
|
52
|
|
53 - rmsd() -- Root mean squared deviation of two point vectors
|
|
54 - minimize_rmsd() -- Find the rigid transformation that minimized the
|
|
55 RMSD between two vectors of points.
|
|
56
|
|
57 Minimization :
|
|
58
|
|
59 - find_root() -- 1d root finding
|
|
60
|
|
61 Probability Distributions :
|
|
62 - Gamma
|
|
63 - Dirichlet
|
|
64 - Multinomial
|
|
65 - Gaussian
|
|
66
|
|
67 """
|
|
68
|
|
69
|
|
70
|
|
71 __all__ = ('euler_gamma', 'catalan', 'golden_ratio', 'bits_per_nat', 'sqrt_2pi',
|
|
72 'gamma', 'lngamma', 'factorial', 'digamma', 'trigamma',
|
|
73 'entropy', 'log2',
|
|
74 'incomplete_gamma', 'normalized_incomplete_gamma',
|
|
75 # 'integrate',
|
|
76 # 'rmsd', 'minimize_rmsd', 'find_root',
|
|
77 # 'Gamma', 'Dirichlet',
|
|
78 # 'decompose_log_odds_array',
|
|
79 'argmax', 'argmin'
|
|
80 )
|
|
81
|
|
82 from math import *
|
|
83 import random
|
|
84 from itertools import izip, count
|
|
85
|
|
86 # Some mathematical constants
|
|
87 euler_gamma = 0.57721566490153286060651
|
|
88 catalan = 0.91596559417721901505460
|
|
89 golden_ratio = 1.6180339887498948482046
|
|
90 bits_per_nat = 1.44269504088896340735992468100 # = log_2(e) = 1/log(2)
|
|
91 sqrt_2pi = 2.5066282746310005024157652848110
|
|
92
|
|
93
|
|
94
|
|
95
|
|
96
|
|
97 # The Lanczos approximation for the gamma function is
|
|
98 #
|
|
99 # -(z + g + 1/2) (z + 1/2)
|
|
100 # Gamma(z+1) = e * (z + g + 1/2) * Sqrt(2Pi) * C
|
|
101 #
|
|
102 #
|
|
103 # c[1] c[2] c[3]
|
|
104 # C = [c[0] + ----- + ----- + ----- + ... ]
|
|
105 # z + 1 z + 2 z + 3
|
|
106 #
|
|
107 #
|
|
108 # To calculate digamma and trigamma functions we take an analytic derivative
|
|
109 # of the Lanczos approximation.
|
|
110 #
|
|
111 # Gamma(z) = Gamma(z+1)/z
|
|
112 # Digamma(z) = D ln Gamma(z)
|
|
113 # Trigamma(z) = D Digamma(z)
|
|
114
|
|
115 # These Lanczos constants are from
|
|
116 # "A note on the computation of the convergent
|
|
117 # Lanczos complex Gamma approximation." Paul Godfrey (2001)
|
|
118 # http://my.fit.edu/~gabdo/gamma.txt
|
|
119
|
|
120
|
|
121 __lanczos_gamma = 607./128.
|
|
122 __lanczos_coefficients = (
|
|
123 0.99999999999999709182,
|
|
124 57.156235665862923517,
|
|
125 -59.597960355475491248,
|
|
126 14.136097974741747174,
|
|
127 -0.49191381609762019978,
|
|
128 .33994649984811888699e-4,
|
|
129 .46523628927048575665e-4,
|
|
130 -.98374475304879564677e-4,
|
|
131 .15808870322491248884e-3,
|
|
132 -.21026444172410488319e-3,
|
|
133 .21743961811521264320e-3,
|
|
134 -.16431810653676389022e-3,
|
|
135 .84418223983852743293e-4,
|
|
136 -.26190838401581408670e-4,
|
|
137 .36899182659531622704e-5)
|
|
138
|
|
139 __factorial =(
|
|
140 1.,
|
|
141 1.,
|
|
142 2.,
|
|
143 6.,
|
|
144 24.,
|
|
145 120.,
|
|
146 720.,
|
|
147 5040.,
|
|
148 40320.,
|
|
149 362880.,
|
|
150 3628800.,
|
|
151 39916800.,
|
|
152 479001600.,
|
|
153 6227020800.,
|
|
154 87178291200.,
|
|
155 1307674368000.,
|
|
156 20922789888000.,
|
|
157 355687428096000.,
|
|
158 6402373705728000.,
|
|
159 121645100408832000.,
|
|
160 2432902008176640000.,
|
|
161 51090942171709440000.,
|
|
162 1124000727777607680000.,
|
|
163 25852016738884976640000.,
|
|
164 620448401733239439360000.,
|
|
165 15511210043330985984000000.,
|
|
166 403291461126605635584000000.,
|
|
167 10888869450418352160768000000.,
|
|
168 304888344611713860501504000000.,
|
|
169 8841761993739701954543616000000.,
|
|
170 265252859812191058636308480000000.,
|
|
171 8222838654177922817725562880000000.,
|
|
172 263130836933693530167218012160000000. )
|
|
173
|
|
174 def gamma(z) :
|
|
175 """The gamma function. Returns exact results for small integers. Will
|
|
176 overflow for modest sized arguments. Use lngamma(z) instead.
|
|
177
|
|
178 See: Eric W. Weisstein. "Gamma Function." From MathWorld, A Wolfram Web Resource.
|
|
179 http://mathworld.wolfram.com/GammaFunction.html
|
|
180
|
|
181 """
|
|
182
|
|
183 n = floor(z)
|
|
184 if n == z :
|
|
185 if z <= 0 :
|
|
186 return 1.0/0.0 # Infinity
|
|
187 elif n <= len(__factorial) :
|
|
188 return __factorial[int(n)-1]
|
|
189
|
|
190 zz = z
|
|
191 if z < 0.5 :
|
|
192 zz = 1-z
|
|
193
|
|
194
|
|
195 g = __lanczos_gamma
|
|
196 c = __lanczos_coefficients
|
|
197
|
|
198 zz = zz - 1.
|
|
199 zh = zz + 0.5
|
|
200 zgh = zh + g
|
|
201 zp = zgh** (zh*0.5) # trick for avoiding FP overflow above z=141
|
|
202
|
|
203 ss = 0.0
|
|
204 for k in range(len(c)-1,0,-1):
|
|
205 ss += c[k]/(zz+k)
|
|
206
|
|
207 f = (sqrt_2pi*(c[0]+ss)) * (( zp*exp(-zgh)) *zp)
|
|
208
|
|
209 if z<0.5 :
|
|
210 f = pi /( sin(pi*z) *f)
|
|
211
|
|
212 return f
|
|
213
|
|
214
|
|
215 def lngamma(z) :
|
|
216 """The logarithm of the gamma function.
|
|
217 """
|
|
218
|
|
219 # common case optimization
|
|
220
|
|
221 n = floor(z)
|
|
222 if n == z :
|
|
223 if z <= 0 :
|
|
224 return 1.0/0.0 # Infinity
|
|
225 elif n <= len(__factorial) :
|
|
226 return __factorial[int(n)-1]
|
|
227
|
|
228 zz = z
|
|
229 if z < 0.5 :
|
|
230 zz = 1-z
|
|
231
|
|
232
|
|
233 g = __lanczos_gamma
|
|
234 c = __lanczos_coefficients
|
|
235
|
|
236 zz = zz - 1.
|
|
237 zh = zz + 0.5
|
|
238 zgh = zh + g
|
|
239 zp = zgh** (zh*0.5) # trick for avoiding FP overflow above z=141
|
|
240
|
|
241 ss = 0.0
|
|
242 for k in range(len(c)-1,0,-1):
|
|
243 ss += c[k]/(zz+k)
|
|
244
|
|
245 f = (sqrt_2pi*(c[0]+ss)) * (( zp*exp(-zgh)) *zp)
|
|
246
|
|
247 if z<0.5 :
|
|
248 f = pi /( sin(pi*z) *f)
|
|
249
|
|
250 return log(f)
|
|
251
|
|
252
|
|
253 def factorial(z) :
|
|
254 """ The factorial function.
|
|
255 factorial(z) == gamma(z+1)
|
|
256 """
|
|
257 return gamma(z+1)
|
|
258
|
|
259
|
|
260 def digamma(z) :
|
|
261 """The digamma function, the logarithmic derivative of the gamma function.
|
|
262 digamma(z) = d/dz ln( gamma(z) )
|
|
263
|
|
264 See: Eric W. Weisstein. "Digamma Function." From MathWorld--
|
|
265 A Wolfram Web Resource. http://mathworld.wolfram.com/DigammaFunction.html
|
|
266 """
|
|
267
|
|
268 g = __lanczos_gamma
|
|
269 c = __lanczos_coefficients
|
|
270
|
|
271 zz = z
|
|
272 if z < 0.5 :
|
|
273 zz = 1 -z
|
|
274
|
|
275 n=0.
|
|
276 d=0.
|
|
277 for k in range(len(c)-1,0,-1):
|
|
278 dz =1./(zz+(k+1)-2);
|
|
279 dd =c[k] * dz
|
|
280 d = d + dd
|
|
281 n = n - dd * dz
|
|
282
|
|
283 d = d + c[0]
|
|
284 gg = zz + g - 0.5
|
|
285 f = log(gg) + (n/d - g/gg)
|
|
286
|
|
287 if z<0.5 :
|
|
288 f -= pi / tan( pi * z)
|
|
289
|
|
290 return f
|
|
291
|
|
292
|
|
293 def trigamma(z) :
|
|
294 """The trigamma function, the derivative of the digamma function.
|
|
295 trigamma(z) = d/dz digamma(z) = d/dz d/dz ln( gamma(z) )
|
|
296
|
|
297 See: Eric W. Weisstein. "Digamma Function." From MathWorld--
|
|
298 A Wolfram Web Resource. http://mathworld.wolfram.com/TrigammaFunction.html
|
|
299 """
|
|
300
|
|
301 g = __lanczos_gamma
|
|
302 c = __lanczos_coefficients
|
|
303
|
|
304 t1=0.
|
|
305 t2=0.
|
|
306 t3=0.
|
|
307 for k in range(len(c)-1,0,-1):
|
|
308 dz =1./(z+k);
|
|
309 dd1 = c[k]* dz
|
|
310 t1 += dd1
|
|
311 dd2 = dd1 * dz
|
|
312 t2 += dd2
|
|
313 t3 += dd2 * dz
|
|
314
|
|
315 t1 += c[0]
|
|
316 c = - (t2*t2)/(t1*t1) +2*t3/t1
|
|
317
|
|
318 result = 1./(z*z)
|
|
319 gg = z + g + 0.5
|
|
320 result += - (z+0.5)/ (gg*gg)
|
|
321 result += 2./gg
|
|
322
|
|
323 result += c
|
|
324
|
|
325 return result
|
|
326
|
|
327 def incomplete_gamma(a,x) :
|
|
328 """The 'upper' incomplete gamma function:
|
|
329
|
|
330 oo
|
|
331 -
|
|
332 | -t a-1
|
|
333 incomplete_gamma(a,x) = | e t dt.
|
|
334 |
|
|
335 -
|
|
336 x
|
|
337
|
|
338 In Mathematica, Gamma[a,x].
|
|
339
|
|
340 Note that, very confusingly, the phrase 'incomplete gamma fucntion'
|
|
341 can also refer to the same integral between 0 and x, (the 'lower'
|
|
342 incomplete gamma function) or to the normalized versions,
|
|
343 normalized_incomplete_gamma() )
|
|
344
|
|
345
|
|
346 See: Eric W. Weisstein. "Gamma Function." From MathWorld, A Wolfram Web Resource.
|
|
347 http://mathworld.wolfram.com/IncompleteGammaFunction.html
|
|
348
|
|
349 Bugs :
|
|
350 This implentation is not very accurate for some arguments.
|
|
351 """
|
|
352 return normalized_incomplete_gamma(a,x) * gamma(a)
|
|
353
|
|
354
|
|
355 def normalized_incomplete_gamma(a,x) :
|
|
356 """The upper, incomplete gamma function normalized so that the limiting
|
|
357 values are zero and one.
|
|
358
|
|
359 Q(a,x) = incomplete_gamma(a,x) / gamma(a)
|
|
360
|
|
361 See:
|
|
362 incomplete_gamma()
|
|
363 Bugs :
|
|
364 This implentation is not very accurate for some arguments.
|
|
365 """
|
|
366 maxiter = 100
|
|
367 epsilon = 1.48e-8
|
|
368 small = 1e-30
|
|
369
|
|
370
|
|
371 if a<=0 or x<0 :
|
|
372 raise ValueError("Invalid arguments")
|
|
373 if x == 0.0 : return 1.0
|
|
374
|
|
375 if x<= a+1 :
|
|
376 # Use the series representation
|
|
377 term = 1./a
|
|
378 total = term
|
|
379 for n in range(1,maxiter) :
|
|
380 term *= x/(a+n)
|
|
381 total += term
|
|
382 if abs(term/total) < epsilon :
|
|
383 return 1. - total * exp(-x+a*log(x) - lngamma(a) )
|
|
384 raise RuntimeError(
|
|
385 "Failed to converge after %d iterations." % (maxiter) )
|
|
386 else :
|
|
387 # Use the continued fraction representation
|
|
388 total = 1.0
|
|
389 b = x + 1. -a
|
|
390 c = 1./small
|
|
391 d = 1./b
|
|
392 h = d
|
|
393 for i in range(1, maxiter) :
|
|
394 an = -i * (i-a)
|
|
395 b = b+2.
|
|
396 d = an * d + b
|
|
397 if abs(d) < small : d = small
|
|
398 c = b + an /c
|
|
399 if abs(c) < small : c= small
|
|
400 d = 1./d
|
|
401 term = d * c
|
|
402 h = h * term
|
|
403 if abs( term-1.) < epsilon :
|
|
404 return h * exp(-x+a*log(x) - lngamma(a) )
|
|
405 raise RuntimeError(
|
|
406 "Failed to converge after %d iterations." % (maxiter) )
|
|
407
|
|
408
|
|
409
|
|
410 def log2( x) :
|
|
411 """ Return the base 2 logarithm of x """
|
|
412 return log(x,2)
|
|
413
|
|
414
|
|
415 def entropy( pvec, base= exp(1) ) :
|
|
416 """ The entropy S = -Sum_i p_i ln p_i
|
|
417 pvec is a frequency vector, not necessarily normalized.
|
|
418 """
|
|
419 # TODO: Optimize
|
|
420 if len(pvec) ==0 :
|
|
421 raise ValueError("Zero length vector")
|
|
422
|
|
423
|
|
424 total = 0.0
|
|
425 ent = 0.0
|
|
426 for p in pvec:
|
|
427 if p>0 : # 0 log(0) =0
|
|
428 total += p
|
|
429 ent += - log(float(p)) *p
|
|
430 elif p<0:
|
|
431 raise ValueError("Negative probability")
|
|
432
|
|
433
|
|
434 ent = (ent/total) + log(total)
|
|
435 ent /= log(base)
|
|
436
|
|
437 return ent
|
|
438
|
|
439
|
|
440
|
|
441
|
|
442
|
|
443 def argmax( alist) :
|
|
444 """Return the index of the last occurance of the maximum value in the list."""
|
|
445 return max(izip(alist, count() ))[1]
|
|
446
|
|
447 def argmin( alist) :
|
|
448 """Return the index of the first occurance of the minimum value in the list."""
|
|
449 return min(izip(alist, count() ))[1]
|
|
450
|
|
451
|
|
452
|
|
453
|
|
454
|
|
455
|