Mercurial > repos > davidmurphy > codonlogo
diff corebio/moremath.py @ 7:8d676bbd1f2d
Uploaded
author | davidmurphy |
---|---|
date | Mon, 16 Jan 2012 07:03:36 -0500 |
parents | c55bdc2fb9fa |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/corebio/moremath.py Mon Jan 16 07:03:36 2012 -0500 @@ -0,0 +1,455 @@ +#!/usr/bin/env python + +# Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com> +# +# This software is distributed under the MIT Open Source License. +# <http://www.opensource.org/licenses/mit-license.html> +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# + + +""" Various bits of useful math not in the standard python library. + +Constants : + +- euler_gamma = 0.577215... +- catalan = 0.915965... +- golden_ratio = 1.618033... +- bits_per_nat = log2(e) = 1/log(2) +- sqrt_2pi = 2.50662... + +Special Functions : + +- gamma() -- Gamma function. +- lngamma() -- Logarithm of the gamma function +- factorial() -- The factorial function. +- digamma() -- Digamma function (logarithmic derivative of gamma). +- trigamma() -- Trigamma function (derivative of digamma). +- entropy() -- The entropy of a probability vector +- incomplete_gamma() -- The 'upper' incomplete gamma function. +- normalized_incomplete_gamma() -- +- lg() -- Base 2 logarithms. + + +Vector Operations : + +- rmsd() -- Root mean squared deviation of two point vectors +- minimize_rmsd() -- Find the rigid transformation that minimized the + RMSD between two vectors of points. + +Minimization : + +- find_root() -- 1d root finding + +Probability Distributions : +- Gamma +- Dirichlet +- Multinomial +- Gaussian + +""" + + + +__all__ = ('euler_gamma', 'catalan', 'golden_ratio', 'bits_per_nat', 'sqrt_2pi', + 'gamma', 'lngamma', 'factorial', 'digamma', 'trigamma', + 'entropy', 'log2', + 'incomplete_gamma', 'normalized_incomplete_gamma', + # 'integrate', + # 'rmsd', 'minimize_rmsd', 'find_root', + # 'Gamma', 'Dirichlet', + # 'decompose_log_odds_array', + 'argmax', 'argmin' + ) + +from math import * +import random +from itertools import izip, count + +# Some mathematical constants +euler_gamma = 0.57721566490153286060651 +catalan = 0.91596559417721901505460 +golden_ratio = 1.6180339887498948482046 +bits_per_nat = 1.44269504088896340735992468100 # = log_2(e) = 1/log(2) +sqrt_2pi = 2.5066282746310005024157652848110 + + + + + +# The Lanczos approximation for the gamma function is +# +# -(z + g + 1/2) (z + 1/2) +# Gamma(z+1) = e * (z + g + 1/2) * Sqrt(2Pi) * C +# +# +# c[1] c[2] c[3] +# C = [c[0] + ----- + ----- + ----- + ... ] +# z + 1 z + 2 z + 3 +# +# +# To calculate digamma and trigamma functions we take an analytic derivative +# of the Lanczos approximation. +# +# Gamma(z) = Gamma(z+1)/z +# Digamma(z) = D ln Gamma(z) +# Trigamma(z) = D Digamma(z) + +# These Lanczos constants are from +# "A note on the computation of the convergent +# Lanczos complex Gamma approximation." Paul Godfrey (2001) +# http://my.fit.edu/~gabdo/gamma.txt + + +__lanczos_gamma = 607./128. +__lanczos_coefficients = ( + 0.99999999999999709182, + 57.156235665862923517, + -59.597960355475491248, + 14.136097974741747174, + -0.49191381609762019978, + .33994649984811888699e-4, + .46523628927048575665e-4, + -.98374475304879564677e-4, + .15808870322491248884e-3, + -.21026444172410488319e-3, + .21743961811521264320e-3, + -.16431810653676389022e-3, + .84418223983852743293e-4, + -.26190838401581408670e-4, + .36899182659531622704e-5) + +__factorial =( + 1., + 1., + 2., + 6., + 24., + 120., + 720., + 5040., + 40320., + 362880., + 3628800., + 39916800., + 479001600., + 6227020800., + 87178291200., + 1307674368000., + 20922789888000., + 355687428096000., + 6402373705728000., + 121645100408832000., + 2432902008176640000., + 51090942171709440000., + 1124000727777607680000., + 25852016738884976640000., + 620448401733239439360000., + 15511210043330985984000000., + 403291461126605635584000000., + 10888869450418352160768000000., + 304888344611713860501504000000., + 8841761993739701954543616000000., + 265252859812191058636308480000000., + 8222838654177922817725562880000000., + 263130836933693530167218012160000000. ) + +def gamma(z) : + """The gamma function. Returns exact results for small integers. Will + overflow for modest sized arguments. Use lngamma(z) instead. + + See: Eric W. Weisstein. "Gamma Function." From MathWorld, A Wolfram Web Resource. + http://mathworld.wolfram.com/GammaFunction.html + + """ + + n = floor(z) + if n == z : + if z <= 0 : + return 1.0/0.0 # Infinity + elif n <= len(__factorial) : + return __factorial[int(n)-1] + + zz = z + if z < 0.5 : + zz = 1-z + + + g = __lanczos_gamma + c = __lanczos_coefficients + + zz = zz - 1. + zh = zz + 0.5 + zgh = zh + g + zp = zgh** (zh*0.5) # trick for avoiding FP overflow above z=141 + + ss = 0.0 + for k in range(len(c)-1,0,-1): + ss += c[k]/(zz+k) + + f = (sqrt_2pi*(c[0]+ss)) * (( zp*exp(-zgh)) *zp) + + if z<0.5 : + f = pi /( sin(pi*z) *f) + + return f + + +def lngamma(z) : + """The logarithm of the gamma function. + """ + + # common case optimization + + n = floor(z) + if n == z : + if z <= 0 : + return 1.0/0.0 # Infinity + elif n <= len(__factorial) : + return __factorial[int(n)-1] + + zz = z + if z < 0.5 : + zz = 1-z + + + g = __lanczos_gamma + c = __lanczos_coefficients + + zz = zz - 1. + zh = zz + 0.5 + zgh = zh + g + zp = zgh** (zh*0.5) # trick for avoiding FP overflow above z=141 + + ss = 0.0 + for k in range(len(c)-1,0,-1): + ss += c[k]/(zz+k) + + f = (sqrt_2pi*(c[0]+ss)) * (( zp*exp(-zgh)) *zp) + + if z<0.5 : + f = pi /( sin(pi*z) *f) + + return log(f) + + +def factorial(z) : + """ The factorial function. + factorial(z) == gamma(z+1) + """ + return gamma(z+1) + + +def digamma(z) : + """The digamma function, the logarithmic derivative of the gamma function. + digamma(z) = d/dz ln( gamma(z) ) + + See: Eric W. Weisstein. "Digamma Function." From MathWorld-- + A Wolfram Web Resource. http://mathworld.wolfram.com/DigammaFunction.html + """ + + g = __lanczos_gamma + c = __lanczos_coefficients + + zz = z + if z < 0.5 : + zz = 1 -z + + n=0. + d=0. + for k in range(len(c)-1,0,-1): + dz =1./(zz+(k+1)-2); + dd =c[k] * dz + d = d + dd + n = n - dd * dz + + d = d + c[0] + gg = zz + g - 0.5 + f = log(gg) + (n/d - g/gg) + + if z<0.5 : + f -= pi / tan( pi * z) + + return f + + +def trigamma(z) : + """The trigamma function, the derivative of the digamma function. + trigamma(z) = d/dz digamma(z) = d/dz d/dz ln( gamma(z) ) + + See: Eric W. Weisstein. "Digamma Function." From MathWorld-- + A Wolfram Web Resource. http://mathworld.wolfram.com/TrigammaFunction.html + """ + + g = __lanczos_gamma + c = __lanczos_coefficients + + t1=0. + t2=0. + t3=0. + for k in range(len(c)-1,0,-1): + dz =1./(z+k); + dd1 = c[k]* dz + t1 += dd1 + dd2 = dd1 * dz + t2 += dd2 + t3 += dd2 * dz + + t1 += c[0] + c = - (t2*t2)/(t1*t1) +2*t3/t1 + + result = 1./(z*z) + gg = z + g + 0.5 + result += - (z+0.5)/ (gg*gg) + result += 2./gg + + result += c + + return result + +def incomplete_gamma(a,x) : + """The 'upper' incomplete gamma function: + + oo + - + | -t a-1 + incomplete_gamma(a,x) = | e t dt. + | + - + x + + In Mathematica, Gamma[a,x]. + + Note that, very confusingly, the phrase 'incomplete gamma fucntion' + can also refer to the same integral between 0 and x, (the 'lower' + incomplete gamma function) or to the normalized versions, + normalized_incomplete_gamma() ) + + + See: Eric W. Weisstein. "Gamma Function." From MathWorld, A Wolfram Web Resource. + http://mathworld.wolfram.com/IncompleteGammaFunction.html + + Bugs : + This implentation is not very accurate for some arguments. + """ + return normalized_incomplete_gamma(a,x) * gamma(a) + + +def normalized_incomplete_gamma(a,x) : + """The upper, incomplete gamma function normalized so that the limiting + values are zero and one. + + Q(a,x) = incomplete_gamma(a,x) / gamma(a) + + See: + incomplete_gamma() + Bugs : + This implentation is not very accurate for some arguments. + """ + maxiter = 100 + epsilon = 1.48e-8 + small = 1e-30 + + + if a<=0 or x<0 : + raise ValueError("Invalid arguments") + if x == 0.0 : return 1.0 + + if x<= a+1 : + # Use the series representation + term = 1./a + total = term + for n in range(1,maxiter) : + term *= x/(a+n) + total += term + if abs(term/total) < epsilon : + return 1. - total * exp(-x+a*log(x) - lngamma(a) ) + raise RuntimeError( + "Failed to converge after %d iterations." % (maxiter) ) + else : + # Use the continued fraction representation + total = 1.0 + b = x + 1. -a + c = 1./small + d = 1./b + h = d + for i in range(1, maxiter) : + an = -i * (i-a) + b = b+2. + d = an * d + b + if abs(d) < small : d = small + c = b + an /c + if abs(c) < small : c= small + d = 1./d + term = d * c + h = h * term + if abs( term-1.) < epsilon : + return h * exp(-x+a*log(x) - lngamma(a) ) + raise RuntimeError( + "Failed to converge after %d iterations." % (maxiter) ) + + + +def log2( x) : + """ Return the base 2 logarithm of x """ + return log(x,2) + + +def entropy( pvec, base= exp(1) ) : + """ The entropy S = -Sum_i p_i ln p_i + pvec is a frequency vector, not necessarily normalized. + """ + # TODO: Optimize + if len(pvec) ==0 : + raise ValueError("Zero length vector") + + + total = 0.0 + ent = 0.0 + for p in pvec: + if p>0 : # 0 log(0) =0 + total += p + ent += - log(float(p)) *p + elif p<0: + raise ValueError("Negative probability") + + + ent = (ent/total) + log(total) + ent /= log(base) + + return ent + + + + + +def argmax( alist) : + """Return the index of the last occurance of the maximum value in the list.""" + return max(izip(alist, count() ))[1] + +def argmin( alist) : + """Return the index of the first occurance of the minimum value in the list.""" + return min(izip(alist, count() ))[1] + + + + + +