codonlogo: corebio/moremath.py comparison

comparison corebio/moremath.py @ 7:8d676bbd1f2d

Uploaded

author	davidmurphy
date	Mon, 16 Jan 2012 07:03:36 -0500
parents	c55bdc2fb9fa
children

comparison

equal deleted inserted replaced

-:4a4aca3d57c9
+:8d676bbd1f2d
+#!/usr/bin/env python
+#  Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com>
+#
+#  This software is distributed under the MIT Open Source License.
+#  <http://www.opensource.org/licenses/mit-license.html>
+#
+#  Permission is hereby granted, free of charge, to any person obtaining a
+#  copy of this software and associated documentation files (the "Software"),
+#  to deal in the Software without restriction, including without limitation
+#  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+#  and/or sell copies of the Software, and to permit persons to whom the
+#  Software is furnished to do so, subject to the following conditions:
+#
+#  The above copyright notice and this permission notice shall be included
+#  in all copies or substantial portions of the Software.
+#
+#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+#  THE SOFTWARE.
+#
+""" Various bits of useful math not in the standard python library.
+Constants :
+- euler_gamma  = 0.577215...
+- catalan      = 0.915965...
+- golden_ratio = 1.618033...
+- bits_per_nat = log2(e) = 1/log(2)
+- sqrt_2pi     = 2.50662...
+Special Functions :
+- gamma()                       -- Gamma function.
+- lngamma()                     -- Logarithm of the gamma function
+- factorial()                   -- The factorial function.
+- digamma()                     -- Digamma function (logarithmic derivative of gamma).
+- trigamma()                    -- Trigamma function (derivative of digamma).
+- entropy()                     -- The entropy of a probability vector
+- incomplete_gamma()            -- The 'upper' incomplete gamma function.
+- normalized_incomplete_gamma() --
+- lg()                          -- Base 2 logarithms.
+Vector Operations :
+- rmsd()            -- Root mean squared deviation of two point vectors
+- minimize_rmsd()   -- Find the rigid transformation that minimized the
+RMSD between two vectors of points.
+Minimization :
+- find_root()          -- 1d root finding
+Probability Distributions :
+- Gamma
+- Dirichlet
+- Multinomial
+- Gaussian
+"""
+__all__ = ('euler_gamma', 'catalan', 'golden_ratio', 'bits_per_nat', 'sqrt_2pi',
+'gamma', 'lngamma', 'factorial', 'digamma', 'trigamma',
+'entropy', 'log2',
+'incomplete_gamma', 'normalized_incomplete_gamma',
+#    'integrate',
+#     'rmsd', 'minimize_rmsd', 'find_root',
+#     'Gamma', 'Dirichlet',
+#      'decompose_log_odds_array',
+'argmax', 'argmin'
+)
+from math import *
+import random
+from itertools import izip, count
+# Some mathematical constants
+euler_gamma  = 0.57721566490153286060651
+catalan      = 0.91596559417721901505460
+golden_ratio = 1.6180339887498948482046
+bits_per_nat = 1.44269504088896340735992468100 # = log_2(e) = 1/log(2)
+sqrt_2pi     = 2.5066282746310005024157652848110
+# The Lanczos approximation for the gamma function is
+#
+#                -(z + g + 1/2)               (z + 1/2)
+# Gamma(z+1) =  e               * (z + g + 1/2)        * Sqrt(2Pi) * C
+#
+#
+#                      c[1]    c[2]    c[3]
+#          C = [c[0] + ----- + ----- + ----- + ...   ]
+#                      z + 1   z + 2   z + 3
+#
+#
+#  To calculate digamma and trigamma functions we take an analytic derivative
+#  of the Lanczos approximation.
+#
+#  Gamma(z)  = Gamma(z+1)/z
+#  Digamma(z) = D ln Gamma(z)
+#  Trigamma(z) = D Digamma(z)
+# These Lanczos constants are from
+# "A note on the computation of the convergent
+# Lanczos complex Gamma approximation." Paul Godfrey (2001)
+# http://my.fit.edu/~gabdo/gamma.txt
+__lanczos_gamma = 607./128.
+__lanczos_coefficients = (
+0.99999999999999709182,
+57.156235665862923517,
+-59.597960355475491248,
+14.136097974741747174,
+-0.49191381609762019978,
+.33994649984811888699e-4,
+.46523628927048575665e-4,
+-.98374475304879564677e-4,
+.15808870322491248884e-3,
+-.21026444172410488319e-3,
+.21743961811521264320e-3,
+-.16431810653676389022e-3,
+.84418223983852743293e-4,
+-.26190838401581408670e-4,
+.36899182659531622704e-5)
+__factorial =(
+1.,
+1.,
+2.,
+6.,
+24.,
+120.,
+720.,
+5040.,
+40320.,
+362880.,
+3628800.,
+39916800.,
+479001600.,
+6227020800.,
+87178291200.,
+1307674368000.,
+20922789888000.,
+355687428096000.,
+6402373705728000.,
+121645100408832000.,
+2432902008176640000.,
+51090942171709440000.,
+1124000727777607680000.,
+25852016738884976640000.,
+620448401733239439360000.,
+15511210043330985984000000.,
+403291461126605635584000000.,
+10888869450418352160768000000.,
+304888344611713860501504000000.,
+8841761993739701954543616000000.,
+265252859812191058636308480000000.,
+8222838654177922817725562880000000.,
+263130836933693530167218012160000000. )
+def gamma(z) :
+"""The gamma function. Returns exact results for small integers. Will
+overflow for modest sized arguments. Use lngamma(z) instead.
+See: Eric W. Weisstein. "Gamma Function." From MathWorld, A Wolfram Web Resource.
+http://mathworld.wolfram.com/GammaFunction.html
+"""
+n = floor(z)
+if  n == z  :
+if z <= 0 :
+return 1.0/0.0 # Infinity
+elif n <= len(__factorial) :
+return __factorial[int(n)-1]
+zz = z
+if z < 0.5 :
+zz = 1-z
+g = __lanczos_gamma
+c = __lanczos_coefficients
+zz = zz - 1.
+zh = zz + 0.5
+zgh = zh + g
+zp = zgh** (zh*0.5) # trick for avoiding FP overflow above z=141
+ss = 0.0
+for k in range(len(c)-1,0,-1):
+ss += c[k]/(zz+k)
+f = (sqrt_2pi*(c[0]+ss)) * (( zp*exp(-zgh)) *zp)
+if z<0.5 :
+f  = pi /( sin(pi*z) *f)
+return f
+def lngamma(z) :
+"""The logarithm of the gamma function.
+"""
+# common case optimization
+n = floor(z)
+if  n == z  :
+if z <= 0 :
+return 1.0/0.0 # Infinity
+elif n <= len(__factorial) :
+return __factorial[int(n)-1]
+zz = z
+if z < 0.5 :
+zz = 1-z
+g = __lanczos_gamma
+c = __lanczos_coefficients
+zz = zz - 1.
+zh = zz + 0.5
+zgh = zh + g
+zp = zgh** (zh*0.5) # trick for avoiding FP overflow above z=141
+ss = 0.0
+for k in range(len(c)-1,0,-1):
+ss += c[k]/(zz+k)
+f = (sqrt_2pi*(c[0]+ss)) * (( zp*exp(-zgh)) *zp)
+if z<0.5 :
+f  = pi /( sin(pi*z) *f)
+return log(f)
+def factorial(z) :
+""" The factorial function.
+factorial(z) == gamma(z+1)
+"""
+return gamma(z+1)
+def digamma(z) :
+"""The digamma function, the logarithmic derivative of the gamma function.
+digamma(z) = d/dz ln( gamma(z) )
+See: Eric W. Weisstein. "Digamma Function." From MathWorld--
+A Wolfram Web Resource. http://mathworld.wolfram.com/DigammaFunction.html
+"""
+g = __lanczos_gamma
+c = __lanczos_coefficients
+zz = z
+if z < 0.5 :
+zz = 1 -z
+n=0.
+d=0.
+for k in range(len(c)-1,0,-1):
+dz =1./(zz+(k+1)-2);
+dd =c[k] * dz
+d = d + dd
+n = n - dd * dz
+d = d + c[0]
+gg = zz + g - 0.5
+f = log(gg) + (n/d - g/gg)
+if z<0.5 :
+f -= pi / tan( pi * z)
+return f
+def trigamma(z) :
+"""The trigamma function, the derivative of the digamma function.
+trigamma(z) = d/dz digamma(z) = d/dz d/dz ln( gamma(z) )
+See: Eric W. Weisstein. "Digamma Function." From MathWorld--
+A Wolfram Web Resource. http://mathworld.wolfram.com/TrigammaFunction.html
+"""
+g = __lanczos_gamma
+c = __lanczos_coefficients
+t1=0.
+t2=0.
+t3=0.
+for k in range(len(c)-1,0,-1):
+dz =1./(z+k);
+dd1 = c[k]* dz
+t1 += dd1
+dd2 = dd1 * dz
+t2 += dd2
+t3 += dd2 * dz
+t1 += c[0]
+c =  - (t2*t2)/(t1*t1)  +2*t3/t1
+result = 1./(z*z)
+gg = z + g + 0.5
+result += - (z+0.5)/ (gg*gg)
+result += 2./gg
+result += c
+return result
+def incomplete_gamma(a,x) :
+"""The 'upper' incomplete gamma function:
+oo
+-
+|    -t  a-1
+incomplete_gamma(a,x) = |   e   t   dt.
+|
+-
+x
+In Mathematica, Gamma[a,x].
+Note that, very confusingly, the phrase 'incomplete gamma fucntion'
+can also refer to the same integral between 0 and x, (the 'lower'
+incomplete gamma function) or to the normalized versions,
+normalized_incomplete_gamma() )
+See: Eric W. Weisstein. "Gamma Function." From MathWorld, A Wolfram Web Resource.
+http://mathworld.wolfram.com/IncompleteGammaFunction.html
+Bugs :
+This implentation is not very accurate for some arguments.
+"""
+return  normalized_incomplete_gamma(a,x) * gamma(a)
+def normalized_incomplete_gamma(a,x) :
+"""The upper, incomplete gamma function normalized so that the limiting
+values are zero and one.
+Q(a,x) = incomplete_gamma(a,x) / gamma(a)
+See:
+incomplete_gamma()
+Bugs :
+This implentation is not very accurate for some arguments.
+"""
+maxiter = 100
+epsilon = 1.48e-8
+small = 1e-30
+if a<=0 or x<0 :
+raise ValueError("Invalid arguments")
+if x == 0.0 : return 1.0
+if x<= a+1 :
+# Use the series representation
+term = 1./a
+total = term
+for n in range(1,maxiter) :
+term *= x/(a+n)
+total += term
+if abs(term/total) < epsilon :
+return 1. - total * exp(-x+a*log(x) - lngamma(a) )
+raise RuntimeError(
+"Failed to converge after %d iterations." % (maxiter) )
+else :
+# Use the continued fraction representation
+total = 1.0
+b = x + 1. -a
+c = 1./small
+d = 1./b
+h = d
+for i in range(1, maxiter) :
+an = -i * (i-a)
+b = b+2.
+d = an * d + b
+if abs(d) < small : d = small
+c = b + an /c
+if abs(c) < small : c= small
+d = 1./d
+term = d * c
+h = h * term
+if abs( term-1.) < epsilon :
+return h * exp(-x+a*log(x) - lngamma(a) )
+raise RuntimeError(
+"Failed to converge after %d iterations." % (maxiter) )
+def log2( x) :
+""" Return the base 2 logarithm of x """
+return log(x,2)
+def entropy( pvec, base= exp(1) ) :
+""" The entropy S = -Sum_i p_i ln p_i
+pvec is a frequency vector, not necessarily normalized.
+"""
+# TODO: Optimize
+if len(pvec) ==0 :
+raise ValueError("Zero length vector")
+total = 0.0
+ent = 0.0
+for p in pvec:
+if p>0 : # 0 log(0) =0
+total += p
+ent += - log(float(p)) *p
+elif p<0:
+raise ValueError("Negative probability")
+ent = (ent/total) + log(total)
+ent /= log(base)
+return ent
+def argmax( alist) :
+"""Return the index of the last occurance of the maximum value in the list."""
+return max(izip(alist, count() ))[1]
+def argmin( alist) :
+"""Return the index of the first occurance of the minimum value in the list."""
+return min(izip(alist, count() ))[1]

Mercurial > repos > davidmurphy > codonlogo

comparison corebio/moremath.py @ 7:8d676bbd1f2d