diff corebio/moremath.py @ 7:8d676bbd1f2d

Uploaded
author davidmurphy
date Mon, 16 Jan 2012 07:03:36 -0500
parents c55bdc2fb9fa
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/corebio/moremath.py	Mon Jan 16 07:03:36 2012 -0500
@@ -0,0 +1,455 @@
+#!/usr/bin/env python
+  
+#  Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com>
+#
+#  This software is distributed under the MIT Open Source License.
+#  <http://www.opensource.org/licenses/mit-license.html>
+#
+#  Permission is hereby granted, free of charge, to any person obtaining a 
+#  copy of this software and associated documentation files (the "Software"),
+#  to deal in the Software without restriction, including without limitation
+#  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+#  and/or sell copies of the Software, and to permit persons to whom the
+#  Software is furnished to do so, subject to the following conditions:
+#
+#  The above copyright notice and this permission notice shall be included
+#  in all copies or substantial portions of the Software.
+#
+#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
+#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
+#  THE SOFTWARE.
+#
+
+
+""" Various bits of useful math not in the standard python library.
+
+Constants :
+
+- euler_gamma  = 0.577215...
+- catalan      = 0.915965...
+- golden_ratio = 1.618033...
+- bits_per_nat = log2(e) = 1/log(2) 
+- sqrt_2pi     = 2.50662...
+    
+Special Functions :
+
+- gamma()                       -- Gamma function.
+- lngamma()                     -- Logarithm of the gamma function
+- factorial()                   -- The factorial function.
+- digamma()                     -- Digamma function (logarithmic derivative of gamma).
+- trigamma()                    -- Trigamma function (derivative of digamma).
+- entropy()                     -- The entropy of a probability vector
+- incomplete_gamma()            -- The 'upper' incomplete gamma function.
+- normalized_incomplete_gamma() -- 
+- lg()                          -- Base 2 logarithms.
+
+
+Vector Operations :
+
+- rmsd()            -- Root mean squared deviation of two point vectors
+- minimize_rmsd()   -- Find the rigid transformation that minimized the 
+                            RMSD between two vectors of points. 
+    
+Minimization :
+
+- find_root()          -- 1d root finding
+
+Probability Distributions :
+- Gamma
+- Dirichlet
+- Multinomial
+- Gaussian
+
+"""
+
+
+
+__all__ = ('euler_gamma', 'catalan', 'golden_ratio', 'bits_per_nat', 'sqrt_2pi',
+            'gamma', 'lngamma', 'factorial', 'digamma', 'trigamma',
+            'entropy', 'log2',            
+            'incomplete_gamma', 'normalized_incomplete_gamma', 
+        #    'integrate',
+       #     'rmsd', 'minimize_rmsd', 'find_root',
+       #     'Gamma', 'Dirichlet', 
+      #      'decompose_log_odds_array',
+            'argmax', 'argmin'
+            )            
+
+from math import *
+import random
+from itertools import izip, count
+
+# Some mathematical constants
+euler_gamma  = 0.57721566490153286060651
+catalan      = 0.91596559417721901505460
+golden_ratio = 1.6180339887498948482046
+bits_per_nat = 1.44269504088896340735992468100 # = log_2(e) = 1/log(2) 
+sqrt_2pi     = 2.5066282746310005024157652848110
+
+
+
+
+
+# The Lanczos approximation for the gamma function is
+#                          
+#                -(z + g + 1/2)               (z + 1/2)                   
+# Gamma(z+1) =  e               * (z + g + 1/2)        * Sqrt(2Pi) * C 
+#                                                                        
+#
+#                      c[1]    c[2]    c[3]
+#          C = [c[0] + ----- + ----- + ----- + ...   ]
+#                      z + 1   z + 2   z + 3 
+#
+#
+#  To calculate digamma and trigamma functions we take an analytic derivative
+#  of the Lanczos approximation.
+#
+#  Gamma(z)  = Gamma(z+1)/z
+#  Digamma(z) = D ln Gamma(z)
+#  Trigamma(z) = D Digamma(z)         
+
+# These Lanczos constants are from
+# "A note on the computation of the convergent
+# Lanczos complex Gamma approximation." Paul Godfrey (2001)
+# http://my.fit.edu/~gabdo/gamma.txt
+
+
+__lanczos_gamma = 607./128.
+__lanczos_coefficients = ( 
+       0.99999999999999709182,
+      57.156235665862923517,
+     -59.597960355475491248,
+      14.136097974741747174,
+      -0.49191381609762019978,
+        .33994649984811888699e-4,
+        .46523628927048575665e-4,
+       -.98374475304879564677e-4,
+        .15808870322491248884e-3,
+       -.21026444172410488319e-3,
+        .21743961811521264320e-3,
+       -.16431810653676389022e-3,
+        .84418223983852743293e-4,
+       -.26190838401581408670e-4,
+        .36899182659531622704e-5)
+
+__factorial =(
+    1.,
+    1.,
+    2.,
+    6.,
+    24.,
+    120.,
+    720.,
+    5040.,
+    40320.,
+    362880.,
+    3628800.,
+    39916800.,
+    479001600.,
+    6227020800.,
+    87178291200.,
+    1307674368000.,
+    20922789888000.,
+    355687428096000.,
+    6402373705728000.,
+    121645100408832000.,
+    2432902008176640000.,
+    51090942171709440000.,
+    1124000727777607680000.,
+    25852016738884976640000.,
+    620448401733239439360000.,
+    15511210043330985984000000.,
+    403291461126605635584000000.,
+    10888869450418352160768000000.,
+    304888344611713860501504000000.,
+    8841761993739701954543616000000.,
+    265252859812191058636308480000000.,
+    8222838654177922817725562880000000.,
+    263130836933693530167218012160000000. )
+    
+def gamma(z) :
+    """The gamma function. Returns exact results for small integers. Will
+    overflow for modest sized arguments. Use lngamma(z) instead.
+    
+    See: Eric W. Weisstein. "Gamma Function." From MathWorld, A Wolfram Web Resource.
+         http://mathworld.wolfram.com/GammaFunction.html
+
+    """
+    
+    n = floor(z)   
+    if  n == z  :
+        if z <= 0 :
+            return 1.0/0.0 # Infinity
+        elif n <= len(__factorial) :
+            return __factorial[int(n)-1]
+        
+    zz = z
+    if z < 0.5 :
+        zz = 1-z
+    
+        
+    g = __lanczos_gamma
+    c = __lanczos_coefficients 
+    
+    zz = zz - 1.    
+    zh = zz + 0.5
+    zgh = zh + g
+    zp = zgh** (zh*0.5) # trick for avoiding FP overflow above z=141
+
+    ss = 0.0
+    for k in range(len(c)-1,0,-1):
+        ss += c[k]/(zz+k)
+    
+    f = (sqrt_2pi*(c[0]+ss)) * (( zp*exp(-zgh)) *zp)
+
+    if z<0.5 :
+        f  = pi /( sin(pi*z) *f)
+
+    return f
+
+   
+def lngamma(z) :
+    """The logarithm of the gamma function. 
+    """
+    
+    # common case optimization
+    
+    n = floor(z)   
+    if  n == z  :
+        if z <= 0 :
+            return 1.0/0.0 # Infinity
+        elif n <= len(__factorial) :
+            return __factorial[int(n)-1]
+        
+    zz = z
+    if z < 0.5 :
+        zz = 1-z
+    
+        
+    g = __lanczos_gamma
+    c = __lanczos_coefficients 
+    
+    zz = zz - 1.    
+    zh = zz + 0.5
+    zgh = zh + g
+    zp = zgh** (zh*0.5) # trick for avoiding FP overflow above z=141
+
+    ss = 0.0
+    for k in range(len(c)-1,0,-1):
+        ss += c[k]/(zz+k)
+    
+    f = (sqrt_2pi*(c[0]+ss)) * (( zp*exp(-zgh)) *zp)
+
+    if z<0.5 :
+        f  = pi /( sin(pi*z) *f)
+
+    return log(f)
+          
+                    
+def factorial(z) :
+    """ The factorial function. 
+    factorial(z) == gamma(z+1)
+    """
+    return gamma(z+1)
+              
+          
+def digamma(z) :
+    """The digamma function, the logarithmic derivative of the gamma function.
+            digamma(z) = d/dz ln( gamma(z) )
+    
+    See: Eric W. Weisstein. "Digamma Function." From MathWorld--
+    A Wolfram Web Resource. http://mathworld.wolfram.com/DigammaFunction.html
+    """
+
+    g = __lanczos_gamma
+    c = __lanczos_coefficients 
+        
+    zz = z
+    if z < 0.5 :
+        zz = 1 -z
+        
+    n=0.
+    d=0.
+    for k in range(len(c)-1,0,-1):
+        dz =1./(zz+(k+1)-2);
+        dd =c[k] * dz
+        d = d + dd 
+        n = n - dd * dz
+
+    d = d + c[0]
+    gg = zz + g - 0.5
+    f = log(gg) + (n/d - g/gg)
+
+    if z<0.5 :
+        f -= pi / tan( pi * z)
+        
+    return f
+
+
+def trigamma(z) :
+    """The trigamma function, the derivative of the digamma function.
+            trigamma(z) = d/dz digamma(z) = d/dz d/dz ln( gamma(z) )
+    
+    See: Eric W. Weisstein. "Digamma Function." From MathWorld--
+    A Wolfram Web Resource. http://mathworld.wolfram.com/TrigammaFunction.html
+    """
+
+    g = __lanczos_gamma
+    c = __lanczos_coefficients 
+        
+    t1=0.
+    t2=0.
+    t3=0.
+    for k in range(len(c)-1,0,-1):
+        dz =1./(z+k);       
+        dd1 = c[k]* dz
+        t1 += dd1
+        dd2 = dd1 * dz
+        t2 += dd2
+        t3 += dd2 * dz
+
+    t1 += c[0]
+    c =  - (t2*t2)/(t1*t1)  +2*t3/t1
+
+    result = 1./(z*z)
+    gg = z + g + 0.5
+    result += - (z+0.5)/ (gg*gg)
+    result += 2./gg
+
+    result += c
+
+    return result
+
+def incomplete_gamma(a,x) :
+    """The 'upper' incomplete gamma function:
+
+                            oo
+                             -
+                            |    -t  a-1
+    incomplete_gamma(a,x) = |   e   t   dt.
+                            |
+                           -                         
+                            x
+
+    In Mathematica, Gamma[a,x].
+
+    Note that, very confusingly, the phrase 'incomplete gamma fucntion'
+    can also refer to the same integral between 0 and x, (the 'lower'
+    incomplete gamma function) or to the normalized versions,
+    normalized_incomplete_gamma() )
+
+
+    See: Eric W. Weisstein. "Gamma Function." From MathWorld, A Wolfram Web Resource.
+         http://mathworld.wolfram.com/IncompleteGammaFunction.html
+
+    Bugs :
+        This implentation is not very accurate for some arguments. 
+    """   
+    return  normalized_incomplete_gamma(a,x) * gamma(a)
+
+    
+def normalized_incomplete_gamma(a,x) :
+    """The upper, incomplete gamma function normalized so that the limiting
+    values are zero and one.
+    
+     Q(a,x) = incomplete_gamma(a,x) / gamma(a) 
+
+    See: 
+        incomplete_gamma()
+    Bugs :
+        This implentation is not very accurate for some arguments. 
+    """
+    maxiter = 100
+    epsilon = 1.48e-8
+    small = 1e-30
+    
+        
+    if a<=0 or x<0 : 
+        raise ValueError("Invalid arguments")
+    if x == 0.0 : return 1.0
+    
+    if x<= a+1 :
+        # Use the series representation
+        term = 1./a
+        total = term
+        for n in range(1,maxiter) :
+            term *= x/(a+n)
+            total += term
+            if abs(term/total) < epsilon : 
+                return 1. - total * exp(-x+a*log(x) - lngamma(a) )
+        raise RuntimeError(
+            "Failed to converge after %d iterations." % (maxiter) )
+    else :
+        # Use the continued fraction representation
+        total = 1.0
+        b = x + 1. -a
+        c = 1./small
+        d = 1./b
+        h = d
+        for i in range(1, maxiter) :
+            an = -i * (i-a)
+            b = b+2.
+            d = an * d + b
+            if abs(d) < small : d = small
+            c = b + an /c
+            if abs(c) < small : c= small
+            d = 1./d
+            term = d * c
+            h = h * term
+            if abs( term-1.) < epsilon :
+                return h * exp(-x+a*log(x) - lngamma(a) )
+        raise RuntimeError(
+            "Failed to converge after %d iterations." % (maxiter) )
+
+
+ 
+def log2( x) :
+    """ Return the base 2 logarithm of x """
+    return log(x,2)
+
+
+def entropy( pvec, base= exp(1) ) :
+    """ The entropy S = -Sum_i p_i ln p_i
+        pvec is a frequency vector, not necessarily normalized. 
+    """
+    # TODO: Optimize
+    if len(pvec) ==0 : 
+        raise ValueError("Zero length vector")
+        
+        
+    total = 0.0
+    ent = 0.0
+    for p in pvec:
+        if p>0 : # 0 log(0) =0 
+            total += p
+            ent += - log(float(p)) *p
+        elif p<0:
+            raise ValueError("Negative probability")
+    
+    
+    ent = (ent/total) + log(total)
+    ent /= log(base)
+
+    return ent
+   
+    
+
+
+    
+def argmax( alist) :
+    """Return the index of the last occurance of the maximum value in the list."""
+    return max(izip(alist, count() ))[1]
+
+def argmin( alist) :
+    """Return the index of the first occurance of the minimum value in the list."""
+    return min(izip(alist, count() ))[1]
+
+     
+    
+           
+
+