diff ngsutils/support/stats.py @ 0:4e4e4093d65d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngsutils commit 09194687c74a424732f8b0c017cbb942aad89068
author iuc
date Wed, 11 Nov 2015 13:04:07 -0500
parents
children 7a68005de299
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsutils/support/stats.py	Wed Nov 11 13:04:07 2015 -0500
@@ -0,0 +1,161 @@
+'''
+various statistical tests and methods...
+'''
+import math
+from ngsutils.support import memoize
+
+def median(vals):
+    '''
+    >>> median([1,2,3])
+    2
+    >>> median([1,2,3,4])
+    2.5
+    '''
+    vals.sort()
+
+    if len(vals) % 2 == 1:
+        return vals[len(vals) / 2]
+    else:
+        a = vals[(len(vals) / 2) - 1]
+        b = vals[(len(vals) / 2)]
+        return float(a + b) / 2
+
+
+def mean_stdev(l):
+    '''
+    >>> mean_stdev([1,2,2,2])
+    (1.75, 0.5)
+    >>> mean_stdev([2,2,2,2])
+    (2.0, 0.0)
+    '''
+
+    acc = 0
+    for el in l:
+        acc += el
+
+    mean = float(acc) / len(l)
+    acc = 0
+    for el in l:
+        acc += (el - mean) ** 2
+
+    if len(l) > 2:
+        stdev = math.sqrt(float(acc) / (len(l) - 1))
+    else:
+        stdev = 0.0
+
+    return (mean, stdev)
+
+
+def counts_median(d):
+    '''
+    Calculate the median from counts stored in a dictionary
+    >>> counts_median({ 1: 4, 2: 1, 3: 4 })
+    2
+    >>> counts_median({ 1: 4, 3: 4 })
+    2
+
+    '''
+    count = 0
+    for k in d:
+        count += d[k]
+
+    if count == 0:
+        return 0
+
+    acc = 0.0
+    last = 0
+    for k in sorted(d):
+        if last:
+            return (last + k) / 2
+        acc += d[k]
+        if acc / count == 0.5:
+            last = k
+        elif acc / count > 0.5:
+            return k
+
+
+def counts_mean_stdev(d):
+    '''
+
+    calc mean / stdev when data is stored as counts in a dictionary
+
+    Ex:
+        { 1: 4, 2: 1, 3: 4 } = [1, 1, 1, 1, 2, 3, 3, 3, 3]
+
+    >>> counts_mean_stdev({ 1: 4, 2: 1, 3: 4 })
+    (2.0, 1.0)
+
+    '''
+
+    acc = 0
+    count = 0
+    for k in d:
+        acc += k * d[k]
+        count += d[k]
+
+    mean = float(acc) / count
+
+    acc = 0
+    for k in d:
+        acc += (((k - mean) ** 2) * d[k])
+
+    if count > 2:
+        stdev = math.sqrt(float(acc) / (count - 1))
+    else:
+        stdev = 0.0
+
+    return (mean, stdev)
+
+@memoize
+def poisson_prob(x, mean):
+    '''
+        Return the probability that you could get x counts in
+        a Poisson test with a mean value.
+
+        prob(x) = sum(i=1..x){poisson(i)}
+
+        >>> poisson_prob(6,10)
+        0.1300960209527205
+        >>> poisson_prob(8,10)
+        0.33277427882095645
+    '''
+    acc = 0.0
+    for i in xrange(1, x+1):
+        acc += poisson_func(i, mean)
+    return acc
+
+@memoize
+def poisson_func(mu, lambd):
+    '''
+        This is the Poisson distribution function
+        
+        p(mu) = (lambda^mu * e^(-lambda)) / (mu!)
+
+        mu is a count
+        lambd is the mean
+
+        >>> poisson_func(1,10)
+        0.00045399929762484856
+        >>> poisson_func(2,10)
+        0.0022699964881242427
+        >>> poisson_func(3,10)
+        0.007566654960414142
+    '''
+    return (lambd ** mu) * (math.exp(-1 * lambd)) / _factorial(mu)
+
+
+@memoize
+def _factorial(x):
+    '''
+    >>> _factorial(1)
+    1
+    >>> _factorial(2)
+    2
+    >>> _factorial(3)
+    6
+    '''
+    return math.factorial(x)
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()