comparison ngsutils/support/stats.py @ 0:4e4e4093d65d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngsutils commit 09194687c74a424732f8b0c017cbb942aad89068
author iuc
date Wed, 11 Nov 2015 13:04:07 -0500
parents
children 7a68005de299
comparison
equal deleted inserted replaced
-1:000000000000 0:4e4e4093d65d
1 '''
2 various statistical tests and methods...
3 '''
4 import math
5 from ngsutils.support import memoize
6
7 def median(vals):
8 '''
9 >>> median([1,2,3])
10 2
11 >>> median([1,2,3,4])
12 2.5
13 '''
14 vals.sort()
15
16 if len(vals) % 2 == 1:
17 return vals[len(vals) / 2]
18 else:
19 a = vals[(len(vals) / 2) - 1]
20 b = vals[(len(vals) / 2)]
21 return float(a + b) / 2
22
23
24 def mean_stdev(l):
25 '''
26 >>> mean_stdev([1,2,2,2])
27 (1.75, 0.5)
28 >>> mean_stdev([2,2,2,2])
29 (2.0, 0.0)
30 '''
31
32 acc = 0
33 for el in l:
34 acc += el
35
36 mean = float(acc) / len(l)
37 acc = 0
38 for el in l:
39 acc += (el - mean) ** 2
40
41 if len(l) > 2:
42 stdev = math.sqrt(float(acc) / (len(l) - 1))
43 else:
44 stdev = 0.0
45
46 return (mean, stdev)
47
48
49 def counts_median(d):
50 '''
51 Calculate the median from counts stored in a dictionary
52 >>> counts_median({ 1: 4, 2: 1, 3: 4 })
53 2
54 >>> counts_median({ 1: 4, 3: 4 })
55 2
56
57 '''
58 count = 0
59 for k in d:
60 count += d[k]
61
62 if count == 0:
63 return 0
64
65 acc = 0.0
66 last = 0
67 for k in sorted(d):
68 if last:
69 return (last + k) / 2
70 acc += d[k]
71 if acc / count == 0.5:
72 last = k
73 elif acc / count > 0.5:
74 return k
75
76
77 def counts_mean_stdev(d):
78 '''
79
80 calc mean / stdev when data is stored as counts in a dictionary
81
82 Ex:
83 { 1: 4, 2: 1, 3: 4 } = [1, 1, 1, 1, 2, 3, 3, 3, 3]
84
85 >>> counts_mean_stdev({ 1: 4, 2: 1, 3: 4 })
86 (2.0, 1.0)
87
88 '''
89
90 acc = 0
91 count = 0
92 for k in d:
93 acc += k * d[k]
94 count += d[k]
95
96 mean = float(acc) / count
97
98 acc = 0
99 for k in d:
100 acc += (((k - mean) ** 2) * d[k])
101
102 if count > 2:
103 stdev = math.sqrt(float(acc) / (count - 1))
104 else:
105 stdev = 0.0
106
107 return (mean, stdev)
108
109 @memoize
110 def poisson_prob(x, mean):
111 '''
112 Return the probability that you could get x counts in
113 a Poisson test with a mean value.
114
115 prob(x) = sum(i=1..x){poisson(i)}
116
117 >>> poisson_prob(6,10)
118 0.1300960209527205
119 >>> poisson_prob(8,10)
120 0.33277427882095645
121 '''
122 acc = 0.0
123 for i in xrange(1, x+1):
124 acc += poisson_func(i, mean)
125 return acc
126
127 @memoize
128 def poisson_func(mu, lambd):
129 '''
130 This is the Poisson distribution function
131
132 p(mu) = (lambda^mu * e^(-lambda)) / (mu!)
133
134 mu is a count
135 lambd is the mean
136
137 >>> poisson_func(1,10)
138 0.00045399929762484856
139 >>> poisson_func(2,10)
140 0.0022699964881242427
141 >>> poisson_func(3,10)
142 0.007566654960414142
143 '''
144 return (lambd ** mu) * (math.exp(-1 * lambd)) / _factorial(mu)
145
146
147 @memoize
148 def _factorial(x):
149 '''
150 >>> _factorial(1)
151 1
152 >>> _factorial(2)
153 2
154 >>> _factorial(3)
155 6
156 '''
157 return math.factorial(x)
158
159 if __name__ == '__main__':
160 import doctest
161 doctest.testmod()