Mercurial > repos > iuc > ngsutils_bam_filter
comparison ngsutils/support/stats.py @ 0:4e4e4093d65d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngsutils commit 09194687c74a424732f8b0c017cbb942aad89068
author | iuc |
---|---|
date | Wed, 11 Nov 2015 13:04:07 -0500 |
parents | |
children | 7a68005de299 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4e4e4093d65d |
---|---|
1 ''' | |
2 various statistical tests and methods... | |
3 ''' | |
4 import math | |
5 from ngsutils.support import memoize | |
6 | |
7 def median(vals): | |
8 ''' | |
9 >>> median([1,2,3]) | |
10 2 | |
11 >>> median([1,2,3,4]) | |
12 2.5 | |
13 ''' | |
14 vals.sort() | |
15 | |
16 if len(vals) % 2 == 1: | |
17 return vals[len(vals) / 2] | |
18 else: | |
19 a = vals[(len(vals) / 2) - 1] | |
20 b = vals[(len(vals) / 2)] | |
21 return float(a + b) / 2 | |
22 | |
23 | |
24 def mean_stdev(l): | |
25 ''' | |
26 >>> mean_stdev([1,2,2,2]) | |
27 (1.75, 0.5) | |
28 >>> mean_stdev([2,2,2,2]) | |
29 (2.0, 0.0) | |
30 ''' | |
31 | |
32 acc = 0 | |
33 for el in l: | |
34 acc += el | |
35 | |
36 mean = float(acc) / len(l) | |
37 acc = 0 | |
38 for el in l: | |
39 acc += (el - mean) ** 2 | |
40 | |
41 if len(l) > 2: | |
42 stdev = math.sqrt(float(acc) / (len(l) - 1)) | |
43 else: | |
44 stdev = 0.0 | |
45 | |
46 return (mean, stdev) | |
47 | |
48 | |
49 def counts_median(d): | |
50 ''' | |
51 Calculate the median from counts stored in a dictionary | |
52 >>> counts_median({ 1: 4, 2: 1, 3: 4 }) | |
53 2 | |
54 >>> counts_median({ 1: 4, 3: 4 }) | |
55 2 | |
56 | |
57 ''' | |
58 count = 0 | |
59 for k in d: | |
60 count += d[k] | |
61 | |
62 if count == 0: | |
63 return 0 | |
64 | |
65 acc = 0.0 | |
66 last = 0 | |
67 for k in sorted(d): | |
68 if last: | |
69 return (last + k) / 2 | |
70 acc += d[k] | |
71 if acc / count == 0.5: | |
72 last = k | |
73 elif acc / count > 0.5: | |
74 return k | |
75 | |
76 | |
77 def counts_mean_stdev(d): | |
78 ''' | |
79 | |
80 calc mean / stdev when data is stored as counts in a dictionary | |
81 | |
82 Ex: | |
83 { 1: 4, 2: 1, 3: 4 } = [1, 1, 1, 1, 2, 3, 3, 3, 3] | |
84 | |
85 >>> counts_mean_stdev({ 1: 4, 2: 1, 3: 4 }) | |
86 (2.0, 1.0) | |
87 | |
88 ''' | |
89 | |
90 acc = 0 | |
91 count = 0 | |
92 for k in d: | |
93 acc += k * d[k] | |
94 count += d[k] | |
95 | |
96 mean = float(acc) / count | |
97 | |
98 acc = 0 | |
99 for k in d: | |
100 acc += (((k - mean) ** 2) * d[k]) | |
101 | |
102 if count > 2: | |
103 stdev = math.sqrt(float(acc) / (count - 1)) | |
104 else: | |
105 stdev = 0.0 | |
106 | |
107 return (mean, stdev) | |
108 | |
109 @memoize | |
110 def poisson_prob(x, mean): | |
111 ''' | |
112 Return the probability that you could get x counts in | |
113 a Poisson test with a mean value. | |
114 | |
115 prob(x) = sum(i=1..x){poisson(i)} | |
116 | |
117 >>> poisson_prob(6,10) | |
118 0.1300960209527205 | |
119 >>> poisson_prob(8,10) | |
120 0.33277427882095645 | |
121 ''' | |
122 acc = 0.0 | |
123 for i in xrange(1, x+1): | |
124 acc += poisson_func(i, mean) | |
125 return acc | |
126 | |
127 @memoize | |
128 def poisson_func(mu, lambd): | |
129 ''' | |
130 This is the Poisson distribution function | |
131 | |
132 p(mu) = (lambda^mu * e^(-lambda)) / (mu!) | |
133 | |
134 mu is a count | |
135 lambd is the mean | |
136 | |
137 >>> poisson_func(1,10) | |
138 0.00045399929762484856 | |
139 >>> poisson_func(2,10) | |
140 0.0022699964881242427 | |
141 >>> poisson_func(3,10) | |
142 0.007566654960414142 | |
143 ''' | |
144 return (lambd ** mu) * (math.exp(-1 * lambd)) / _factorial(mu) | |
145 | |
146 | |
147 @memoize | |
148 def _factorial(x): | |
149 ''' | |
150 >>> _factorial(1) | |
151 1 | |
152 >>> _factorial(2) | |
153 2 | |
154 >>> _factorial(3) | |
155 6 | |
156 ''' | |
157 return math.factorial(x) | |
158 | |
159 if __name__ == '__main__': | |
160 import doctest | |
161 doctest.testmod() |