Mercurial > repos > thondeboer > neat_genreads
comparison py/probability.py @ 0:6e75a84e9338 draft
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
author | thondeboer |
---|---|
date | Tue, 15 May 2018 02:39:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6e75a84e9338 |
---|---|
1 import math | |
2 import random | |
3 import bisect | |
4 import copy | |
5 import numpy as np | |
6 | |
7 LOW_PROB_THRESH = 1e-12 | |
8 | |
9 def mean_ind_of_weighted_list(l): | |
10 myMid = sum(l)/2.0 | |
11 mySum = 0.0 | |
12 for i in xrange(len(l)): | |
13 mySum += l[i] | |
14 if mySum >= myMid: | |
15 return i | |
16 | |
17 class DiscreteDistribution: | |
18 def __init__(self, weights, values, degenerateVal=None, method='bisect'): | |
19 | |
20 # some sanity checking | |
21 if not len(weights) or not len(values): | |
22 print '\nError: weight or value vector given to DiscreteDistribution() are 0-length.\n' | |
23 asdf = intentional_crash[0] | |
24 exit(1) | |
25 | |
26 self.method = method | |
27 sumWeight = float(sum(weights)) | |
28 | |
29 # if probability of all input events is 0, consider it degenerate and always return the first value | |
30 if sumWeight < LOW_PROB_THRESH: | |
31 self.degenerate = values[0] | |
32 else: | |
33 self.weights = [n/sumWeight for n in weights] | |
34 self.values = copy.deepcopy(values) | |
35 if len(self.values) != len(self.weights): | |
36 print '\nError: length and weights and values vectors must be the same.\n' | |
37 exit(1) | |
38 self.degenerate = degenerateVal | |
39 # prune values with probability too low to be worth using [DOESN'T REALLY IMPROVE PERFORMANCE] | |
40 ####if self.degenerate != None: | |
41 #### for i in xrange(len(self.weights)-1,-1,-1): | |
42 #### if self.weights[i] < LOW_PROB_THRESH: | |
43 #### del self.weights[i] | |
44 #### del self.values[i] | |
45 #### if len(self.weights) == 0: | |
46 #### print '\nError: probability distribution has no usable values.\n' | |
47 #### exit(1) | |
48 | |
49 if self.method == 'alias': | |
50 K = len(self.weights) | |
51 q = np.zeros(K) | |
52 J = np.zeros(K, dtype=np.int) | |
53 smaller = [] | |
54 larger = [] | |
55 for kk, prob in enumerate(self.weights): | |
56 q[kk] = K*prob | |
57 if q[kk] < 1.0: | |
58 smaller.append(kk) | |
59 else: | |
60 larger.append(kk) | |
61 while len(smaller) > 0 and len(larger) > 0: | |
62 small = smaller.pop() | |
63 large = larger.pop() | |
64 J[small] = large | |
65 q[large] = (q[large] + q[small]) - 1.0 | |
66 if q[large] < 1.0: | |
67 smaller.append(large) | |
68 else: | |
69 larger.append(large) | |
70 | |
71 self.a1 = len(J)-1 | |
72 self.a2 = J.tolist() | |
73 self.a3 = q.tolist() | |
74 | |
75 elif self.method == 'bisect': | |
76 self.cumP = np.cumsum(self.weights).tolist()[:-1] | |
77 self.cumP.insert(0,0.) | |
78 | |
79 def __str__(self): | |
80 return str(self.weights)+' '+str(self.values)+' '+self.method | |
81 | |
82 def sample(self): | |
83 | |
84 if self.degenerate != None: | |
85 return self.degenerate | |
86 | |
87 else: | |
88 | |
89 if self.method == 'alias': | |
90 r1 = random.randint(0,self.a1) | |
91 r2 = random.random() | |
92 if r2 < self.a3[r1]: | |
93 return self.values[r1] | |
94 else: | |
95 return self.values[self.a2[r1]] | |
96 | |
97 elif self.method == 'bisect': | |
98 r = random.random() | |
99 return self.values[bisect.bisect(self.cumP,r)-1] | |
100 | |
101 | |
102 # takes k_range, lambda, [0,1,2,..], returns a DiscreteDistribution object with the corresponding to a poisson distribution | |
103 MIN_WEIGHT = 1e-12 | |
104 def poisson_list(k_range,l): | |
105 if l < MIN_WEIGHT: | |
106 return DiscreteDistribution([1],[0],degenerateVal=0) | |
107 logFactorial_list = [0.0] | |
108 for k in k_range[1:]: | |
109 logFactorial_list.append(np.log(float(k))+logFactorial_list[k-1]) | |
110 w_range = [np.exp(k*np.log(l) - l - logFactorial_list[k]) for k in k_range] | |
111 w_range = [n for n in w_range if n >= MIN_WEIGHT] | |
112 if len(w_range) <= 1: | |
113 return DiscreteDistribution([1],[0],degenerateVal=0) | |
114 return DiscreteDistribution(w_range,k_range[:len(w_range)]) | |
115 | |
116 # quantize a list of values into blocks | |
117 MIN_PROB = 1e-12 | |
118 QUANT_BLOCKS = 10 | |
119 def quantize_list(l): | |
120 suml = float(sum(l)) | |
121 ls = sorted([n for n in l if n >= MIN_PROB*suml]) | |
122 if len(ls) == 0: | |
123 return None | |
124 qi = [] | |
125 for i in xrange(QUANT_BLOCKS): | |
126 #qi.append(ls[int((i)*(len(ls)/float(QUANT_BLOCKS)))]) | |
127 qi.append(ls[0]+(i/float(QUANT_BLOCKS))*(ls[-1]-ls[0])) | |
128 qi.append(1e12) | |
129 runningList = [] | |
130 prevBi = None | |
131 previ = None | |
132 for i in xrange(len(l)): | |
133 if l[i] >= MIN_PROB*suml: | |
134 bi = bisect.bisect(qi,l[i]) | |
135 #print i, l[i], qi[bi-1] | |
136 if prevBi != None: | |
137 if bi == prevBi and previ == i-1: | |
138 runningList[-1][1] += 1 | |
139 else: | |
140 runningList.append([i,i,qi[bi-1]]) | |
141 else: | |
142 runningList.append([i,i,qi[bi-1]]) | |
143 prevBi = bi | |
144 previ = i | |
145 return runningList | |
146 |