comparison py/probability.py @ 0:6e75a84e9338 draft

planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
author thondeboer
date Tue, 15 May 2018 02:39:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:6e75a84e9338
1 import math
2 import random
3 import bisect
4 import copy
5 import numpy as np
6
7 LOW_PROB_THRESH = 1e-12
8
9 def mean_ind_of_weighted_list(l):
10 myMid = sum(l)/2.0
11 mySum = 0.0
12 for i in xrange(len(l)):
13 mySum += l[i]
14 if mySum >= myMid:
15 return i
16
17 class DiscreteDistribution:
18 def __init__(self, weights, values, degenerateVal=None, method='bisect'):
19
20 # some sanity checking
21 if not len(weights) or not len(values):
22 print '\nError: weight or value vector given to DiscreteDistribution() are 0-length.\n'
23 asdf = intentional_crash[0]
24 exit(1)
25
26 self.method = method
27 sumWeight = float(sum(weights))
28
29 # if probability of all input events is 0, consider it degenerate and always return the first value
30 if sumWeight < LOW_PROB_THRESH:
31 self.degenerate = values[0]
32 else:
33 self.weights = [n/sumWeight for n in weights]
34 self.values = copy.deepcopy(values)
35 if len(self.values) != len(self.weights):
36 print '\nError: length and weights and values vectors must be the same.\n'
37 exit(1)
38 self.degenerate = degenerateVal
39 # prune values with probability too low to be worth using [DOESN'T REALLY IMPROVE PERFORMANCE]
40 ####if self.degenerate != None:
41 #### for i in xrange(len(self.weights)-1,-1,-1):
42 #### if self.weights[i] < LOW_PROB_THRESH:
43 #### del self.weights[i]
44 #### del self.values[i]
45 #### if len(self.weights) == 0:
46 #### print '\nError: probability distribution has no usable values.\n'
47 #### exit(1)
48
49 if self.method == 'alias':
50 K = len(self.weights)
51 q = np.zeros(K)
52 J = np.zeros(K, dtype=np.int)
53 smaller = []
54 larger = []
55 for kk, prob in enumerate(self.weights):
56 q[kk] = K*prob
57 if q[kk] < 1.0:
58 smaller.append(kk)
59 else:
60 larger.append(kk)
61 while len(smaller) > 0 and len(larger) > 0:
62 small = smaller.pop()
63 large = larger.pop()
64 J[small] = large
65 q[large] = (q[large] + q[small]) - 1.0
66 if q[large] < 1.0:
67 smaller.append(large)
68 else:
69 larger.append(large)
70
71 self.a1 = len(J)-1
72 self.a2 = J.tolist()
73 self.a3 = q.tolist()
74
75 elif self.method == 'bisect':
76 self.cumP = np.cumsum(self.weights).tolist()[:-1]
77 self.cumP.insert(0,0.)
78
79 def __str__(self):
80 return str(self.weights)+' '+str(self.values)+' '+self.method
81
82 def sample(self):
83
84 if self.degenerate != None:
85 return self.degenerate
86
87 else:
88
89 if self.method == 'alias':
90 r1 = random.randint(0,self.a1)
91 r2 = random.random()
92 if r2 < self.a3[r1]:
93 return self.values[r1]
94 else:
95 return self.values[self.a2[r1]]
96
97 elif self.method == 'bisect':
98 r = random.random()
99 return self.values[bisect.bisect(self.cumP,r)-1]
100
101
102 # takes k_range, lambda, [0,1,2,..], returns a DiscreteDistribution object with the corresponding to a poisson distribution
103 MIN_WEIGHT = 1e-12
104 def poisson_list(k_range,l):
105 if l < MIN_WEIGHT:
106 return DiscreteDistribution([1],[0],degenerateVal=0)
107 logFactorial_list = [0.0]
108 for k in k_range[1:]:
109 logFactorial_list.append(np.log(float(k))+logFactorial_list[k-1])
110 w_range = [np.exp(k*np.log(l) - l - logFactorial_list[k]) for k in k_range]
111 w_range = [n for n in w_range if n >= MIN_WEIGHT]
112 if len(w_range) <= 1:
113 return DiscreteDistribution([1],[0],degenerateVal=0)
114 return DiscreteDistribution(w_range,k_range[:len(w_range)])
115
116 # quantize a list of values into blocks
117 MIN_PROB = 1e-12
118 QUANT_BLOCKS = 10
119 def quantize_list(l):
120 suml = float(sum(l))
121 ls = sorted([n for n in l if n >= MIN_PROB*suml])
122 if len(ls) == 0:
123 return None
124 qi = []
125 for i in xrange(QUANT_BLOCKS):
126 #qi.append(ls[int((i)*(len(ls)/float(QUANT_BLOCKS)))])
127 qi.append(ls[0]+(i/float(QUANT_BLOCKS))*(ls[-1]-ls[0]))
128 qi.append(1e12)
129 runningList = []
130 prevBi = None
131 previ = None
132 for i in xrange(len(l)):
133 if l[i] >= MIN_PROB*suml:
134 bi = bisect.bisect(qi,l[i])
135 #print i, l[i], qi[bi-1]
136 if prevBi != None:
137 if bi == prevBi and previ == i-1:
138 runningList[-1][1] += 1
139 else:
140 runningList.append([i,i,qi[bi-1]])
141 else:
142 runningList.append([i,i,qi[bi-1]])
143 prevBi = bi
144 previ = i
145 return runningList
146