Mercurial repository thondeboer/neat_genreads: py/probability.py @ 0:6e75a84e9338 (draft)
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
| author | thondeboer |
|---|---|
| date | Tue, 15 May 2018 02:39:53 -0400 |
| parents | |
| children | |

| from revision | to revision |
|---|---|
| -1:000000000000 (null) | 0:6e75a84e9338 |

import math
import random
import bisect
import copy
import numpy as np

LOW_PROB_THRESH = 1e-12

def mean_ind_of_weighted_list(l):
    # return the first index at which the running sum of the weights reaches half the total,
    # i.e. the index of the weighted median
    myMid = sum(l)/2.0
    mySum = 0.0
    for i in xrange(len(l)):
        mySum += l[i]
        if mySum >= myMid:
            return i

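A minimal usage sketch (illustrative, not part of the repository file; the weight list is hypothetical): mean_ind_of_weighted_list returns the first index at which the cumulative weight reaches half the total.

```python
weights = [1, 1, 1, 10, 1]
# cumulative sums are 1, 2, 3, 13, 14; half the total (7.0) is first reached at index 3
print(mean_ind_of_weighted_list(weights))   # -> 3
```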
class DiscreteDistribution:
    def __init__(self, weights, values, degenerateVal=None, method='bisect'):

        # some sanity checking
        if not len(weights) or not len(values):
            print '\nError: weight or value vector given to DiscreteDistribution() are 0-length.\n'
            asdf = intentional_crash[0]    # deliberately trigger a NameError so a traceback is printed
            exit(1)

        self.method = method
        sumWeight = float(sum(weights))

        # if probability of all input events is 0, consider it degenerate and always return the first value
        if sumWeight < LOW_PROB_THRESH:
            self.degenerate = values[0]
        else:
            self.weights = [n/sumWeight for n in weights]
            self.values = copy.deepcopy(values)
            if len(self.values) != len(self.weights):
                print '\nError: weight and value vectors must be the same length.\n'
                exit(1)
            self.degenerate = degenerateVal
            # prune values with probability too low to be worth using [DOESN'T REALLY IMPROVE PERFORMANCE]
            ####if self.degenerate != None:
            ####    for i in xrange(len(self.weights)-1,-1,-1):
            ####        if self.weights[i] < LOW_PROB_THRESH:
            ####            del self.weights[i]
            ####            del self.values[i]
            ####    if len(self.weights) == 0:
            ####        print '\nError: probability distribution has no usable values.\n'
            ####        exit(1)

            if self.method == 'alias':
                # build Walker/Vose alias tables: q holds scaled probabilities, J holds alias indices
                K = len(self.weights)
                q = np.zeros(K)
                J = np.zeros(K, dtype=np.int)
                smaller = []
                larger = []
                for kk, prob in enumerate(self.weights):
                    q[kk] = K*prob
                    if q[kk] < 1.0:
                        smaller.append(kk)
                    else:
                        larger.append(kk)
                while len(smaller) > 0 and len(larger) > 0:
                    small = smaller.pop()
                    large = larger.pop()
                    J[small] = large
                    q[large] = (q[large] + q[small]) - 1.0
                    if q[large] < 1.0:
                        smaller.append(large)
                    else:
                        larger.append(large)

                self.a1 = len(J)-1
                self.a2 = J.tolist()
                self.a3 = q.tolist()

            elif self.method == 'bisect':
                # cumulative probabilities (starting at 0) for binary-search sampling
                self.cumP = np.cumsum(self.weights).tolist()[:-1]
                self.cumP.insert(0,0.)

    def __str__(self):
        return str(self.weights)+' '+str(self.values)+' '+self.method

    def sample(self):

        if self.degenerate != None:
            return self.degenerate

        else:

            if self.method == 'alias':
                r1 = random.randint(0,self.a1)
                r2 = random.random()
                if r2 < self.a3[r1]:
                    return self.values[r1]
                else:
                    return self.values[self.a2[r1]]

            elif self.method == 'bisect':
                r = random.random()
                return self.values[bisect.bisect(self.cumP,r)-1]

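A minimal usage sketch (illustrative, not part of the repository file; the weights and values are hypothetical): the same weight/value vectors drive both sampling methods, and the weights do not need to be pre-normalized because __init__ divides by their sum.

```python
weights = [0.1, 0.6, 0.3]
values  = ['A', 'B', 'C']

dist_bisect = DiscreteDistribution(weights, values)                  # default method='bisect'
dist_alias  = DiscreteDistribution(weights, values, method='alias')  # Walker/Vose alias method

counts = {}
for _ in xrange(10000):
    v = dist_bisect.sample()
    counts[v] = counts.get(v, 0) + 1
print(counts)   # roughly {'A': 1000, 'B': 6000, 'C': 3000}
```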
# takes k_range ([0,1,2,...]) and lambda, returns a DiscreteDistribution object corresponding to a Poisson distribution
MIN_WEIGHT = 1e-12
def poisson_list(k_range,l):
    if l < MIN_WEIGHT:
        return DiscreteDistribution([1],[0],degenerateVal=0)
    # precompute ln(k!) for each k, using ln(k!) = ln(k) + ln((k-1)!)
    logFactorial_list = [0.0]
    for k in k_range[1:]:
        logFactorial_list.append(np.log(float(k))+logFactorial_list[k-1])
    # Poisson pmf in log-space: P(k) = exp(k*ln(lambda) - lambda - ln(k!))
    w_range = [np.exp(k*np.log(l) - l - logFactorial_list[k]) for k in k_range]
    # discard negligible weights (assumed to lie at the tail of k_range)
    w_range = [n for n in w_range if n >= MIN_WEIGHT]
    if len(w_range) <= 1:
        return DiscreteDistribution([1],[0],degenerateVal=0)
    return DiscreteDistribution(w_range,k_range[:len(w_range)])

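A minimal usage sketch (illustrative, not part of the repository file; the support and mean are hypothetical):

```python
k_range = range(20)                # support 0..19, as a [0,1,2,...] list
pois = poisson_list(k_range, 4.0)  # Poisson with mean lambda = 4
print([pois.sample() for _ in xrange(10)])   # e.g. [3, 5, 4, 2, 6, 4, 3, 7, 4, 5]
```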
# quantize a list of values into blocks
MIN_PROB = 1e-12
QUANT_BLOCKS = 10
def quantize_list(l):
    suml = float(sum(l))
    ls = sorted([n for n in l if n >= MIN_PROB*suml])
    if len(ls) == 0:
        return None
    # bin boundaries: QUANT_BLOCKS equal-width steps between the smallest and largest retained value
    qi = []
    for i in xrange(QUANT_BLOCKS):
        #qi.append(ls[int((i)*(len(ls)/float(QUANT_BLOCKS)))])
        qi.append(ls[0]+(i/float(QUANT_BLOCKS))*(ls[-1]-ls[0]))
    qi.append(1e12)
    # collapse consecutive indices that fall in the same bin into [start, end, bin_value] runs
    runningList = []
    prevBi = None
    previ = None
    for i in xrange(len(l)):
        if l[i] >= MIN_PROB*suml:
            bi = bisect.bisect(qi,l[i])
            #print i, l[i], qi[bi-1]
            if prevBi != None:
                if bi == prevBi and previ == i-1:
                    runningList[-1][1] += 1
                else:
                    runningList.append([i,i,qi[bi-1]])
            else:
                runningList.append([i,i,qi[bi-1]])
            prevBi = bi
            previ = i
    return runningList

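A minimal usage sketch (illustrative, not part of the repository file; the coverage values are hypothetical): entries below the MIN_PROB threshold are skipped, and consecutive positions whose values land in the same bin are merged into [start_index, end_index, bin_value] runs.

```python
coverage = [10.0, 10.2, 10.1, 55.0, 54.8, 0.0, 9.9]
for start, end, binVal in quantize_list(coverage):
    print((start, end, binVal))
# the zero entry is skipped; the two plateaus collapse into runs starting at indices 0 and 3
```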
