Mercurial > repos > thondeboer > neat_genreads
diff py/probability.py @ 0:6e75a84e9338 draft
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
author | thondeboer |
---|---|
date | Tue, 15 May 2018 02:39:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/py/probability.py Tue May 15 02:39:53 2018 -0400 @@ -0,0 +1,146 @@ +import math +import random +import bisect +import copy +import numpy as np + +LOW_PROB_THRESH = 1e-12 + +def mean_ind_of_weighted_list(l): + myMid = sum(l)/2.0 + mySum = 0.0 + for i in xrange(len(l)): + mySum += l[i] + if mySum >= myMid: + return i + +class DiscreteDistribution: + def __init__(self, weights, values, degenerateVal=None, method='bisect'): + + # some sanity checking + if not len(weights) or not len(values): + print '\nError: weight or value vector given to DiscreteDistribution() are 0-length.\n' + asdf = intentional_crash[0] + exit(1) + + self.method = method + sumWeight = float(sum(weights)) + + # if probability of all input events is 0, consider it degenerate and always return the first value + if sumWeight < LOW_PROB_THRESH: + self.degenerate = values[0] + else: + self.weights = [n/sumWeight for n in weights] + self.values = copy.deepcopy(values) + if len(self.values) != len(self.weights): + print '\nError: length and weights and values vectors must be the same.\n' + exit(1) + self.degenerate = degenerateVal + # prune values with probability too low to be worth using [DOESN'T REALLY IMPROVE PERFORMANCE] + ####if self.degenerate != None: + #### for i in xrange(len(self.weights)-1,-1,-1): + #### if self.weights[i] < LOW_PROB_THRESH: + #### del self.weights[i] + #### del self.values[i] + #### if len(self.weights) == 0: + #### print '\nError: probability distribution has no usable values.\n' + #### exit(1) + + if self.method == 'alias': + K = len(self.weights) + q = np.zeros(K) + J = np.zeros(K, dtype=np.int) + smaller = [] + larger = [] + for kk, prob in enumerate(self.weights): + q[kk] = K*prob + if q[kk] < 1.0: + smaller.append(kk) + else: + larger.append(kk) + while len(smaller) > 0 and len(larger) > 0: + small = smaller.pop() + large = larger.pop() + J[small] = large + q[large] = (q[large] + q[small]) - 1.0 + if q[large] < 1.0: + smaller.append(large) + else: + larger.append(large) + + self.a1 = len(J)-1 + self.a2 = J.tolist() + self.a3 = q.tolist() + + elif self.method == 'bisect': + self.cumP = np.cumsum(self.weights).tolist()[:-1] + self.cumP.insert(0,0.) + + def __str__(self): + return str(self.weights)+' '+str(self.values)+' '+self.method + + def sample(self): + + if self.degenerate != None: + return self.degenerate + + else: + + if self.method == 'alias': + r1 = random.randint(0,self.a1) + r2 = random.random() + if r2 < self.a3[r1]: + return self.values[r1] + else: + return self.values[self.a2[r1]] + + elif self.method == 'bisect': + r = random.random() + return self.values[bisect.bisect(self.cumP,r)-1] + + +# takes k_range, lambda, [0,1,2,..], returns a DiscreteDistribution object with the corresponding to a poisson distribution +MIN_WEIGHT = 1e-12 +def poisson_list(k_range,l): + if l < MIN_WEIGHT: + return DiscreteDistribution([1],[0],degenerateVal=0) + logFactorial_list = [0.0] + for k in k_range[1:]: + logFactorial_list.append(np.log(float(k))+logFactorial_list[k-1]) + w_range = [np.exp(k*np.log(l) - l - logFactorial_list[k]) for k in k_range] + w_range = [n for n in w_range if n >= MIN_WEIGHT] + if len(w_range) <= 1: + return DiscreteDistribution([1],[0],degenerateVal=0) + return DiscreteDistribution(w_range,k_range[:len(w_range)]) + +# quantize a list of values into blocks +MIN_PROB = 1e-12 +QUANT_BLOCKS = 10 +def quantize_list(l): + suml = float(sum(l)) + ls = sorted([n for n in l if n >= MIN_PROB*suml]) + if len(ls) == 0: + return None + qi = [] + for i in xrange(QUANT_BLOCKS): + #qi.append(ls[int((i)*(len(ls)/float(QUANT_BLOCKS)))]) + qi.append(ls[0]+(i/float(QUANT_BLOCKS))*(ls[-1]-ls[0])) + qi.append(1e12) + runningList = [] + prevBi = None + previ = None + for i in xrange(len(l)): + if l[i] >= MIN_PROB*suml: + bi = bisect.bisect(qi,l[i]) + #print i, l[i], qi[bi-1] + if prevBi != None: + if bi == prevBi and previ == i-1: + runningList[-1][1] += 1 + else: + runningList.append([i,i,qi[bi-1]]) + else: + runningList.append([i,i,qi[bi-1]]) + prevBi = bi + previ = i + return runningList +