Mercurial > repos > fubar > bigwig_outlier_bed
diff bigwig_outlier_bed_slow1.py @ 0:c71db540eb38 draft
planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
author | fubar |
---|---|
date | Mon, 01 Jul 2024 02:48:46 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bigwig_outlier_bed_slow1.py Mon Jul 01 02:48:46 2024 +0000 @@ -0,0 +1,88 @@ +import os +import pyBigWig + + +class findOut(): + + def __init__(self, bwname="test.bw", bedname="test.bed", sd_lo=3, sd_hi=3, bedwin=10): + self.bedwin = bedwin + self.bwf = pyBigWig.open(bwname, "r") + self.chrlist = self.bwf.chroms() + self.bedf = open(bedname, "w") + self.sd_lo = sd_lo + self.sd_hi = sd_hi + self.makeBed(self.chrlist) + + def makeBed(self, chrlist): + bed = [] + for chr in chrlist: + print(chr) + gm = self.bwf.stats(chr, 0, type="mean")[0] + gstd = self.bwf.stats(chr, 0, type="std")[0] + cutlow = gm - (self.sd_lo * gstd) + cuthi = gm + (self.sd_hi * gstd) + chr_len = self.chrlist[chr] + nb = chr_len // self.bedwin + means = self.bwf.stats(chr, 0, chr_len, type="mean", nBins=nb) + inlo = False + inhi = False + reg_start = None + reg_end = None + reg_means = [] + print('got %d means, gm=%f, lo=%f, hi=%f' % (len(means), gm, cutlow, cuthi)) + for i, m in enumerate(means): + bend = min(chr_len, (i + 1) * self.bedwin) + featname = "%s_%d" % (chr,i) + if m and (m < cutlow or m > cuthi): + if inlo: + if m < cutlow: # extend + reg_end = bend + reg_means.append(m) + else: # high so close + rm = sum(reg_means) / len(reg_means) + bed.append( + "%s\t%d\t%d\t%s\t%.3f\n" + % (chr, reg_start, reg_end, featname, rm) + ) + inlo = False + reg_means = [] + elif inhi: + if m > cuthi: # extend + reg_end = bend + reg_means.append(m) + else: + rm = sum(reg_means) / len(reg_means) + bed.append( + "%s\t%d\t%d\t%s\t%.3f\n" + % (chr, reg_start, reg_end, featname, rm) + ) + inhi = False + reg_means = [] + elif m < cutlow: # start new low region + inlo = True + reg_start = i * self.bedwin + reg_end = bend + reg_means = [m] + elif m > cuthi: + inhi = True + reg_start = i * self.bedwin + reg_end = bend + reg_means = [m] + else: # not out of range - write current extended bed region + if inhi or inlo: + inhi = False + inlo = False + rm = sum(reg_means) / len(reg_means) + bed.append( + "%s\t%d\t%d\t%s\t%.3f\n" + % (chr, reg_start, reg_end, featname, rm) + ) + reg_means = [] + reg_start = None + reg_end = None + self.bedf.write(''.join(bed)) + self.bedf.close() + + +if __name__ == "__main__": + fo = findOut(bwname="test.bw", bedname="test.bed", sd_lo=2, sd_hi=2, bedwin=100)