comparison bigwig_outlier_bed_slow1.py @ 0:2fbbc1be6655 draft

planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
author fubar
date Mon, 01 Jul 2024 00:53:01 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2fbbc1be6655
1 import os
2 import pyBigWig
3
4
5 class findOut():
6
7 def __init__(self, bwname="test.bw", bedname="test.bed", sd_lo=3, sd_hi=3, bedwin=10):
8 self.bedwin = bedwin
9 self.bwf = pyBigWig.open(bwname, "r")
10 self.chrlist = self.bwf.chroms()
11 self.bedf = open(bedname, "w")
12 self.sd_lo = sd_lo
13 self.sd_hi = sd_hi
14 self.makeBed(self.chrlist)
15
16 def makeBed(self, chrlist):
17 bed = []
18 for chr in chrlist:
19 print(chr)
20 gm = self.bwf.stats(chr, 0, type="mean")[0]
21 gstd = self.bwf.stats(chr, 0, type="std")[0]
22 cutlow = gm - (self.sd_lo * gstd)
23 cuthi = gm + (self.sd_hi * gstd)
24 chr_len = self.chrlist[chr]
25 nb = chr_len // self.bedwin
26 means = self.bwf.stats(chr, 0, chr_len, type="mean", nBins=nb)
27 inlo = False
28 inhi = False
29 reg_start = None
30 reg_end = None
31 reg_means = []
32 print('got %d means, gm=%f, lo=%f, hi=%f' % (len(means), gm, cutlow, cuthi))
33 for i, m in enumerate(means):
34 bend = min(chr_len, (i + 1) * self.bedwin)
35 featname = "%s_%d" % (chr,i)
36 if m and (m < cutlow or m > cuthi):
37 if inlo:
38 if m < cutlow: # extend
39 reg_end = bend
40 reg_means.append(m)
41 else: # high so close
42 rm = sum(reg_means) / len(reg_means)
43 bed.append(
44 "%s\t%d\t%d\t%s\t%.3f\n"
45 % (chr, reg_start, reg_end, featname, rm)
46 )
47 inlo = False
48 reg_means = []
49 elif inhi:
50 if m > cuthi: # extend
51 reg_end = bend
52 reg_means.append(m)
53 else:
54 rm = sum(reg_means) / len(reg_means)
55 bed.append(
56 "%s\t%d\t%d\t%s\t%.3f\n"
57 % (chr, reg_start, reg_end, featname, rm)
58 )
59 inhi = False
60 reg_means = []
61 elif m < cutlow: # start new low region
62 inlo = True
63 reg_start = i * self.bedwin
64 reg_end = bend
65 reg_means = [m]
66 elif m > cuthi:
67 inhi = True
68 reg_start = i * self.bedwin
69 reg_end = bend
70 reg_means = [m]
71 else: # not out of range - write current extended bed region
72 if inhi or inlo:
73 inhi = False
74 inlo = False
75 rm = sum(reg_means) / len(reg_means)
76 bed.append(
77 "%s\t%d\t%d\t%s\t%.3f\n"
78 % (chr, reg_start, reg_end, featname, rm)
79 )
80 reg_means = []
81 reg_start = None
82 reg_end = None
83 self.bedf.write(''.join(bed))
84 self.bedf.close()
85
86
87 if __name__ == "__main__":
88 fo = findOut(bwname="test.bw", bedname="test.bed", sd_lo=2, sd_hi=2, bedwin=100)