Mercurial > repos > fubar > bigwigoutlierbed
comparison bigwig_outlier_bed_slow1.py @ 0:2fbbc1be6655 draft
planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
author | fubar |
---|---|
date | Mon, 01 Jul 2024 00:53:01 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2fbbc1be6655 |
---|---|
1 import os | |
2 import pyBigWig | |
3 | |
4 | |
5 class findOut(): | |
6 | |
7 def __init__(self, bwname="test.bw", bedname="test.bed", sd_lo=3, sd_hi=3, bedwin=10): | |
8 self.bedwin = bedwin | |
9 self.bwf = pyBigWig.open(bwname, "r") | |
10 self.chrlist = self.bwf.chroms() | |
11 self.bedf = open(bedname, "w") | |
12 self.sd_lo = sd_lo | |
13 self.sd_hi = sd_hi | |
14 self.makeBed(self.chrlist) | |
15 | |
16 def makeBed(self, chrlist): | |
17 bed = [] | |
18 for chr in chrlist: | |
19 print(chr) | |
20 gm = self.bwf.stats(chr, 0, type="mean")[0] | |
21 gstd = self.bwf.stats(chr, 0, type="std")[0] | |
22 cutlow = gm - (self.sd_lo * gstd) | |
23 cuthi = gm + (self.sd_hi * gstd) | |
24 chr_len = self.chrlist[chr] | |
25 nb = chr_len // self.bedwin | |
26 means = self.bwf.stats(chr, 0, chr_len, type="mean", nBins=nb) | |
27 inlo = False | |
28 inhi = False | |
29 reg_start = None | |
30 reg_end = None | |
31 reg_means = [] | |
32 print('got %d means, gm=%f, lo=%f, hi=%f' % (len(means), gm, cutlow, cuthi)) | |
33 for i, m in enumerate(means): | |
34 bend = min(chr_len, (i + 1) * self.bedwin) | |
35 featname = "%s_%d" % (chr,i) | |
36 if m and (m < cutlow or m > cuthi): | |
37 if inlo: | |
38 if m < cutlow: # extend | |
39 reg_end = bend | |
40 reg_means.append(m) | |
41 else: # high so close | |
42 rm = sum(reg_means) / len(reg_means) | |
43 bed.append( | |
44 "%s\t%d\t%d\t%s\t%.3f\n" | |
45 % (chr, reg_start, reg_end, featname, rm) | |
46 ) | |
47 inlo = False | |
48 reg_means = [] | |
49 elif inhi: | |
50 if m > cuthi: # extend | |
51 reg_end = bend | |
52 reg_means.append(m) | |
53 else: | |
54 rm = sum(reg_means) / len(reg_means) | |
55 bed.append( | |
56 "%s\t%d\t%d\t%s\t%.3f\n" | |
57 % (chr, reg_start, reg_end, featname, rm) | |
58 ) | |
59 inhi = False | |
60 reg_means = [] | |
61 elif m < cutlow: # start new low region | |
62 inlo = True | |
63 reg_start = i * self.bedwin | |
64 reg_end = bend | |
65 reg_means = [m] | |
66 elif m > cuthi: | |
67 inhi = True | |
68 reg_start = i * self.bedwin | |
69 reg_end = bend | |
70 reg_means = [m] | |
71 else: # not out of range - write current extended bed region | |
72 if inhi or inlo: | |
73 inhi = False | |
74 inlo = False | |
75 rm = sum(reg_means) / len(reg_means) | |
76 bed.append( | |
77 "%s\t%d\t%d\t%s\t%.3f\n" | |
78 % (chr, reg_start, reg_end, featname, rm) | |
79 ) | |
80 reg_means = [] | |
81 reg_start = None | |
82 reg_end = None | |
83 self.bedf.write(''.join(bed)) | |
84 self.bedf.close() | |
85 | |
86 | |
87 if __name__ == "__main__": | |
88 fo = findOut(bwname="test.bw", bedname="test.bed", sd_lo=2, sd_hi=2, bedwin=100) |