diff bigwig_outlier_bed_slow1.py @ 0:c71db540eb38 draft

planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
author fubar
date Mon, 01 Jul 2024 02:48:46 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bigwig_outlier_bed_slow1.py	Mon Jul 01 02:48:46 2024 +0000
@@ -0,0 +1,88 @@
+import os
+import pyBigWig
+
+
+class findOut():
+
+    def __init__(self, bwname="test.bw", bedname="test.bed", sd_lo=3, sd_hi=3, bedwin=10):
+        self.bedwin = bedwin
+        self.bwf = pyBigWig.open(bwname, "r")
+        self.chrlist = self.bwf.chroms()
+        self.bedf = open(bedname, "w")
+        self.sd_lo = sd_lo
+        self.sd_hi = sd_hi
+        self.makeBed(self.chrlist)
+
+    def makeBed(self, chrlist):
+        bed = []
+        for chr in chrlist:
+            print(chr)
+            gm = self.bwf.stats(chr, 0, type="mean")[0]
+            gstd = self.bwf.stats(chr, 0, type="std")[0]
+            cutlow = gm - (self.sd_lo * gstd)
+            cuthi = gm + (self.sd_hi * gstd)
+            chr_len = self.chrlist[chr]
+            nb = chr_len // self.bedwin
+            means = self.bwf.stats(chr, 0, chr_len, type="mean", nBins=nb)
+            inlo = False
+            inhi = False
+            reg_start = None
+            reg_end = None
+            reg_means = []
+            print('got %d means, gm=%f, lo=%f, hi=%f' % (len(means), gm, cutlow, cuthi))
+            for i, m in enumerate(means):
+                bend = min(chr_len, (i + 1) * self.bedwin)
+                featname = "%s_%d" % (chr,i)
+                if m and (m < cutlow or m > cuthi):
+                    if inlo:
+                        if m < cutlow:  # extend
+                            reg_end = bend
+                            reg_means.append(m)
+                        else:  # high so close
+                            rm = sum(reg_means) / len(reg_means)
+                            bed.append(
+                                "%s\t%d\t%d\t%s\t%.3f\n"
+                                % (chr, reg_start, reg_end, featname, rm)
+                            )
+                            inlo = False
+                            reg_means = []
+                    elif inhi:
+                        if m > cuthi:  # extend
+                            reg_end = bend
+                            reg_means.append(m)
+                        else:
+                            rm = sum(reg_means) / len(reg_means)
+                            bed.append(
+                                "%s\t%d\t%d\t%s\t%.3f\n"
+                                % (chr, reg_start, reg_end, featname, rm)
+                            )
+                            inhi = False
+                            reg_means = []
+                    elif m < cutlow:  # start new low region
+                        inlo = True
+                        reg_start = i * self.bedwin
+                        reg_end = bend
+                        reg_means = [m]
+                    elif m > cuthi:
+                        inhi = True
+                        reg_start = i * self.bedwin
+                        reg_end = bend
+                        reg_means = [m]
+                else:  # not out of range - write current extended bed region
+                    if inhi or inlo:
+                        inhi = False
+                        inlo = False
+                        rm = sum(reg_means) / len(reg_means)
+                        bed.append(
+                            "%s\t%d\t%d\t%s\t%.3f\n"
+                            % (chr, reg_start, reg_end, featname, rm)
+                        )
+                        reg_means = []
+                        reg_start = None
+                        reg_end = None
+        self.bedf.write(''.join(bed))
+        self.bedf.close()
+
+
+if __name__ == "__main__":
+    fo = findOut(bwname="test.bw", bedname="test.bed", sd_lo=2, sd_hi=2, bedwin=100)