changeset 0:c71db540eb38 draft

planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
author fubar
date Mon, 01 Jul 2024 02:48:46 +0000
parents
children 04129d5129be
files README.md bigwig_outlier_bed.py bigwig_outlier_bed.xml bigwig_outlier_bed_slow1.py bigwigtobed.python.txt other/bigslowtest.bw other/merlin.bw other/test.bw plants.sh test-data/bedouthilo_sample test-data/bigwig_sample
diffstat 11 files changed, 750 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Mon Jul 01 02:48:46 2024 +0000
@@ -0,0 +1,31 @@
+## bigwig peak outlier to bed
+
+### July 30 2024 for the VGP
+
+This code will soon become a Galaxy tool, for building some of the [NIH MARBL T2T assembly polishing](https://github.com/marbl/training) tools as Galaxy workflows.
+
+The next JBrowse2 tool release will include a plugin for optional colours to distinguish bed features, shown being tested in the screenshots below.
+
+### Find and mark BigWig peaks to a bed file for display
+
+In the spirit of DeepTools, but finding contiguous regions where the bigwig value is either above or below a given centile.
+0.99 and 0.01 for example. These quantile cut point values are found and applied over each chromosome using some [cunning numpy code](http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html)
+
+![image](https://github.com/fubar2/bigwig_peak_bed/assets/6016266/cdee3a2b-ae31-4282-b744-992c15fb49db)
+
+![image](https://github.com/fubar2/bigwig_peak_bed/assets/6016266/59d1564b-0c34-42a3-b437-44332cf1b2f0)
+
+Big differences between chromosomes 14,15,21,22 and Y in this "all contigs" view - explanations welcomed:
+
+![image](https://github.com/fubar2/bigwig_peak_bed/assets/6016266/162bf681-2977-4eb8-8d6f-9dad5b3931f8)
+
+
+[pybedtools](https://github.com/jackh726/bigtools) is used for the bigwig interface. Optionally allow
+multiple bigwigs to be processed into a single bed - the bed features have the bigwig name in the label for viewing.
+
+### Note on quantiles per chromosome rather than quantiles for the whole bigwig
+
+It is just not feasible to hold all contigs in the entire decoded bigwig in RAM to estimate quantiles. It may be
+better to sample across all chromosomes so as not to lose any systematic differences between them - the current method will hide those
+differences unfortunately. Sampling might be possible. Looking at the actual quantile values across a couple of test bigwigs suggests that
+there is not much variation between chromosomes but there's now a tabular report to check them for each input bigwig.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bigwig_outlier_bed.py	Mon Jul 01 02:48:46 2024 +0000
@@ -0,0 +1,148 @@
+"""
+Ross Lazarus June 2024 for VGP 
+
+Bigwigs are great, but hard to reliably "see" small low coverage or small very high coverage regions.
+Colouring in JB2 tracks will need a new plugin, so this code will find bigwig regions above and below a chosen percentile point.
+0.99 and 0.01 work well in testing with a minimum span of 10 bp.
+
+Multiple bigwigs **with the same reference** can be combined - bed segments will be named appropriately
+Combining multiple references works but is silly because only display will rely on one reference so others will not be shown...
+
+Tricksy numpy method from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
+takes about 95 seconds for a 17MB test wiggle
+
+JBrowse2 bed normally displays ignore the score, so could provide separate low/high bed file outputs as an option.
+
+Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores
+Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi).
+
+"""
+
+
+import argparse
+import numpy as np
+import pybigtools
+import sys
+
+
+from pathlib import Path
+
+class findOut():
+
+    def __init__(self, args):
+        self.bwnames=args.bigwig
+        self.bwlabels=args.bigwiglabels
+        self.bedwin=args.minwin
+        self.qlo=args.qlo
+        self.qhi=args.qhi
+        self.bedouthilo=args.bedouthilo
+        self.bedouthi=args.bedouthi
+        self.bedoutlo=args.bedoutlo
+        self.tableout = args.tableout
+        self.bedwin = args.minwin
+        self.qhi = args.qhi
+        self.qlo = args.qlo
+        self.makeBed()
+
+    def processVals(self, bw, isTop):
+        # http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
+        if isTop:
+            bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s
+        else:
+            bwex = np.r_[False, bw <= self.bwbot, False]
+        bwexd = np.diff(bwex)
+        bwexdnz = bwexd.nonzero()[0]
+        bwregions = np.reshape(bwexdnz, (-1,2))
+        return bwregions
+
+    def writeBed(self, bed, bedfname):
+        """
+        potentially multiple
+        """
+        bed.sort()
+        beds = ['%s\t%d\t%d\t%s\t%d' % x for x in bed]
+        with open(bedfname, "w") as bedf:
+            bedf.write('\n'.join(beds))
+            bedf.write('\n')
+        print('Wrote %d bed regions to %s' % (len(bed), bedfname))
+        
+    def makeBed(self):
+        bedhi = []
+        bedlo = []
+        bwlabels = self.bwlabels
+        bwnames = self.bwnames
+        print('bwnames=', bwnames, "bwlabs=", bwlabels)
+        for i, bwname in enumerate(bwnames):
+            bwlabel = bwlabels[i].replace(" ",'')
+            p = Path('in.bw')
+            p.symlink_to( bwname ) # required by pybigtools (!)
+            bwf = pybigtools.open('in.bw')
+            chrlist = bwf.chroms()
+            chrs = list(chrlist.keys())
+            chrs.sort()
+            restab = ["contig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"]
+            for chr in chrs:
+                bw = bwf.values(chr)
+                bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered
+                if self.qhi is not None:
+                    self.bwtop = np.quantile(bw, self.qhi)
+                    bwhi = self.processVals(bw, isTop=True)
+                    for i, seg in enumerate(bwhi):
+                        if seg[1] - seg[0] >= self.bedwin:
+                            bedhi.append((chr, seg[0], seg[1], '%s_hi' % (bwlabel), 1))
+                if self.qlo is not None:
+                    self.bwbot = np.quantile(bw, self.qlo)
+                    bwlo = self.processVals(bw, isTop=False)            
+                    for i, seg in enumerate(bwlo):
+                        if seg[1] - seg[0] >= self.bedwin:
+                            bedlo.append((chr, seg[0], seg[1], '%s_lo' % (bwlabel), -1))
+                bwmean = np.mean(bw)
+                bwstd = np.std(bw)
+                bwmax = np.max(bw)
+                nrow = np.size(bw)
+                bwmin = np.min(bw)
+                restab.append('%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f' % (chr,nrow,bwmean,bwstd,bwmin,bwmax,self.bwtop,self.bwbot))        
+        print('\n'.join(restab), '\n')
+        if self.tableout:
+            with open(self.tableout) as t:
+                t.write('\n'.join(restab))
+                t.write('\n')
+        if self.bedoutlo:
+            if self.qlo:
+                self.writeBed(bedlo, self.bedoutlo)
+        if self.bedouthi:
+            if self.qhi:
+                self.writeBed(bedhi, self.bedouthi)
+        if self.bedouthilo:
+            allbed = bedlo + bedhi
+            self.writeBed(allbed, self.bedouthilo)
+        return restab
+    
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    a = parser.add_argument
+    a('-m', '--minwin',default=10, type=int)
+    a('-l', '--qlo',default=None, type=float)
+    a('-i', '--qhi',default=None, type=float)
+    a('-w', '--bigwig', nargs='+')
+    a('-n', '--bigwiglabels', nargs='+')
+    a('-o', '--bedouthilo', default=None, help="optional high and low combined bed")
+    a('-u', '--bedouthi', default=None, help="optional high only bed")
+    a('-b', '--bedoutlo', default=None, help="optional low only bed")
+    a('-t', '--tableout', default=None)
+    args = parser.parse_args()
+    print('args=', args)
+    if not (args.bedouthilo or args.bedouthi or args.bedoutlo):
+        sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need a bed output choice - must be one of low only, high only or both combined")
+        sys.exit(2)
+    if not (args.qlo or args.qhi):
+        sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need one or both of quantile cutpoints qhi and qlo")
+        sys.exit(2)
+    restab = findOut(args)
+    if args.tableout:
+        with open(args.tableout, 'w') as tout:
+            tout.write('\n'.join(restab))
+            tout.write('\n')
+    
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bigwig_outlier_bed.xml	Mon Jul 01 02:48:46 2024 +0000
@@ -0,0 +1,212 @@
+<tool name="bigwig_outlier_bed" id="bigwigoutlierbed" version="0.01" profile="22.05">
+  <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
+  <!--Created by toolfactory@galaxy.org at 30/06/2024 19:44:14 using the Galaxy Tool Factory.-->
+  <description>Writes high and low bigwig regions as features in a bed file</description>
+  <edam_topics>
+      <edam_topic>topic_0157</edam_topic>
+      <edam_topic>topic_0092</edam_topic>
+  </edam_topics>
+  <edam_operations>
+      <edam_operation>operation_0337</edam_operation>
+  </edam_operations>  
+  <requirements>
+    <requirement type="package" version="3.12.3">python</requirement>
+    <requirement type="package" version="2.0.0">numpy</requirement>
+    <requirement type="package" version="0.1.4">pybigtools</requirement>
+  </requirements>
+  <version_command><![CDATA[python -c "import pybigtools; from importlib.metadata import version; print(version('pybigtools'))"]]></version_command>
+  <command><![CDATA[python
+'$runme'
+--bigwig
+'$bigwig'
+--bedouthilo
+'$bedouthilo'
+--minwin
+'$minwin'
+--qhi
+'$qhi'
+--qlo
+'$qlo'
+#if $tableout == "set"
+ --tableout
+#end if
+--bigwiglabels
+'$bigwiglabels']]></command>
+  <configfiles>
+    <configfile name="runme"><![CDATA[#raw
+"""
+Bigwigs are great, but hard to reliably "see" small low coverage or small very high coverage regions.
+Colouring in JB2 tracks will need a new plugin, so this code will find bigwig regions above and below a chosen percentile point.
+0.99 and 0.01 work well in testing with a minimum span of 10 bp.
+Multiple bigwigs **with the same reference** can be combined - bed segments will be named appropriately
+Combining multiple references works but is silly because display will rely on one reference so features mapped to other references will not appear.
+
+Tricksy numpy method from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
+takes about 95 seconds for a 17MB test wiggle
+JBrowse2 bed normally displays ignore the score, so could provide separate low/high bed file outputs as an option.
+Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores
+Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi).
+"""
+
+import argparse
+import numpy as np
+import pybigtools
+import sys
+from pathlib import Path
+
+
+class findOut():
+    def __init__(self, args):
+        self.bwnames=args.bigwig
+        self.bwlabels=args.bigwiglabels
+        self.bedwin=args.minwin
+        self.qlo=args.qlo
+        self.qhi=args.qhi
+        self.bedouthilo=args.bedouthilo
+        self.bedouthi=args.bedouthi
+        self.bedoutlo=args.bedoutlo
+        self.tableout = args.tableout
+        self.bedwin = args.minwin
+        self.qhi = args.qhi
+        self.qlo = args.qlo
+        self.makeBed()
+
+    def processVals(self, bw, isTop):
+        # http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
+        if isTop:
+            bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s
+        else:
+            bwex = np.r_[False, bw <= self.bwbot, False]
+        bwexd = np.diff(bwex)
+        bwexdnz = bwexd.nonzero()[0]
+        bwregions = np.reshape(bwexdnz, (-1,2))
+        return bwregions
+
+    def writeBed(self, bed, bedfname):
+        """
+        potentially multiple
+        """
+        bed.sort()
+        beds = ['%s\t%d\t%d\t%s\t%d' % x for x in bed]
+        with open(bedfname, "w") as bedf:
+            bedf.write('\n'.join(beds))
+            bedf.write('\n')
+        print('Wrote %d bed regions to %s' % (len(bed), bedfname))
+
+    def makeBed(self):
+        bedhi = []
+        bedlo = []
+        bwlabels = self.bwlabels
+        bwnames = self.bwnames
+        print('bwnames=', bwnames, "bwlabs=", bwlabels)
+        for i, bwname in enumerate(bwnames):
+            bwlabel = bwlabels[i].replace(" ",'')
+            p = Path('in.bw')
+            p.symlink_to( bwname ) # required by pybigtools (!)
+            bwf = pybigtools.open('in.bw')
+            chrlist = bwf.chroms()
+            chrs = list(chrlist.keys())
+            chrs.sort()
+            restab = ["contig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"]
+            for chr in chrs:
+                bw = bwf.values(chr)
+                bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered
+                if self.qhi is not None:
+                    self.bwtop = np.quantile(bw, self.qhi)
+                    bwhi = self.processVals(bw, isTop=True)
+                    for i, seg in enumerate(bwhi):
+                        if seg[1] - seg[0] >= self.bedwin:
+                            bedhi.append((chr, seg[0], seg[1], '%s_hi' % (bwlabel), 1))
+                if self.qlo is not None:
+                    self.bwbot = np.quantile(bw, self.qlo)
+                    bwlo = self.processVals(bw, isTop=False)            
+                    for i, seg in enumerate(bwlo):
+                        if seg[1] - seg[0] >= self.bedwin:
+                            bedlo.append((chr, seg[0], seg[1], '%s_lo' % (bwlabel), -1))
+                bwmean = np.mean(bw)
+                bwstd = np.std(bw)
+                bwmax = np.max(bw)
+                nrow = np.size(bw)
+                bwmin = np.min(bw)
+                restab.append('%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f' % (chr,nrow,bwmean,bwstd,bwmin,bwmax,self.bwtop,self.bwbot))        
+        print('\n'.join(restab), '\n')
+        if self.tableout:
+            with open(self.tableout) as t:
+                t.write('\n'.join(restab))
+                t.write('\n')
+        if self.bedoutlo:
+            if self.qlo:
+                self.writeBed(bedlo, self.bedoutlo)
+        if self.bedouthi:
+            if self.qhi:
+                self.writeBed(bedhi, self.bedouthi)
+        if self.bedouthilo:
+            allbed = bedlo + bedhi
+            self.writeBed(allbed, self.bedouthilo)
+        return restab
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    a = parser.add_argument
+    a('-m', '--minwin',default=10, type=int)
+    a('-l', '--qlo',default=None, type=float)
+    a('-i', '--qhi',default=None, type=float)
+    a('-w', '--bigwig', nargs='+')
+    a('-n', '--bigwiglabels', nargs='+')
+    a('-o', '--bedouthilo', default=None, help="optional high and low combined bed")
+    a('-u', '--bedouthi', default=None, help="optional high only bed")
+    a('-b', '--bedoutlo', default=None, help="optional low only bed")
+    a('-t', '--tableout', default=None)
+    args = parser.parse_args()
+    print('args=', args)
+    if not (args.bedouthilo or args.bedouthi or args.bedoutlo):
+        sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need a bed output choice - must be one of low only, high only or both combined")
+        sys.exit(2)
+    if not (args.qlo or args.qhi):
+        sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need one or both of quantile cutpoints qhi and qlo")
+        sys.exit(2)
+    restab = findOut(args)
+    if args.tableout:
+        with open(args.tableout, 'w') as tout:
+            tout.write('\n'.join(restab))
+            tout.write('\n')
+#end raw]]></configfile>
+  </configfiles>
+  <inputs>
+    <param name="bigwig" type="data" optional="false" label="Bigwig file(s) to process. " help="If more than one, MUST all use the same reference sequence to be displayable. Feature names will include the bigwig label." format="bigwig" multiple="true"/>
+    <param name="minwin" type="integer" value="10" label="Minimum continuous bases to count as a high or low bed feature" help="Actual run length will be found and used for continuous features as long or longer."/>
+    <param name="qhi" type="float" value="0.99" label="Quantile cutoff for a high region - 0.99 will cut off at or above the 99th percentile" help=""/>
+    <param name="qlo" type="float" value="0.01" label="Quantile cutoff for a low region - 0.01 will cut off at or below the 1st percentile." help=""/>
+    <param name="tableout" type="select" label="Write a table showing contig statistics for each bigwig" help="" display="radio">
+      <option value="notset">Do not set this flag</option>
+      <option value="set">Set this flag</option>
+    </param>
+    <param name="bigwiglabels" type="text" value="outbed" label="Label to use in bed feature names to indicate source bigwig contents - such as coverage" help=""/>
+  </inputs>
+  <outputs>
+    <data name="bedouthilo" format="bed" label="Both high and low contiguous regions as long or longer than window length into one bed " hidden="false"/>
+  </outputs>
+  <tests>
+    <test>
+      <output name="bedouthilo" value="bedouthilo_sample" compare="diff" lines_diff="0"/>
+      <param name="bigwig" value="bigwig_sample"/>
+      <param name="minwin" value="10"/>
+      <param name="qhi" value="0.99"/>
+      <param name="qlo" value="0.01"/>
+      <param name="tableout" value="notset"/>
+      <param name="bigwiglabels" value="outbed"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+ **What it Does**
+ 
+ Takes one or more bigwigs mapped to the same reference and finds all the minimum window sized or greater contiguous regions above or below an upper and lower quantile cutoff.
+ A window size of 10 works well, and quantiles set at 0.01 and 0.99 will generally work well.
+ 
+  ]]></help>
+  <citations>
+    <citation type="doi">10.1093/bioinformatics/btae350</citation>
+  </citations>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bigwig_outlier_bed_slow1.py	Mon Jul 01 02:48:46 2024 +0000
@@ -0,0 +1,88 @@
+import os
+import pyBigWig
+
+
+class findOut():
+
+    def __init__(self, bwname="test.bw", bedname="test.bed", sd_lo=3, sd_hi=3, bedwin=10):
+        self.bedwin = bedwin
+        self.bwf = pyBigWig.open(bwname, "r")
+        self.chrlist = self.bwf.chroms()
+        self.bedf = open(bedname, "w")
+        self.sd_lo = sd_lo
+        self.sd_hi = sd_hi
+        self.makeBed(self.chrlist)
+
+    def makeBed(self, chrlist):
+        bed = []
+        for chr in chrlist:
+            print(chr)
+            gm = self.bwf.stats(chr, 0, type="mean")[0]
+            gstd = self.bwf.stats(chr, 0, type="std")[0]
+            cutlow = gm - (self.sd_lo * gstd)
+            cuthi = gm + (self.sd_hi * gstd)
+            chr_len = self.chrlist[chr]
+            nb = chr_len // self.bedwin
+            means = self.bwf.stats(chr, 0, chr_len, type="mean", nBins=nb)
+            inlo = False
+            inhi = False
+            reg_start = None
+            reg_end = None
+            reg_means = []
+            print('got %d means, gm=%f, lo=%f, hi=%f' % (len(means), gm, cutlow, cuthi))
+            for i, m in enumerate(means):
+                bend = min(chr_len, (i + 1) * self.bedwin)
+                featname = "%s_%d" % (chr,i)
+                if m and (m < cutlow or m > cuthi):
+                    if inlo:
+                        if m < cutlow:  # extend
+                            reg_end = bend
+                            reg_means.append(m)
+                        else:  # high so close
+                            rm = sum(reg_means) / len(reg_means)
+                            bed.append(
+                                "%s\t%d\t%d\t%s\t%.3f\n"
+                                % (chr, reg_start, reg_end, featname, rm)
+                            )
+                            inlo = False
+                            reg_means = []
+                    elif inhi:
+                        if m > cuthi:  # extend
+                            reg_end = bend
+                            reg_means.append(m)
+                        else:
+                            rm = sum(reg_means) / len(reg_means)
+                            bed.append(
+                                "%s\t%d\t%d\t%s\t%.3f\n"
+                                % (chr, reg_start, reg_end, featname, rm)
+                            )
+                            inhi = False
+                            reg_means = []
+                    elif m < cutlow:  # start new low region
+                        inlo = True
+                        reg_start = i * self.bedwin
+                        reg_end = bend
+                        reg_means = [m]
+                    elif m > cuthi:
+                        inhi = True
+                        reg_start = i * self.bedwin
+                        reg_end = bend
+                        reg_means = [m]
+                else:  # not out of range - write current extended bed region
+                    if inhi or inlo:
+                        inhi = False
+                        inlo = False
+                        rm = sum(reg_means) / len(reg_means)
+                        bed.append(
+                            "%s\t%d\t%d\t%s\t%.3f\n"
+                            % (chr, reg_start, reg_end, featname, rm)
+                        )
+                        reg_means = []
+                        reg_start = None
+                        reg_end = None
+        self.bedf.write(''.join(bed))
+        self.bedf.close()
+
+
+if __name__ == "__main__":
+    fo = findOut(bwname="test.bw", bedname="test.bed", sd_lo=2, sd_hi=2, bedwin=100)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bigwigtobed.python.txt	Mon Jul 01 02:48:46 2024 +0000
@@ -0,0 +1,131 @@
+#raw
+"""
+Ross Lazarus June 2024 for VGP 
+Bigwigs are great, but hard to reliably "see" small low coverage or small very high coverage regions.
+Colouring in JB2 tracks will need a new plugin, so this code will find bigwig regions above and below a chosen percentile point.
+0.99 and 0.01 work well in testing with a minimum span of 10 bp.
+Multiple bigwigs **with the same reference** can be combined - bed segments will be named appropriately
+Combining multiple references works but is silly because only display will rely on one reference so others will not be shown...
+Tricksy numpy method from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
+takes about 95 seconds for a 17MB test wiggle
+JBrowse2 bed normally displays ignore the score, so could provide separate low/high bed file outputs as an option.
+Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores
+Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi).
+"""
+import argparse
+import numpy as np
+import pybigtools
+import sys
+from pathlib import Path
+class findOut():
+    def __init__(self, args):
+        self.bwnames=args.bigwig
+        self.bwlabels=args.bigwiglabels
+        self.bedwin=args.minwin
+        self.qlo=args.qlo
+        self.qhi=args.qhi
+        self.bedouthilo=args.bedouthilo
+        self.bedouthi=args.bedouthi
+        self.bedoutlo=args.bedoutlo
+        self.tableout = args.tableout
+        self.bedwin = args.minwin
+        self.qhi = args.qhi
+        self.qlo = args.qlo
+        self.makeBed()
+    def processVals(self, bw, isTop):
+        # http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
+        if isTop:
+            bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s
+        else:
+            bwex = np.r_[False, bw <= self.bwbot, False]
+        bwexd = np.diff(bwex)
+        bwexdnz = bwexd.nonzero()[0]
+        bwregions = np.reshape(bwexdnz, (-1,2))
+        return bwregions
+    def writeBed(self, bed, bedfname):
+        """
+        potentially multiple
+        """
+        bed.sort()
+        beds = ['%s\t%d\t%d\t%s\t%d' % x for x in bed]
+        with open(bedfname, "w") as bedf:
+            bedf.write('\n'.join(beds))
+            bedf.write('\n')
+        print('Wrote %d bed regions to %s' % (len(bed), bedfname))
+    def makeBed(self):
+        bedhi = []
+        bedlo = []
+        bwlabels = self.bwlabels
+        bwnames = self.bwnames
+        print('bwnames=', bwnames, "bwlabs=", bwlabels)
+        for i, bwname in enumerate(bwnames):
+            bwlabel = bwlabels[i].replace(" ",'')
+            p = Path('in.bw')
+            p.symlink_to( bwname ) # required by pybigtools (!)
+            bwf = pybigtools.open('in.bw')
+            chrlist = bwf.chroms()
+            chrs = list(chrlist.keys())
+            chrs.sort()
+            restab = ["contig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"]
+            for chr in chrs:
+                bw = bwf.values(chr)
+                bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered
+                if self.qhi is not None:
+                    self.bwtop = np.quantile(bw, self.qhi)
+                    bwhi = self.processVals(bw, isTop=True)
+                    for i, seg in enumerate(bwhi):
+                        if seg[1] - seg[0] >= self.bedwin:
+                            bedhi.append((chr, seg[0], seg[1], '%s_hi' % (bwlabel), 1))
+                if self.qlo is not None:
+                    self.bwbot = np.quantile(bw, self.qlo)
+                    bwlo = self.processVals(bw, isTop=False)            
+                    for i, seg in enumerate(bwlo):
+                        if seg[1] - seg[0] >= self.bedwin:
+                            bedlo.append((chr, seg[0], seg[1], '%s_lo' % (bwlabel), -1))
+                bwmean = np.mean(bw)
+                bwstd = np.std(bw)
+                bwmax = np.max(bw)
+                nrow = np.size(bw)
+                bwmin = np.min(bw)
+                restab.append('%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f' % (chr,nrow,bwmean,bwstd,bwmin,bwmax,self.bwtop,self.bwbot))        
+        print('\n'.join(restab), '\n')
+        if self.tableout:
+            with open(self.tableout) as t:
+                t.write('\n'.join(restab))
+                t.write('\n')
+        if self.bedoutlo:
+            if self.qlo:
+                self.writeBed(bedlo, self.bedoutlo)
+        if self.bedouthi:
+            if self.qhi:
+                self.writeBed(bedhi, self.bedouthi)
+        if self.bedouthilo:
+            allbed = bedlo + bedhi
+            self.writeBed(allbed, self.bedouthilo)
+        return restab
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    a = parser.add_argument
+    a('-m', '--minwin',default=10, type=int)
+    a('-l', '--qlo',default=None, type=float)
+    a('-i', '--qhi',default=None, type=float)
+    a('-w', '--bigwig', nargs='+')
+    a('-n', '--bigwiglabels', nargs='+')
+    a('-o', '--bedouthilo', default=None, help="optional high and low combined bed")
+    a('-u', '--bedouthi', default=None, help="optional high only bed")
+    a('-b', '--bedoutlo', default=None, help="optional low only bed")
+    a('-t', '--tableout', default=None)
+    args = parser.parse_args()
+    print('args=', args)
+    if not (args.bedouthilo or args.bedouthi or args.bedoutlo):
+        sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need a bed output choice - must be one of low only, high only or both combined")
+        sys.exit(2)
+    if not (args.qlo or args.qhi):
+        sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need one or both of quantile cutpoints qhi and qlo")
+        sys.exit(2)
+    restab = findOut(args)
+    if args.tableout:
+        with open(args.tableout, 'w') as tout:
+            tout.write('\n'.join(restab))
+            tout.write('\n')
+#end raw
Binary file other/bigslowtest.bw has changed
Binary file other/merlin.bw has changed
Binary file other/test.bw has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plants.sh	Mon Jul 01 02:48:46 2024 +0000
@@ -0,0 +1,1 @@
+planemo shed_update --shed_target toolshed --owner fubar --name bigwig_outlier_bed --shed_key bbfaa3d1d522066d41a4250dc419cc6f  ./
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bedouthilo_sample	Mon Jul 01 02:48:46 2024 +0000
@@ -0,0 +1,139 @@
+Merlin	20	30	outbed_hi	1
+Merlin	80	90	outbed_lo	-1
+Merlin	190	200	outbed_hi	1
+Merlin	430	440	outbed_lo	-1
+Merlin	1230	1240	outbed_lo	-1
+Merlin	1570	1580	outbed_hi	1
+Merlin	1730	1740	outbed_hi	1
+Merlin	1850	1860	outbed_hi	1
+Merlin	1880	1890	outbed_hi	1
+Merlin	2670	2680	outbed_lo	-1
+Merlin	3400	3410	outbed_hi	1
+Merlin	3740	3750	outbed_lo	-1
+Merlin	4030	4040	outbed_hi	1
+Merlin	4700	4710	outbed_lo	-1
+Merlin	5870	5890	outbed_lo	-1
+Merlin	6290	6300	outbed_lo	-1
+Merlin	7120	7130	outbed_hi	1
+Merlin	7860	7870	outbed_hi	1
+Merlin	8280	8290	outbed_hi	1
+Merlin	8860	8870	outbed_lo	-1
+Merlin	10520	10530	outbed_lo	-1
+Merlin	10820	10830	outbed_hi	1
+Merlin	11190	11200	outbed_lo	-1
+Merlin	11380	11390	outbed_lo	-1
+Merlin	11810	11820	outbed_lo	-1
+Merlin	12660	12670	outbed_lo	-1
+Merlin	13310	13320	outbed_hi	1
+Merlin	14160	14170	outbed_lo	-1
+Merlin	14750	14760	outbed_lo	-1
+Merlin	16270	16280	outbed_lo	-1
+Merlin	17240	17250	outbed_lo	-1
+Merlin	17570	17580	outbed_hi	1
+Merlin	17940	17950	outbed_hi	1
+Merlin	18290	18300	outbed_lo	-1
+Merlin	18340	18350	outbed_hi	1
+Merlin	18810	18820	outbed_hi	1
+Merlin	19080	19090	outbed_lo	-1
+Merlin	19220	19230	outbed_lo	-1
+Merlin	20240	20250	outbed_lo	-1
+Merlin	21650	21660	outbed_lo	-1
+Merlin	21810	21820	outbed_lo	-1
+Merlin	22310	22320	outbed_hi	1
+Merlin	23630	23640	outbed_lo	-1
+Merlin	23760	23770	outbed_hi	1
+Merlin	24580	24590	outbed_hi	1
+Merlin	24860	24870	outbed_hi	1
+Merlin	25030	25040	outbed_lo	-1
+Merlin	25700	25710	outbed_hi	1
+Merlin	26260	26270	outbed_lo	-1
+Merlin	26450	26460	outbed_hi	1
+Merlin	26560	26570	outbed_lo	-1
+Merlin	26610	26620	outbed_lo	-1
+Merlin	27220	27230	outbed_hi	1
+Merlin	27280	27290	outbed_hi	1
+Merlin	27670	27680	outbed_lo	-1
+Merlin	27970	27980	outbed_lo	-1
+Merlin	28500	28510	outbed_hi	1
+Merlin	29400	29410	outbed_lo	-1
+Merlin	29740	29750	outbed_hi	1
+Merlin	29910	29920	outbed_lo	-1
+Merlin	31540	31550	outbed_hi	1
+Merlin	31890	31900	outbed_hi	1
+Merlin	32970	32980	outbed_hi	1
+Merlin	33460	33470	outbed_lo	-1
+Merlin	33910	33920	outbed_lo	-1
+Merlin	33980	33990	outbed_hi	1
+Merlin	34470	34490	outbed_lo	-1
+Merlin	35320	35330	outbed_lo	-1
+Merlin	35440	35450	outbed_hi	1
+Merlin	35620	35630	outbed_lo	-1
+Merlin	35880	35890	outbed_lo	-1
+Merlin	36510	36520	outbed_hi	1
+Merlin	36810	36820	outbed_lo	-1
+Merlin	37230	37240	outbed_lo	-1
+Merlin	37560	37570	outbed_lo	-1
+Merlin	37750	37760	outbed_lo	-1
+Merlin	38330	38340	outbed_lo	-1
+Merlin	39600	39610	outbed_lo	-1
+Merlin	39690	39700	outbed_hi	1
+Merlin	40350	40360	outbed_hi	1
+Merlin	40550	40560	outbed_hi	1
+Merlin	40780	40790	outbed_hi	1
+Merlin	41390	41400	outbed_hi	1
+Merlin	41900	41910	outbed_lo	-1
+Merlin	41940	41950	outbed_hi	1
+Merlin	42050	42060	outbed_lo	-1
+Merlin	42470	42480	outbed_hi	1
+Merlin	42560	42570	outbed_hi	1
+Merlin	42730	42740	outbed_hi	1
+Merlin	43010	43020	outbed_hi	1
+Merlin	43910	43920	outbed_hi	1
+Merlin	44440	44450	outbed_lo	-1
+Merlin	44670	44680	outbed_hi	1
+Merlin	46010	46020	outbed_hi	1
+Merlin	46470	46480	outbed_hi	1
+Merlin	46610	46620	outbed_lo	-1
+Merlin	49290	49300	outbed_lo	-1
+Merlin	49370	49380	outbed_hi	1
+Merlin	50310	50320	outbed_hi	1
+Merlin	50880	50890	outbed_lo	-1
+Merlin	52230	52240	outbed_lo	-1
+Merlin	52460	52470	outbed_lo	-1
+Merlin	52740	52750	outbed_lo	-1
+Merlin	52770	52780	outbed_lo	-1
+Merlin	52980	52990	outbed_hi	1
+Merlin	53150	53160	outbed_lo	-1
+Merlin	54630	54640	outbed_hi	1
+Merlin	54830	54840	outbed_lo	-1
+Merlin	55430	55440	outbed_hi	1
+Merlin	56020	56030	outbed_lo	-1
+Merlin	56450	56460	outbed_hi	1
+Merlin	56630	56640	outbed_hi	1
+Merlin	57220	57230	outbed_lo	-1
+Merlin	57610	57620	outbed_hi	1
+Merlin	57790	57800	outbed_hi	1
+Merlin	58730	58740	outbed_hi	1
+Merlin	59360	59370	outbed_hi	1
+Merlin	59450	59460	outbed_hi	1
+Merlin	60210	60220	outbed_hi	1
+Merlin	61110	61120	outbed_hi	1
+Merlin	61250	61260	outbed_lo	-1
+Merlin	62470	62480	outbed_hi	1
+Merlin	63190	63200	outbed_lo	-1
+Merlin	63290	63300	outbed_lo	-1
+Merlin	63330	63340	outbed_hi	1
+Merlin	63350	63360	outbed_hi	1
+Merlin	64660	64670	outbed_lo	-1
+Merlin	65050	65060	outbed_lo	-1
+Merlin	66210	66220	outbed_hi	1
+Merlin	66510	66520	outbed_hi	1
+Merlin	66660	66670	outbed_lo	-1
+Merlin	66850	66860	outbed_hi	1
+Merlin	66900	66910	outbed_lo	-1
+Merlin	67150	67160	outbed_lo	-1
+Merlin	67690	67700	outbed_hi	1
+Merlin	68760	68780	outbed_hi	1
+Merlin	68790	68800	outbed_lo	-1
+Merlin	69660	69670	outbed_lo	-1
+Merlin	69850	69860	outbed_hi	1
Binary file test-data/bigwig_sample has changed