Mercurial > repos > fubar > bigwig_outlier_bed
changeset 0:c71db540eb38 draft
planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
author | fubar |
---|---|
date | Mon, 01 Jul 2024 02:48:46 +0000 |
parents | |
children | 04129d5129be |
files | README.md bigwig_outlier_bed.py bigwig_outlier_bed.xml bigwig_outlier_bed_slow1.py bigwigtobed.python.txt other/bigslowtest.bw other/merlin.bw other/test.bw plants.sh test-data/bedouthilo_sample test-data/bigwig_sample |
diffstat | 11 files changed, 750 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Mon Jul 01 02:48:46 2024 +0000 @@ -0,0 +1,31 @@ +## bigwig peak outlier to bed + +### July 30 2024 for the VGP + +This code will soon become a Galaxy tool, for building some of the [NIH MARBL T2T assembly polishing](https://github.com/marbl/training) tools as Galaxy workflows. + +The next JBrowse2 tool release will include a plugin for optional colours to distinguish bed features, shown being tested in the screenshots below. + +### Find and mark BigWig peaks to a bed file for display + +In the spirit of DeepTools, but finding contiguous regions where the bigwig value is either above or below a given centile. +0.99 and 0.01 for example. These quantile cut point values are found and applied over each chromosome using some [cunning numpy code](http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html) + +![image](https://github.com/fubar2/bigwig_peak_bed/assets/6016266/cdee3a2b-ae31-4282-b744-992c15fb49db) + +![image](https://github.com/fubar2/bigwig_peak_bed/assets/6016266/59d1564b-0c34-42a3-b437-44332cf1b2f0) + +Big differences between chromosomes 14,15,21,22 and Y in this "all contigs" view - explanations welcomed: + +![image](https://github.com/fubar2/bigwig_peak_bed/assets/6016266/162bf681-2977-4eb8-8d6f-9dad5b3931f8) + + +[pybedtools](https://github.com/jackh726/bigtools) is used for the bigwig interface. Optionally allow +multiple bigwigs to be processed into a single bed - the bed features have the bigwig name in the label for viewing. + +### Note on quantiles per chromosome rather than quantiles for the whole bigwig + +It is just not feasible to hold all contigs in the entire decoded bigwig in RAM to estimate quantiles. It may be +better to sample across all chromosomes so as not to lose any systematic differences between them - the current method will hide those +differences unfortunately. Sampling might be possible. Looking at the actual quantile values across a couple of test bigwigs suggests that +there is not much variation between chromosomes but there's now a tabular report to check them for each input bigwig.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bigwig_outlier_bed.py Mon Jul 01 02:48:46 2024 +0000 @@ -0,0 +1,148 @@ +""" +Ross Lazarus June 2024 for VGP + +Bigwigs are great, but hard to reliably "see" small low coverage or small very high coverage regions. +Colouring in JB2 tracks will need a new plugin, so this code will find bigwig regions above and below a chosen percentile point. +0.99 and 0.01 work well in testing with a minimum span of 10 bp. + +Multiple bigwigs **with the same reference** can be combined - bed segments will be named appropriately +Combining multiple references works but is silly because only display will rely on one reference so others will not be shown... + +Tricksy numpy method from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html +takes about 95 seconds for a 17MB test wiggle + +JBrowse2 bed normally displays ignore the score, so could provide separate low/high bed file outputs as an option. + +Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores +Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi). + +""" + + +import argparse +import numpy as np +import pybigtools +import sys + + +from pathlib import Path + +class findOut(): + + def __init__(self, args): + self.bwnames=args.bigwig + self.bwlabels=args.bigwiglabels + self.bedwin=args.minwin + self.qlo=args.qlo + self.qhi=args.qhi + self.bedouthilo=args.bedouthilo + self.bedouthi=args.bedouthi + self.bedoutlo=args.bedoutlo + self.tableout = args.tableout + self.bedwin = args.minwin + self.qhi = args.qhi + self.qlo = args.qlo + self.makeBed() + + def processVals(self, bw, isTop): + # http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html + if isTop: + bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s + else: + bwex = np.r_[False, bw <= self.bwbot, False] + bwexd = np.diff(bwex) + bwexdnz = bwexd.nonzero()[0] + bwregions = np.reshape(bwexdnz, (-1,2)) + return bwregions + + def writeBed(self, bed, bedfname): + """ + potentially multiple + """ + bed.sort() + beds = ['%s\t%d\t%d\t%s\t%d' % x for x in bed] + with open(bedfname, "w") as bedf: + bedf.write('\n'.join(beds)) + bedf.write('\n') + print('Wrote %d bed regions to %s' % (len(bed), bedfname)) + + def makeBed(self): + bedhi = [] + bedlo = [] + bwlabels = self.bwlabels + bwnames = self.bwnames + print('bwnames=', bwnames, "bwlabs=", bwlabels) + for i, bwname in enumerate(bwnames): + bwlabel = bwlabels[i].replace(" ",'') + p = Path('in.bw') + p.symlink_to( bwname ) # required by pybigtools (!) + bwf = pybigtools.open('in.bw') + chrlist = bwf.chroms() + chrs = list(chrlist.keys()) + chrs.sort() + restab = ["contig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"] + for chr in chrs: + bw = bwf.values(chr) + bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered + if self.qhi is not None: + self.bwtop = np.quantile(bw, self.qhi) + bwhi = self.processVals(bw, isTop=True) + for i, seg in enumerate(bwhi): + if seg[1] - seg[0] >= self.bedwin: + bedhi.append((chr, seg[0], seg[1], '%s_hi' % (bwlabel), 1)) + if self.qlo is not None: + self.bwbot = np.quantile(bw, self.qlo) + bwlo = self.processVals(bw, isTop=False) + for i, seg in enumerate(bwlo): + if seg[1] - seg[0] >= self.bedwin: + bedlo.append((chr, seg[0], seg[1], '%s_lo' % (bwlabel), -1)) + bwmean = np.mean(bw) + bwstd = np.std(bw) + bwmax = np.max(bw) + nrow = np.size(bw) + bwmin = np.min(bw) + restab.append('%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f' % (chr,nrow,bwmean,bwstd,bwmin,bwmax,self.bwtop,self.bwbot)) + print('\n'.join(restab), '\n') + if self.tableout: + with open(self.tableout) as t: + t.write('\n'.join(restab)) + t.write('\n') + if self.bedoutlo: + if self.qlo: + self.writeBed(bedlo, self.bedoutlo) + if self.bedouthi: + if self.qhi: + self.writeBed(bedhi, self.bedouthi) + if self.bedouthilo: + allbed = bedlo + bedhi + self.writeBed(allbed, self.bedouthilo) + return restab + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + a = parser.add_argument + a('-m', '--minwin',default=10, type=int) + a('-l', '--qlo',default=None, type=float) + a('-i', '--qhi',default=None, type=float) + a('-w', '--bigwig', nargs='+') + a('-n', '--bigwiglabels', nargs='+') + a('-o', '--bedouthilo', default=None, help="optional high and low combined bed") + a('-u', '--bedouthi', default=None, help="optional high only bed") + a('-b', '--bedoutlo', default=None, help="optional low only bed") + a('-t', '--tableout', default=None) + args = parser.parse_args() + print('args=', args) + if not (args.bedouthilo or args.bedouthi or args.bedoutlo): + sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need a bed output choice - must be one of low only, high only or both combined") + sys.exit(2) + if not (args.qlo or args.qhi): + sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need one or both of quantile cutpoints qhi and qlo") + sys.exit(2) + restab = findOut(args) + if args.tableout: + with open(args.tableout, 'w') as tout: + tout.write('\n'.join(restab)) + tout.write('\n') + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bigwig_outlier_bed.xml Mon Jul 01 02:48:46 2024 +0000 @@ -0,0 +1,212 @@ +<tool name="bigwig_outlier_bed" id="bigwigoutlierbed" version="0.01" profile="22.05"> + <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay--> + <!--Created by toolfactory@galaxy.org at 30/06/2024 19:44:14 using the Galaxy Tool Factory.--> + <description>Writes high and low bigwig regions as features in a bed file</description> + <edam_topics> + <edam_topic>topic_0157</edam_topic> + <edam_topic>topic_0092</edam_topic> + </edam_topics> + <edam_operations> + <edam_operation>operation_0337</edam_operation> + </edam_operations> + <requirements> + <requirement type="package" version="3.12.3">python</requirement> + <requirement type="package" version="2.0.0">numpy</requirement> + <requirement type="package" version="0.1.4">pybigtools</requirement> + </requirements> + <version_command><![CDATA[python -c "import pybigtools; from importlib.metadata import version; print(version('pybigtools'))"]]></version_command> + <command><![CDATA[python +'$runme' +--bigwig +'$bigwig' +--bedouthilo +'$bedouthilo' +--minwin +'$minwin' +--qhi +'$qhi' +--qlo +'$qlo' +#if $tableout == "set" + --tableout +#end if +--bigwiglabels +'$bigwiglabels']]></command> + <configfiles> + <configfile name="runme"><![CDATA[#raw +""" +Bigwigs are great, but hard to reliably "see" small low coverage or small very high coverage regions. +Colouring in JB2 tracks will need a new plugin, so this code will find bigwig regions above and below a chosen percentile point. +0.99 and 0.01 work well in testing with a minimum span of 10 bp. +Multiple bigwigs **with the same reference** can be combined - bed segments will be named appropriately +Combining multiple references works but is silly because display will rely on one reference so features mapped to other references will not appear. + +Tricksy numpy method from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html +takes about 95 seconds for a 17MB test wiggle +JBrowse2 bed normally displays ignore the score, so could provide separate low/high bed file outputs as an option. +Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores +Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi). +""" + +import argparse +import numpy as np +import pybigtools +import sys +from pathlib import Path + + +class findOut(): + def __init__(self, args): + self.bwnames=args.bigwig + self.bwlabels=args.bigwiglabels + self.bedwin=args.minwin + self.qlo=args.qlo + self.qhi=args.qhi + self.bedouthilo=args.bedouthilo + self.bedouthi=args.bedouthi + self.bedoutlo=args.bedoutlo + self.tableout = args.tableout + self.bedwin = args.minwin + self.qhi = args.qhi + self.qlo = args.qlo + self.makeBed() + + def processVals(self, bw, isTop): + # http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html + if isTop: + bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s + else: + bwex = np.r_[False, bw <= self.bwbot, False] + bwexd = np.diff(bwex) + bwexdnz = bwexd.nonzero()[0] + bwregions = np.reshape(bwexdnz, (-1,2)) + return bwregions + + def writeBed(self, bed, bedfname): + """ + potentially multiple + """ + bed.sort() + beds = ['%s\t%d\t%d\t%s\t%d' % x for x in bed] + with open(bedfname, "w") as bedf: + bedf.write('\n'.join(beds)) + bedf.write('\n') + print('Wrote %d bed regions to %s' % (len(bed), bedfname)) + + def makeBed(self): + bedhi = [] + bedlo = [] + bwlabels = self.bwlabels + bwnames = self.bwnames + print('bwnames=', bwnames, "bwlabs=", bwlabels) + for i, bwname in enumerate(bwnames): + bwlabel = bwlabels[i].replace(" ",'') + p = Path('in.bw') + p.symlink_to( bwname ) # required by pybigtools (!) + bwf = pybigtools.open('in.bw') + chrlist = bwf.chroms() + chrs = list(chrlist.keys()) + chrs.sort() + restab = ["contig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"] + for chr in chrs: + bw = bwf.values(chr) + bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered + if self.qhi is not None: + self.bwtop = np.quantile(bw, self.qhi) + bwhi = self.processVals(bw, isTop=True) + for i, seg in enumerate(bwhi): + if seg[1] - seg[0] >= self.bedwin: + bedhi.append((chr, seg[0], seg[1], '%s_hi' % (bwlabel), 1)) + if self.qlo is not None: + self.bwbot = np.quantile(bw, self.qlo) + bwlo = self.processVals(bw, isTop=False) + for i, seg in enumerate(bwlo): + if seg[1] - seg[0] >= self.bedwin: + bedlo.append((chr, seg[0], seg[1], '%s_lo' % (bwlabel), -1)) + bwmean = np.mean(bw) + bwstd = np.std(bw) + bwmax = np.max(bw) + nrow = np.size(bw) + bwmin = np.min(bw) + restab.append('%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f' % (chr,nrow,bwmean,bwstd,bwmin,bwmax,self.bwtop,self.bwbot)) + print('\n'.join(restab), '\n') + if self.tableout: + with open(self.tableout) as t: + t.write('\n'.join(restab)) + t.write('\n') + if self.bedoutlo: + if self.qlo: + self.writeBed(bedlo, self.bedoutlo) + if self.bedouthi: + if self.qhi: + self.writeBed(bedhi, self.bedouthi) + if self.bedouthilo: + allbed = bedlo + bedhi + self.writeBed(allbed, self.bedouthilo) + return restab + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + a = parser.add_argument + a('-m', '--minwin',default=10, type=int) + a('-l', '--qlo',default=None, type=float) + a('-i', '--qhi',default=None, type=float) + a('-w', '--bigwig', nargs='+') + a('-n', '--bigwiglabels', nargs='+') + a('-o', '--bedouthilo', default=None, help="optional high and low combined bed") + a('-u', '--bedouthi', default=None, help="optional high only bed") + a('-b', '--bedoutlo', default=None, help="optional low only bed") + a('-t', '--tableout', default=None) + args = parser.parse_args() + print('args=', args) + if not (args.bedouthilo or args.bedouthi or args.bedoutlo): + sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need a bed output choice - must be one of low only, high only or both combined") + sys.exit(2) + if not (args.qlo or args.qhi): + sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need one or both of quantile cutpoints qhi and qlo") + sys.exit(2) + restab = findOut(args) + if args.tableout: + with open(args.tableout, 'w') as tout: + tout.write('\n'.join(restab)) + tout.write('\n') +#end raw]]></configfile> + </configfiles> + <inputs> + <param name="bigwig" type="data" optional="false" label="Bigwig file(s) to process. " help="If more than one, MUST all use the same reference sequence to be displayable. Feature names will include the bigwig label." format="bigwig" multiple="true"/> + <param name="minwin" type="integer" value="10" label="Minimum continuous bases to count as a high or low bed feature" help="Actual run length will be found and used for continuous features as long or longer."/> + <param name="qhi" type="float" value="0.99" label="Quantile cutoff for a high region - 0.99 will cut off at or above the 99th percentile" help=""/> + <param name="qlo" type="float" value="0.01" label="Quantile cutoff for a low region - 0.01 will cut off at or below the 1st percentile." help=""/> + <param name="tableout" type="select" label="Write a table showing contig statistics for each bigwig" help="" display="radio"> + <option value="notset">Do not set this flag</option> + <option value="set">Set this flag</option> + </param> + <param name="bigwiglabels" type="text" value="outbed" label="Label to use in bed feature names to indicate source bigwig contents - such as coverage" help=""/> + </inputs> + <outputs> + <data name="bedouthilo" format="bed" label="Both high and low contiguous regions as long or longer than window length into one bed " hidden="false"/> + </outputs> + <tests> + <test> + <output name="bedouthilo" value="bedouthilo_sample" compare="diff" lines_diff="0"/> + <param name="bigwig" value="bigwig_sample"/> + <param name="minwin" value="10"/> + <param name="qhi" value="0.99"/> + <param name="qlo" value="0.01"/> + <param name="tableout" value="notset"/> + <param name="bigwiglabels" value="outbed"/> + </test> + </tests> + <help><![CDATA[ + **What it Does** + + Takes one or more bigwigs mapped to the same reference and finds all the minimum window sized or greater contiguous regions above or below an upper and lower quantile cutoff. + A window size of 10 works well, and quantiles set at 0.01 and 0.99 will generally work well. + + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btae350</citation> + </citations> +</tool> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bigwig_outlier_bed_slow1.py Mon Jul 01 02:48:46 2024 +0000 @@ -0,0 +1,88 @@ +import os +import pyBigWig + + +class findOut(): + + def __init__(self, bwname="test.bw", bedname="test.bed", sd_lo=3, sd_hi=3, bedwin=10): + self.bedwin = bedwin + self.bwf = pyBigWig.open(bwname, "r") + self.chrlist = self.bwf.chroms() + self.bedf = open(bedname, "w") + self.sd_lo = sd_lo + self.sd_hi = sd_hi + self.makeBed(self.chrlist) + + def makeBed(self, chrlist): + bed = [] + for chr in chrlist: + print(chr) + gm = self.bwf.stats(chr, 0, type="mean")[0] + gstd = self.bwf.stats(chr, 0, type="std")[0] + cutlow = gm - (self.sd_lo * gstd) + cuthi = gm + (self.sd_hi * gstd) + chr_len = self.chrlist[chr] + nb = chr_len // self.bedwin + means = self.bwf.stats(chr, 0, chr_len, type="mean", nBins=nb) + inlo = False + inhi = False + reg_start = None + reg_end = None + reg_means = [] + print('got %d means, gm=%f, lo=%f, hi=%f' % (len(means), gm, cutlow, cuthi)) + for i, m in enumerate(means): + bend = min(chr_len, (i + 1) * self.bedwin) + featname = "%s_%d" % (chr,i) + if m and (m < cutlow or m > cuthi): + if inlo: + if m < cutlow: # extend + reg_end = bend + reg_means.append(m) + else: # high so close + rm = sum(reg_means) / len(reg_means) + bed.append( + "%s\t%d\t%d\t%s\t%.3f\n" + % (chr, reg_start, reg_end, featname, rm) + ) + inlo = False + reg_means = [] + elif inhi: + if m > cuthi: # extend + reg_end = bend + reg_means.append(m) + else: + rm = sum(reg_means) / len(reg_means) + bed.append( + "%s\t%d\t%d\t%s\t%.3f\n" + % (chr, reg_start, reg_end, featname, rm) + ) + inhi = False + reg_means = [] + elif m < cutlow: # start new low region + inlo = True + reg_start = i * self.bedwin + reg_end = bend + reg_means = [m] + elif m > cuthi: + inhi = True + reg_start = i * self.bedwin + reg_end = bend + reg_means = [m] + else: # not out of range - write current extended bed region + if inhi or inlo: + inhi = False + inlo = False + rm = sum(reg_means) / len(reg_means) + bed.append( + "%s\t%d\t%d\t%s\t%.3f\n" + % (chr, reg_start, reg_end, featname, rm) + ) + reg_means = [] + reg_start = None + reg_end = None + self.bedf.write(''.join(bed)) + self.bedf.close() + + +if __name__ == "__main__": + fo = findOut(bwname="test.bw", bedname="test.bed", sd_lo=2, sd_hi=2, bedwin=100)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bigwigtobed.python.txt Mon Jul 01 02:48:46 2024 +0000 @@ -0,0 +1,131 @@ +#raw +""" +Ross Lazarus June 2024 for VGP +Bigwigs are great, but hard to reliably "see" small low coverage or small very high coverage regions. +Colouring in JB2 tracks will need a new plugin, so this code will find bigwig regions above and below a chosen percentile point. +0.99 and 0.01 work well in testing with a minimum span of 10 bp. +Multiple bigwigs **with the same reference** can be combined - bed segments will be named appropriately +Combining multiple references works but is silly because only display will rely on one reference so others will not be shown... +Tricksy numpy method from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html +takes about 95 seconds for a 17MB test wiggle +JBrowse2 bed normally displays ignore the score, so could provide separate low/high bed file outputs as an option. +Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores +Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi). +""" +import argparse +import numpy as np +import pybigtools +import sys +from pathlib import Path +class findOut(): + def __init__(self, args): + self.bwnames=args.bigwig + self.bwlabels=args.bigwiglabels + self.bedwin=args.minwin + self.qlo=args.qlo + self.qhi=args.qhi + self.bedouthilo=args.bedouthilo + self.bedouthi=args.bedouthi + self.bedoutlo=args.bedoutlo + self.tableout = args.tableout + self.bedwin = args.minwin + self.qhi = args.qhi + self.qlo = args.qlo + self.makeBed() + def processVals(self, bw, isTop): + # http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html + if isTop: + bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s + else: + bwex = np.r_[False, bw <= self.bwbot, False] + bwexd = np.diff(bwex) + bwexdnz = bwexd.nonzero()[0] + bwregions = np.reshape(bwexdnz, (-1,2)) + return bwregions + def writeBed(self, bed, bedfname): + """ + potentially multiple + """ + bed.sort() + beds = ['%s\t%d\t%d\t%s\t%d' % x for x in bed] + with open(bedfname, "w") as bedf: + bedf.write('\n'.join(beds)) + bedf.write('\n') + print('Wrote %d bed regions to %s' % (len(bed), bedfname)) + def makeBed(self): + bedhi = [] + bedlo = [] + bwlabels = self.bwlabels + bwnames = self.bwnames + print('bwnames=', bwnames, "bwlabs=", bwlabels) + for i, bwname in enumerate(bwnames): + bwlabel = bwlabels[i].replace(" ",'') + p = Path('in.bw') + p.symlink_to( bwname ) # required by pybigtools (!) + bwf = pybigtools.open('in.bw') + chrlist = bwf.chroms() + chrs = list(chrlist.keys()) + chrs.sort() + restab = ["contig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"] + for chr in chrs: + bw = bwf.values(chr) + bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered + if self.qhi is not None: + self.bwtop = np.quantile(bw, self.qhi) + bwhi = self.processVals(bw, isTop=True) + for i, seg in enumerate(bwhi): + if seg[1] - seg[0] >= self.bedwin: + bedhi.append((chr, seg[0], seg[1], '%s_hi' % (bwlabel), 1)) + if self.qlo is not None: + self.bwbot = np.quantile(bw, self.qlo) + bwlo = self.processVals(bw, isTop=False) + for i, seg in enumerate(bwlo): + if seg[1] - seg[0] >= self.bedwin: + bedlo.append((chr, seg[0], seg[1], '%s_lo' % (bwlabel), -1)) + bwmean = np.mean(bw) + bwstd = np.std(bw) + bwmax = np.max(bw) + nrow = np.size(bw) + bwmin = np.min(bw) + restab.append('%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f' % (chr,nrow,bwmean,bwstd,bwmin,bwmax,self.bwtop,self.bwbot)) + print('\n'.join(restab), '\n') + if self.tableout: + with open(self.tableout) as t: + t.write('\n'.join(restab)) + t.write('\n') + if self.bedoutlo: + if self.qlo: + self.writeBed(bedlo, self.bedoutlo) + if self.bedouthi: + if self.qhi: + self.writeBed(bedhi, self.bedouthi) + if self.bedouthilo: + allbed = bedlo + bedhi + self.writeBed(allbed, self.bedouthilo) + return restab +if __name__ == "__main__": + parser = argparse.ArgumentParser() + a = parser.add_argument + a('-m', '--minwin',default=10, type=int) + a('-l', '--qlo',default=None, type=float) + a('-i', '--qhi',default=None, type=float) + a('-w', '--bigwig', nargs='+') + a('-n', '--bigwiglabels', nargs='+') + a('-o', '--bedouthilo', default=None, help="optional high and low combined bed") + a('-u', '--bedouthi', default=None, help="optional high only bed") + a('-b', '--bedoutlo', default=None, help="optional low only bed") + a('-t', '--tableout', default=None) + args = parser.parse_args() + print('args=', args) + if not (args.bedouthilo or args.bedouthi or args.bedoutlo): + sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need a bed output choice - must be one of low only, high only or both combined") + sys.exit(2) + if not (args.qlo or args.qhi): + sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need one or both of quantile cutpoints qhi and qlo") + sys.exit(2) + restab = findOut(args) + if args.tableout: + with open(args.tableout, 'w') as tout: + tout.write('\n'.join(restab)) + tout.write('\n') +#end raw
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plants.sh Mon Jul 01 02:48:46 2024 +0000 @@ -0,0 +1,1 @@ +planemo shed_update --shed_target toolshed --owner fubar --name bigwig_outlier_bed --shed_key bbfaa3d1d522066d41a4250dc419cc6f ./
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bedouthilo_sample Mon Jul 01 02:48:46 2024 +0000 @@ -0,0 +1,139 @@ +Merlin 20 30 outbed_hi 1 +Merlin 80 90 outbed_lo -1 +Merlin 190 200 outbed_hi 1 +Merlin 430 440 outbed_lo -1 +Merlin 1230 1240 outbed_lo -1 +Merlin 1570 1580 outbed_hi 1 +Merlin 1730 1740 outbed_hi 1 +Merlin 1850 1860 outbed_hi 1 +Merlin 1880 1890 outbed_hi 1 +Merlin 2670 2680 outbed_lo -1 +Merlin 3400 3410 outbed_hi 1 +Merlin 3740 3750 outbed_lo -1 +Merlin 4030 4040 outbed_hi 1 +Merlin 4700 4710 outbed_lo -1 +Merlin 5870 5890 outbed_lo -1 +Merlin 6290 6300 outbed_lo -1 +Merlin 7120 7130 outbed_hi 1 +Merlin 7860 7870 outbed_hi 1 +Merlin 8280 8290 outbed_hi 1 +Merlin 8860 8870 outbed_lo -1 +Merlin 10520 10530 outbed_lo -1 +Merlin 10820 10830 outbed_hi 1 +Merlin 11190 11200 outbed_lo -1 +Merlin 11380 11390 outbed_lo -1 +Merlin 11810 11820 outbed_lo -1 +Merlin 12660 12670 outbed_lo -1 +Merlin 13310 13320 outbed_hi 1 +Merlin 14160 14170 outbed_lo -1 +Merlin 14750 14760 outbed_lo -1 +Merlin 16270 16280 outbed_lo -1 +Merlin 17240 17250 outbed_lo -1 +Merlin 17570 17580 outbed_hi 1 +Merlin 17940 17950 outbed_hi 1 +Merlin 18290 18300 outbed_lo -1 +Merlin 18340 18350 outbed_hi 1 +Merlin 18810 18820 outbed_hi 1 +Merlin 19080 19090 outbed_lo -1 +Merlin 19220 19230 outbed_lo -1 +Merlin 20240 20250 outbed_lo -1 +Merlin 21650 21660 outbed_lo -1 +Merlin 21810 21820 outbed_lo -1 +Merlin 22310 22320 outbed_hi 1 +Merlin 23630 23640 outbed_lo -1 +Merlin 23760 23770 outbed_hi 1 +Merlin 24580 24590 outbed_hi 1 +Merlin 24860 24870 outbed_hi 1 +Merlin 25030 25040 outbed_lo -1 +Merlin 25700 25710 outbed_hi 1 +Merlin 26260 26270 outbed_lo -1 +Merlin 26450 26460 outbed_hi 1 +Merlin 26560 26570 outbed_lo -1 +Merlin 26610 26620 outbed_lo -1 +Merlin 27220 27230 outbed_hi 1 +Merlin 27280 27290 outbed_hi 1 +Merlin 27670 27680 outbed_lo -1 +Merlin 27970 27980 outbed_lo -1 +Merlin 28500 28510 outbed_hi 1 +Merlin 29400 29410 outbed_lo -1 +Merlin 29740 29750 outbed_hi 1 +Merlin 29910 29920 outbed_lo -1 +Merlin 31540 31550 outbed_hi 1 +Merlin 31890 31900 outbed_hi 1 +Merlin 32970 32980 outbed_hi 1 +Merlin 33460 33470 outbed_lo -1 +Merlin 33910 33920 outbed_lo -1 +Merlin 33980 33990 outbed_hi 1 +Merlin 34470 34490 outbed_lo -1 +Merlin 35320 35330 outbed_lo -1 +Merlin 35440 35450 outbed_hi 1 +Merlin 35620 35630 outbed_lo -1 +Merlin 35880 35890 outbed_lo -1 +Merlin 36510 36520 outbed_hi 1 +Merlin 36810 36820 outbed_lo -1 +Merlin 37230 37240 outbed_lo -1 +Merlin 37560 37570 outbed_lo -1 +Merlin 37750 37760 outbed_lo -1 +Merlin 38330 38340 outbed_lo -1 +Merlin 39600 39610 outbed_lo -1 +Merlin 39690 39700 outbed_hi 1 +Merlin 40350 40360 outbed_hi 1 +Merlin 40550 40560 outbed_hi 1 +Merlin 40780 40790 outbed_hi 1 +Merlin 41390 41400 outbed_hi 1 +Merlin 41900 41910 outbed_lo -1 +Merlin 41940 41950 outbed_hi 1 +Merlin 42050 42060 outbed_lo -1 +Merlin 42470 42480 outbed_hi 1 +Merlin 42560 42570 outbed_hi 1 +Merlin 42730 42740 outbed_hi 1 +Merlin 43010 43020 outbed_hi 1 +Merlin 43910 43920 outbed_hi 1 +Merlin 44440 44450 outbed_lo -1 +Merlin 44670 44680 outbed_hi 1 +Merlin 46010 46020 outbed_hi 1 +Merlin 46470 46480 outbed_hi 1 +Merlin 46610 46620 outbed_lo -1 +Merlin 49290 49300 outbed_lo -1 +Merlin 49370 49380 outbed_hi 1 +Merlin 50310 50320 outbed_hi 1 +Merlin 50880 50890 outbed_lo -1 +Merlin 52230 52240 outbed_lo -1 +Merlin 52460 52470 outbed_lo -1 +Merlin 52740 52750 outbed_lo -1 +Merlin 52770 52780 outbed_lo -1 +Merlin 52980 52990 outbed_hi 1 +Merlin 53150 53160 outbed_lo -1 +Merlin 54630 54640 outbed_hi 1 +Merlin 54830 54840 outbed_lo -1 +Merlin 55430 55440 outbed_hi 1 +Merlin 56020 56030 outbed_lo -1 +Merlin 56450 56460 outbed_hi 1 +Merlin 56630 56640 outbed_hi 1 +Merlin 57220 57230 outbed_lo -1 +Merlin 57610 57620 outbed_hi 1 +Merlin 57790 57800 outbed_hi 1 +Merlin 58730 58740 outbed_hi 1 +Merlin 59360 59370 outbed_hi 1 +Merlin 59450 59460 outbed_hi 1 +Merlin 60210 60220 outbed_hi 1 +Merlin 61110 61120 outbed_hi 1 +Merlin 61250 61260 outbed_lo -1 +Merlin 62470 62480 outbed_hi 1 +Merlin 63190 63200 outbed_lo -1 +Merlin 63290 63300 outbed_lo -1 +Merlin 63330 63340 outbed_hi 1 +Merlin 63350 63360 outbed_hi 1 +Merlin 64660 64670 outbed_lo -1 +Merlin 65050 65060 outbed_lo -1 +Merlin 66210 66220 outbed_hi 1 +Merlin 66510 66520 outbed_hi 1 +Merlin 66660 66670 outbed_lo -1 +Merlin 66850 66860 outbed_hi 1 +Merlin 66900 66910 outbed_lo -1 +Merlin 67150 67160 outbed_lo -1 +Merlin 67690 67700 outbed_hi 1 +Merlin 68760 68780 outbed_hi 1 +Merlin 68790 68800 outbed_lo -1 +Merlin 69660 69670 outbed_lo -1 +Merlin 69850 69860 outbed_hi 1