bigwig_outlier_bed: bigwig_outlier

comparison bigwig_outlier_bed.xml @ 0:c71db540eb38 draft

planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty

author	fubar
date	Mon, 01 Jul 2024 02:48:46 +0000
parents
children	a7d26bca0a3b

comparison

equal deleted inserted replaced

--1:000000000000
+:c71db540eb38
+<tool name="bigwig_outlier_bed" id="bigwigoutlierbed" version="0.01" profile="22.05">
+<!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
+<!--Created by toolfactory@galaxy.org at 30/06/2024 19:44:14 using the Galaxy Tool Factory.-->
<!-- One-line summary of the tool. -->
<description>Writes high and low bigwig regions as features in a bed file</description>
<!-- EDAM ontology annotations (topics and operations) for this tool. -->
<edam_topics>
    <edam_topic>topic_0157</edam_topic>
    <edam_topic>topic_0092</edam_topic>
</edam_topics>
<edam_operations>
    <edam_operation>operation_0337</edam_operation>
</edam_operations>
<!-- Pinned packages: the embedded script imports numpy and pybigtools. -->
<requirements>
    <requirement type="package" version="3.12.3">python</requirement>
    <requirement type="package" version="2.0.0">numpy</requirement>
    <requirement type="package" version="0.1.4">pybigtools</requirement>
</requirements>
<!-- Prints the installed pybigtools version. -->
<version_command><![CDATA[python -c "import pybigtools; from importlib.metadata import version; print(version('pybigtools'))"]]></version_command>
<!-- Runs the embedded script ($runme) on the selected bigwig dataset(s).
     Each dataset is passed as its own argument: a multiple="true" data parameter
     used directly renders as a single comma-joined string, which the script's
     nargs='+' bigwig option would receive as one (non-existent) path.
     The tableout flag is only added when the user picks "set". -->
<command><![CDATA[python
'$runme'
--bigwig
#for $bw in $bigwig
'$bw'
#end for
--bedouthilo
'$bedouthilo'
--minwin
'$minwin'
--qhi
'$qhi'
--qlo
'$qlo'
#if $tableout == "set"
--tableout
#end if
--bigwiglabels
'$bigwiglabels']]></command>
+<configfiles>
+<configfile name="runme"><![CDATA[#raw
+"""
+Bigwigs are great, but hard to reliably "see" small low coverage or small very high coverage regions.
+Colouring in JB2 tracks will need a new plugin, so this code will find bigwig regions above and below a chosen percentile point.
+0.99 and 0.01 work well in testing with a minimum span of 10 bp.
+Multiple bigwigs **with the same reference** can be combined - bed segments will be named appropriately
+Combining multiple references works but is silly because display will rely on one reference so features mapped to other references will not appear.
+Tricksy numpy method from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
Takes about 95 seconds for a 17MB test wiggle.
JBrowse2 bed displays normally ignore the score, so separate low/high bed file outputs could be provided as an option.
+Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores
+Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi).
+"""
+import argparse
+import numpy as np
+import pybigtools
+import sys
+from pathlib import Path
class findOut():
    """
    Find contiguous runs of unusually high or low bigwig values and write them as bed features.

    For every contig of every input bigwig, the qhi and qlo quantiles of the covered
    (non-NaN) values are used as cutoffs. Runs of values at or above the high cutoff,
    or at or below the low cutoff, that are at least minwin positions long become bed
    intervals named <label>_hi (score 1) or <label>_lo (score -1).

    All the work is done at construction time: __init__ calls makeBed and keeps the
    statistics table it returns in self.restab.
    """

    def __init__(self, args):
        """
        args: namespace with the attributes bigwig, bigwiglabels, minwin, qlo, qhi,
        bedouthilo, bedouthi, bedoutlo and tableout (see the argparse setup below).
        """
        self.bwnames = self._splitPaths(args.bigwig)
        self.bwlabels = args.bigwiglabels
        self.bedwin = args.minwin
        self.qlo = args.qlo
        self.qhi = args.qhi
        self.bedouthilo = args.bedouthilo
        self.bedouthi = args.bedouthi
        self.bedoutlo = args.bedoutlo
        self.tableout = args.tableout
        # Cutoffs stay NaN when the matching quantile is not requested, so the
        # statistics row can always be formatted.
        self.bwtop = float('nan')
        self.bwbot = float('nan')
        self.restab = self.makeBed()

    @staticmethod
    def _splitPaths(paths):
        """
        Expand comma-joined path arguments into separate paths.

        An item is only split when it contains a comma and is not itself an existing
        path, so real file names that contain commas are left alone.
        """
        expanded = []
        for item in paths or []:
            if ',' in item and not Path(item).exists():
                expanded.extend(part for part in item.split(',') if part)
            else:
                expanded.append(item)
        return expanded

    def _labelFor(self, idx, bwname):
        """
        Return the space-free label for the idx'th bigwig.

        When fewer labels than bigwigs were supplied, the last label is reused with
        a 1-based position suffix; with no labels at all the file stem is used.
        """
        labels = self.bwlabels or []
        if idx < len(labels):
            label = labels[idx]
        elif labels:
            label = '%s%d' % (labels[-1], idx + 1)
        else:
            label = Path(bwname).stem
        return label.replace(" ", '')

    def processVals(self, bw, isTop):
        """
        Return an (n, 2) array of [start, end) index pairs for the contiguous runs of
        bw that are >= self.bwtop (isTop) or <= self.bwbot (not isTop).

        NaN entries never satisfy either comparison, so they terminate a run.
        """
        # http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
        with np.errstate(invalid='ignore'):  # comparing NaN is expected here
            if isTop:
                bwex = np.r_[False, bw >= self.bwtop, False]  # pad so every run has two edges
            else:
                bwex = np.r_[False, bw <= self.bwbot, False]
        bwexd = np.diff(bwex)  # True wherever a run starts or stops
        bwexdnz = bwexd.nonzero()[0]
        bwregions = np.reshape(bwexdnz, (-1, 2))
        return bwregions

    def writeBed(self, bed, bedfname):
        """
        Write the (contig, start, end, name, score) tuples in bed, sorted, to bedfname.

        The caller's list is not modified. Nothing is written for an empty list.
        """
        beds = ['%s\t%d\t%d\t%s\t%d' % x for x in sorted(bed)]
        with open(bedfname, "w") as bedf:
            bedf.write(''.join(line + '\n' for line in beds))
        print('Wrote %d bed regions to %s' % (len(bed), bedfname))

    def _scanContig(self, contig, bw, bwlabel, bedhi, bedlo):
        """
        Append the high/low outlier runs of one contig to bedhi/bedlo and return the
        tab separated statistics row, or None when the contig has no non-NaN values.

        Quantiles and statistics use only the non-NaN values, but runs are located in
        the full array so that the reported start/end are positions in bw itself.
        """
        covered = bw[~np.isnan(bw)]
        if covered.size == 0:
            return None
        if self.qhi is not None:
            self.bwtop = np.quantile(covered, self.qhi)
            for start, end in self.processVals(bw, isTop=True):
                if end - start >= self.bedwin:
                    bedhi.append((contig, int(start), int(end), '%s_hi' % (bwlabel), 1))
        if self.qlo is not None:
            self.bwbot = np.quantile(covered, self.qlo)
            for start, end in self.processVals(bw, isTop=False):
                if end - start >= self.bedwin:
                    bedlo.append((contig, int(start), int(end), '%s_lo' % (bwlabel), -1))
        return '%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f' % (
            contig, np.size(covered), np.mean(covered), np.std(covered),
            np.min(covered), np.max(covered), self.bwtop, self.bwbot)

    def makeBed(self):
        """
        Process every bigwig, write the requested bed file(s) and optional table,
        and return the table as a list of lines (one header plus rows per bigwig).
        """
        bedhi = []
        bedlo = []
        restab = []
        print('bwnames=', self.bwnames, "bwlabs=", self.bwlabels)
        link = Path('in.bw')
        for idx, bwname in enumerate(self.bwnames):
            bwlabel = self._labelFor(idx, bwname)
            # Only a link left by a previous bigwig is replaced; a real file with
            # this name is never deleted (symlink_to then raises FileExistsError).
            if link.is_symlink():
                link.unlink()
            link.symlink_to(bwname)  # required by pybigtools (!)
            bwf = pybigtools.open('in.bw')
            bwtab = ["contig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"]
            for contig in sorted(bwf.chroms().keys()):
                row = self._scanContig(contig, bwf.values(contig), bwlabel, bedhi, bedlo)
                if row is not None:
                    bwtab.append(row)
            print('\n'.join(bwtab), '\n')
            restab.extend(bwtab)
        if self.tableout:
            with open(self.tableout, 'w') as t:
                t.write('\n'.join(restab))
                t.write('\n')
        # "is not None" so that a quantile of 0.0 still counts as requested.
        if self.bedoutlo and self.qlo is not None:
            self.writeBed(bedlo, self.bedoutlo)
        if self.bedouthi and self.qhi is not None:
            self.writeBed(bedhi, self.bedouthi)
        if self.bedouthilo:
            self.writeBed(bedlo + bedhi, self.bedouthilo)
        return restab


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a('-m', '--minwin', default=10, type=int)
    a('-l', '--qlo', default=None, type=float)
    a('-i', '--qhi', default=None, type=float)
    a('-w', '--bigwig', nargs='+', required=True)
    a('-n', '--bigwiglabels', nargs='+')
    a('-o', '--bedouthilo', default=None, help="optional high and low combined bed")
    a('-u', '--bedouthi', default=None, help="optional high only bed")
    a('-b', '--bedoutlo', default=None, help="optional low only bed")
    # The flag may be given bare, in which case the table goes to the const path.
    a('-t', '--tableout', nargs='?', const='bigwig_outlier_table.tsv', default=None,
      help="optional contig statistics table; path defaults to bigwig_outlier_table.tsv")
    args = parser.parse_args()
    print('args=', args)
    if not (args.bedouthilo or args.bedouthi or args.bedoutlo):
        sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need a bed output choice - must be one of low only, high only or both combined")
        sys.exit(2)
    if args.qlo is None and args.qhi is None:
        sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need one or both of quantile cutpoints qhi and qlo")
        sys.exit(2)
    # The constructor does all the work, including writing the optional table.
    findOut(args)
+#end raw]]></configfile>
+</configfiles>
<!-- User-facing parameters. Numeric ranges are validated here so that an
     out-of-range quantile is rejected in the form rather than failing inside
     the script. -->
<inputs>
    <param name="bigwig" type="data" format="bigwig" multiple="true" optional="false"
           label="Bigwig file(s) to process. "
           help="If more than one, MUST all use the same reference sequence to be displayable. Feature names will include the bigwig label."/>
    <!-- A run shorter than one base cannot exist, so the window must be at least 1. -->
    <param name="minwin" type="integer" value="10" min="1"
           label="Minimum continuous bases to count as a high or low bed feature"
           help="Actual run length will be found and used for continuous features as long or longer."/>
    <!-- Quantiles are fractions and must lie between 0 and 1 inclusive. -->
    <param name="qhi" type="float" value="0.99" min="0" max="1"
           label="Quantile cutoff for a high region - 0.99 will cut off at or above the 99th percentile"
           help=""/>
    <param name="qlo" type="float" value="0.01" min="0" max="1"
           label="Quantile cutoff for a low region - 0.01 will cut off at or below the 1st percentile."
           help=""/>
    <!-- The command section adds the tableout flag only when "set" is chosen. -->
    <param name="tableout" type="select" display="radio"
           label="Write a table showing contig statistics for each bigwig"
           help="">
        <option value="notset">Do not set this flag</option>
        <option value="set">Set this flag</option>
    </param>
    <param name="bigwiglabels" type="text" value="outbed"
           label="Label to use in bed feature names to indicate source bigwig contents - such as coverage"
           help=""/>
</inputs>
<!-- Single bed dataset; the command section passes its path as the bedouthilo option. -->
<outputs>
    <data name="bedouthilo"
          format="bed"
          hidden="false"
          label="Both high and low contiguous regions as long or longer than window length into one bed "/>
</outputs>
<!-- One functional test: default parameters on the sample bigwig, with the bed
     output compared line for line against the stored expected file. -->
<tests>
    <test>
        <param name="bigwig" value="bigwig_sample"/>
        <param name="minwin" value="10"/>
        <param name="qhi" value="0.99"/>
        <param name="qlo" value="0.01"/>
        <param name="tableout" value="notset"/>
        <param name="bigwiglabels" value="outbed"/>
        <output name="bedouthilo" value="bedouthilo_sample" compare="diff" lines_diff="0"/>
    </test>
</tests>
+<help><![CDATA[
+**What it Does**
Takes one or more bigwigs mapped to the same reference and finds every contiguous region, at least the minimum window size long, whose values are all at or above the upper quantile cutoff or all at or below the lower quantile cutoff.
A window size of 10 and quantiles of 0.01 and 0.99 generally work well.
+]]></help>
<!-- NOTE(review): presumably the DOI of the bigtools publication; confirm. -->
<citations>
    <citation type="doi">10.1093/bioinformatics/btae350</citation>
</citations>
+</tool>

Mercurial > repos > fubar > bigwig_outlier_bed

comparison bigwig_outlier_bed.xml @ 0:c71db540eb38 draft