Mercurial > repos > fubar > bigwig_outlier_bed
diff bigwig_outlier_bed.xml @ 0:c71db540eb38 draft
planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
author | fubar |
---|---|
date | Mon, 01 Jul 2024 02:48:46 +0000 |
parents | |
children | a7d26bca0a3b |
line wrap: on
line diff
<tool name="bigwig_outlier_bed" id="bigwigoutlierbed" version="0.01" profile="22.05">
  <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
  <!--Created by toolfactory@galaxy.org at 30/06/2024 19:44:14 using the Galaxy Tool Factory.-->
  <description>Writes high and low bigwig regions as features in a bed file</description>
  <edam_topics>
    <edam_topic>topic_0157</edam_topic>
    <edam_topic>topic_0092</edam_topic>
  </edam_topics>
  <edam_operations>
    <edam_operation>operation_0337</edam_operation>
  </edam_operations>
  <requirements>
    <requirement type="package" version="3.12.3">python</requirement>
    <requirement type="package" version="2.0.0">numpy</requirement>
    <requirement type="package" version="0.1.4">pybigtools</requirement>
  </requirements>
  <version_command><![CDATA[python -c "import pybigtools; from importlib.metadata import version; print(version('pybigtools'))"]]></version_command>
  <!-- '$bigwig' is a multiple="true" data parameter, so it reaches the script as a
       single comma separated string; the script splits it. The optional statistics
       table is written to its own output dataset, because the script's tableout
       option takes a file path and cannot be used as a bare flag. -->
  <command><![CDATA[python
'$runme'
--bigwig
'$bigwig'
--bedouthilo
'$bedouthilo'
--minwin
'$minwin'
--qhi
'$qhi'
--qlo
'$qlo'
#if str($tableout) == "set"
 --tableout
 '$tableoutfile'
#end if
--bigwiglabels
'$bigwiglabels']]></command>
  <configfiles>
    <configfile name="runme"><![CDATA[#raw
"""
Bigwigs are great, but it is hard to reliably "see" small low coverage or small very high coverage regions.
Colouring in JB2 tracks will need a new plugin, so this code will find bigwig regions above and below a chosen percentile point.
0.99 and 0.01 work well in testing with a minimum span of 10 bp.
Multiple bigwigs **with the same reference** can be combined - bed segments will be named appropriately.
Combining multiple references works but is silly because display will rely on one reference so features mapped to other references will not appear.

Tricksy numpy method from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
takes about 95 seconds for a 17MB test wiggle.
JBrowse2 bed displays normally ignore the score, so could provide separate low/high bed file outputs as an option.
Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores.
Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi).

Quantiles and contig statistics are computed from covered (non-NaN) bases only.
Runs are located on the full per-base array, so bed coordinates are true contig
positions even when parts of a contig have no coverage.
"""

import argparse
import sys
from pathlib import Path

import numpy as np
import pybigtools


def splitCommas(values):
    """
    Flatten a list of possibly comma separated strings into a list of items.
    Galaxy passes a multiple="true" data parameter as one comma joined string,
    so "a.bw,b.bw" must become ["a.bw", "b.bw"]. Empty items are dropped.
    None gives an empty list.
    """
    items = []
    for value in values or []:
        items.extend(part for part in value.split(',') if part.strip())
    return items


class findOut():
    """
    Find contiguous runs of unusually high or low bigwig values and write them as bed features.

    args is the argparse namespace built in __main__. It must provide:
    bigwig (list of paths), bigwiglabels (list of labels), minwin (int),
    qlo and qhi (float or None), bedouthilo, bedouthi, bedoutlo and tableout (path or None).

    All the work is done during construction. The per contig statistics table,
    a list of tab separated strings starting with a header row, is kept in self.restab.
    """

    def __init__(self, args):
        self.bwnames = args.bigwig
        self.bwlabels = args.bigwiglabels
        self.bedwin = args.minwin
        self.qlo = args.qlo
        self.qhi = args.qhi
        self.bedouthilo = args.bedouthilo
        self.bedouthi = args.bedouthi
        self.bedoutlo = args.bedoutlo
        self.tableout = args.tableout
        # cutoffs for the contig being processed; NaN means "not requested"
        self.bwtop = float('nan')
        self.bwbot = float('nan')
        self.restab = self.makeBed()

    def labelFor(self, i):
        """
        Return the bed feature label for the i'th bigwig with spaces removed.
        If there are fewer labels than bigwigs, the last label plus the 1-based
        bigwig number is used, so features from different bigwigs stay distinguishable.
        """
        labels = self.bwlabels or []
        if i < len(labels):
            label = labels[i]
        elif labels:
            label = '%s%d' % (labels[-1], i + 1)
        else:
            label = 'bw%d' % (i + 1)
        return label.replace(" ", '')

    def processVals(self, bw, isTop):
        """
        Return an (n, 2) array of half open [start, end) index pairs, one for each
        contiguous run in bw at or above self.bwtop (isTop) or at or below self.bwbot.
        NaN values never satisfy either comparison so they always break a run.
        """
        # http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
        with np.errstate(invalid='ignore'):  # comparing NaN is intended here
            if isTop:
                hits = bw >= self.bwtop
            else:
                hits = bw <= self.bwbot
        # pad with False at both ends so every run has both a start and an end edge
        bwex = np.r_[False, hits, False]
        edges = np.diff(bwex).nonzero()[0]
        return np.reshape(edges, (-1, 2))

    def writeBed(self, bed, bedfname):
        """
        Write bed, a list of (contig, start, end, name, score) tuples, to bedfname
        sorted by contig then start. The input list is not modified.
        An empty list gives an empty file rather than a file holding one blank line.
        """
        rows = ['%s\t%d\t%d\t%s\t%d' % x for x in sorted(bed)]
        with open(bedfname, "w") as bedf:
            if rows:
                bedf.write('\n'.join(rows))
                bedf.write('\n')
        print('Wrote %d bed regions to %s' % (len(rows), bedfname))

    def makeBed(self):
        """
        Scan every contig of every bigwig, collect the high and low runs at least
        self.bedwin bases long, write whichever bed outputs were requested and
        return the statistics table as a list of tab separated rows.
        """
        bedhi = []
        bedlo = []
        restab = ["bigwig\tcontig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"]
        print('bwnames=', self.bwnames, "bwlabs=", self.bwlabels)
        link = Path('in.bw')
        for i, bwname in enumerate(self.bwnames):
            bwlabel = self.labelFor(i)
            # pybigtools requires a recognisable file name (!) so each input is
            # reached through a symlink; remove the one left by the previous bigwig
            if link.is_symlink() or link.exists():
                link.unlink()
            link.symlink_to(Path(bwname).resolve())
            bwf = pybigtools.open('in.bw')
            for chrom in sorted(bwf.chroms().keys()):
                bw = bwf.values(chrom)
                # some have NaN if parts of a contig are not covered. Statistics use
                # covered bases only, but runs must be found on the full array or
                # the bed coordinates would be shifted by the number of NaNs removed.
                covered = bw[~np.isnan(bw)]
                if covered.size == 0:
                    continue  # nothing to summarise on an uncovered contig
                self.bwtop = float('nan')
                self.bwbot = float('nan')
                if self.qhi is not None:
                    self.bwtop = np.quantile(covered, self.qhi)
                    for start, end in self.processVals(bw, isTop=True):
                        if end - start >= self.bedwin:
                            bedhi.append((chrom, start, end, '%s_hi' % (bwlabel), 1))
                if self.qlo is not None:
                    self.bwbot = np.quantile(covered, self.qlo)
                    for start, end in self.processVals(bw, isTop=False):
                        if end - start >= self.bedwin:
                            bedlo.append((chrom, start, end, '%s_lo' % (bwlabel), -1))
                restab.append('%s\t%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f' % (
                    bwlabel, chrom, np.size(covered), np.mean(covered), np.std(covered),
                    np.min(covered), np.max(covered), self.bwtop, self.bwbot))
        print('\n'.join(restab), '\n')
        # "is not None" so that a quantile of 0.0 is honoured
        if self.bedoutlo and self.qlo is not None:
            self.writeBed(bedlo, self.bedoutlo)
        if self.bedouthi and self.qhi is not None:
            self.writeBed(bedhi, self.bedouthi)
        if self.bedouthilo:
            self.writeBed(bedlo + bedhi, self.bedouthilo)
        return restab


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    a = parser.add_argument
    a('-m', '--minwin', default=10, type=int)
    a('-l', '--qlo', default=None, type=float)
    a('-i', '--qhi', default=None, type=float)
    a('-w', '--bigwig', nargs='+', help="bigwig paths, space and/or comma separated")
    a('-n', '--bigwiglabels', nargs='+', help="one label per bigwig, space and/or comma separated")
    a('-o', '--bedouthilo', default=None, help="optional high and low combined bed")
    a('-u', '--bedouthi', default=None, help="optional high only bed")
    a('-b', '--bedoutlo', default=None, help="optional low only bed")
    a('-t', '--tableout', default=None, help="optional path for a tabular file of contig statistics")
    args = parser.parse_args()
    args.bigwig = splitCommas(args.bigwig)
    args.bigwiglabels = splitCommas(args.bigwiglabels)
    print('args=', args)
    if not args.bigwig:
        sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need at least one bigwig file\n")
        sys.exit(2)
    if not (args.bedouthilo or args.bedouthi or args.bedoutlo):
        sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need a bed output choice - must be one of low only, high only or both combined\n")
        sys.exit(2)
    if args.qlo is None and args.qhi is None:
        sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need one or both of quantile cutpoints qhi and qlo\n")
        sys.exit(2)
    found = findOut(args)
    if args.tableout:
        with open(args.tableout, 'w') as tout:
            tout.write('\n'.join(found.restab))
            tout.write('\n')
#end raw]]></configfile>
  </configfiles>
  <inputs>
    <param name="bigwig" type="data" optional="false" label="Bigwig file(s) to process. " help="If more than one, MUST all use the same reference sequence to be displayable. Feature names will include the bigwig label." format="bigwig" multiple="true"/>
    <param name="minwin" type="integer" value="10" label="Minimum continuous bases to count as a high or low bed feature" help="Actual run length will be found and used for continuous features as long or longer."/>
    <param name="qhi" type="float" value="0.99" label="Quantile cutoff for a high region - 0.99 will cut off at or above the 99th percentile" help=""/>
    <param name="qlo" type="float" value="0.01" label="Quantile cutoff for a low region - 0.01 will cut off at or below the 1st percentile." help=""/>
    <param name="tableout" type="select" label="Write a table showing contig statistics for each bigwig" help="" display="radio">
      <option value="notset">Do not set this flag</option>
      <option value="set">Set this flag</option>
    </param>
    <param name="bigwiglabels" type="text" value="outbed" label="Label to use in bed feature names to indicate source bigwig contents - such as coverage" help="For several bigwigs, give one label per bigwig separated by commas, in the same order as the inputs."/>
  </inputs>
  <outputs>
    <data name="bedouthilo" format="bed" label="Both high and low contiguous regions as long or longer than window length into one bed " hidden="false"/>
    <data name="tableoutfile" format="tabular" label="Contig statistics for each bigwig" hidden="false">
      <filter>tableout == "set"</filter>
    </data>
  </outputs>
  <tests>
    <test>
      <output name="bedouthilo" value="bedouthilo_sample" compare="diff" lines_diff="0"/>
      <param name="bigwig" value="bigwig_sample"/>
      <param name="minwin" value="10"/>
      <param name="qhi" value="0.99"/>
      <param name="qlo" value="0.01"/>
      <param name="tableout" value="notset"/>
      <param name="bigwiglabels" value="outbed"/>
    </test>
  </tests>
  <help><![CDATA[
 **What it Does**

 Takes one or more bigwigs mapped to the same reference and finds all the minimum window sized or greater contiguous regions above or below an upper and lower quantile cutoff.
 A window size of 10 works well, and quantiles set at 0.01 and 0.99 will generally work well.

 Quantiles are calculated separately for each contig from the covered bases only.
 Optionally, a table of per contig statistics (n, mean, std, min, max and the two cutoffs) can be written as a tabular output.

 ]]></help>
  <citations>
    <citation type="doi">10.1093/bioinformatics/btae350</citation>
  </citations>
</tool>