view bigwig_outlier_bed.xml @ 6:eb17eb8a3658 draft

planemo upload commit 1baff96e75def9248afdcf21edec9bdc7ed42b1f-dirty
author fubar
date Tue, 23 Jul 2024 23:12:23 +0000
parents 68cb8e7e266b
children c8e22efcaeda
line wrap: on
line source

<tool name="Bigwig extremes to bed features" id="bigwig_outlier_bed" version="@TOOL_VERSION@" profile="22.05">
  <description>Writes high and low bigwig runs as features in a bed file</description>

  <!-- Version tokens. @TOOL_VERSION@ is used for the tool's own version attribute
       and also as the pinned pybigtools package version below. -->
  <macros>
    <token name="@TOOL_VERSION@">0.2.0</token>
    <token name="@NUMPY_VERSION@">2.0.0</token>
    <token name="@PYTHON_VERSION@">3.12.3</token>
  </macros>

  <!-- EDAM ontology annotations and bio.tools cross reference. -->
  <edam_topics>
    <edam_topic>topic_0157</edam_topic>
    <edam_topic>topic_0092</edam_topic>
  </edam_topics>
  <edam_operations>
    <edam_operation>operation_0337</edam_operation>
  </edam_operations>
  <xrefs>
    <xref type="bio.tools">bigtools</xref>
  </xrefs>

  <!-- Runtime dependencies, each pinned through one of the tokens above. -->
  <requirements>
    <requirement type="package" version="@PYTHON_VERSION@">python</requirement>
    <requirement type="package" version="@NUMPY_VERSION@">numpy</requirement>
    <requirement type="package" version="@TOOL_VERSION@">pybigtools</requirement>
  </requirements>

  <!-- The only file from the tool directory that the command invokes. -->
  <required_files>
    <include path="bigwig_outlier_bed.py"/>
  </required_files>

  <!-- Prints the installed pybigtools distribution version. -->
  <version_command><![CDATA[python -c "import pybigtools; from importlib.metadata import version; print(version('pybigtools'))"]]></version_command>
  <command><![CDATA[
## Command line for bigwig_outlier_bed.py.
## re is used only to sanitise dataset names before they reach the shell.
#import re
python '${__tool_directory__}/bigwig_outlier_bed.py'
## One path per selected bigwig.
--bigwig
#for $bw in $bigwig
  '$bw'
#end for
## One label per bigwig, in the same order as the paths above.
## Dataset names are user controlled: a name containing a single quote would
## terminate the quoted shell word, so every character other than word
## characters, space, '.' and '-' is replaced with '_' before interpolation.
--bigwiglabels
#for $bw in $bigwig
  #set $label = re.sub(r'[^\w .-]', '_', str($bw.name))
  '$label'
#end for
--outbeds '$outbeds'
## Only pass the bed output paths that the selected outbeds mode produces;
## these conditions mirror the filters on the corresponding outputs.
#if $outbeds in ['outhilo', 'outall']
  --bedouthilo '$bedouthilo'
#end if
#if $outbeds in ['outhi', 'outall', 'outlohi']
  --bedouthi '$bedouthi'
#end if
#if $outbeds in ['outlo', 'outall', 'outlohi']
  --bedoutlo '$bedoutlo'
#end if
--minwin '$minwin'
## Quantile cut points are passed only when they have a value.
#if $qhi
  --qhi '$qhi'
#end if
#if $qlo
  --qlo '$qlo'
#end if
## The statistics table is requested explicitly, or implicitly by the
## table-only output mode.
#if $tableout == "create" or $outbeds == "outtab"
  --tableoutfile '$tableoutfile'
#end if
]]></command>
  <inputs>
    <!-- One or more bigwig datasets; all are passed to the script in selection order. -->
    <param name="bigwig" type="data" optional="false" label="Choose one or more bigwig file(s) to return outlier regions as a bed file" 
      help="If more than one, MUST all use the same reference sequence to be displayable. Feature names will include the bigwig label." format="bigwig" multiple="true"/>
    <!-- A run length must be at least one base, so zero and negative values are rejected in the form. -->
    <param name="minwin" type="integer" value="10" min="1" label="Minimum continuous bases to count as a high or low bed feature" 
      help="Continuous features as long or longer than this window size will appear as bed features"/>
    <!-- Quantiles are fractions, so both cut points are restricted to the closed interval 0 to 1. -->
    <param name="qhi" type="float" value="0.99" min="0.0" max="1.0" label="Quantile cutoff for a high region - 0.99 will cut off at or above the 99th percentile" help="Required" optional="false"/>
    <param name="qlo" type="float" value="0.01" min="0.0" max="1.0" label="Quantile cutoff for a low region - 0.01 will cut off at or below the 1st percentile." help="Optional" optional="true"/>
    <!-- Selects which bed outputs are produced; the command and the output filters both branch on this value. -->
    <param name="outbeds" type="select" label="Select the required bed file outputs" help="Any combination of the 3 different kinds of bed file output can be made">
      <option value="outhilo" selected="true">Make 1 bed output with both low and high regions</option>
      <option value="outhi">Make 1 bed output with high regions only</option>
      <option value="outlo">Make 1 bed output with low regions only</option>
      <option value="outall">Make 3 bed outputs with low and high together in one, high in one and low in the other</option>
      <option value="outlohi">Make 2 bed outputs with high in one and low in the other</option>
      <option value="outtab">NO bed outputs. Report bigwig value distribution only</option>
    </param>
    <!-- Controls the optional per-contig statistics table. -->
    <param name="tableout" type="select" label="Write a table showing contig statistics for each bigwig input" help="">
      <option value="donotmake">Do not create this report</option>
      <option value="create" selected="true">Create this report</option>
    </param>
  </inputs>
  <outputs>
    <!-- Each filter must match the condition under which the command passes the
         corresponding output path to the script. -->
    <data name="bedouthilo" format="bed" label="High_and_low_bed" hidden="false">
      <filter>outbeds in ["outall", "outhilo"]</filter>
    </data>
    <data name="bedouthi" format="bed" label="High bed" hidden="false">
      <filter>outbeds in ["outall", "outlohi", "outhi"]</filter>
    </data>
    <data name="bedoutlo" format="bed" label="Low bed" hidden="false">
      <filter>outbeds in ["outall", "outlohi", "outlo"]</filter>
    </data>
    <!-- The command requests the table when the report is enabled OR when the
         table-only mode is selected. Without the second clause, choosing
         "outtab" with the report disabled would filter out every output. -->
    <data name="tableoutfile" format="tabular" label="Contig statistics" hidden="false">
      <filter>tableout == "create" or outbeds == "outtab"</filter>
    </data>
  </outputs>
  <tests>
    <!-- Single bigwig, combined high/low bed only, no statistics table. -->
    <test expect_num_outputs="1">
      <param name="bigwig" value="bigwig_sample"/>
      <param name="outbeds" value="outhilo"/>
      <param name="minwin" value="10"/>
      <param name="qhi" value="0.99"/>
      <param name="qlo" value="0.01"/>
      <param name="tableout" value="donotmake"/>
      <output name="bedouthilo" value="bedouthilo_sample" compare="diff" lines_diff="0"/>
    </test>

    <!-- Two bigwigs, table-only mode: no bed outputs, statistics table only. -->
    <test expect_num_outputs="1">
      <param name="bigwig" value="bigwig_sample,1.bigwig"/>
      <param name="outbeds" value="outtab"/>
      <param name="minwin" value="10"/>
      <param name="qhi" value="0.99"/>
      <param name="qlo" value="0.01"/>
      <param name="tableout" value="create"/>
      <output name="tableoutfile" value="table_only_sample" compare="diff" lines_diff="0"/>
    </test>

    <!-- Single bigwig, combined high/low bed plus statistics table. -->
    <test expect_num_outputs="2">
      <param name="bigwig" value="bigwig_sample"/>
      <param name="outbeds" value="outhilo"/>
      <param name="minwin" value="10"/>
      <param name="qhi" value="0.99"/>
      <param name="qlo" value="0.01"/>
      <param name="tableout" value="create"/>
      <output name="bedouthilo" value="bedouthilo_sample" compare="diff" lines_diff="0"/>
      <output name="tableoutfile" value="table_sample" compare="diff" lines_diff="0"/>
    </test>

    <!-- High-only bed with the optional low quantile left empty. -->
    <test expect_num_outputs="2">
      <param name="bigwig" value="bigwig_sample"/>
      <param name="outbeds" value="outhi"/>
      <param name="minwin" value="10"/>
      <param name="qhi" value="0.99"/>
      <param name="qlo" value=""/>
      <param name="tableout" value="create"/>
      <output name="bedouthi" value="bedouthi_qlo_notset_sample" compare="diff" lines_diff="0"/>
      <output name="tableoutfile" value="table_qlo_notset_sample" compare="diff" lines_diff="0"/>
    </test>

    <!-- Separate high and low beds plus table, with a 1 base window and 0.9 / 0.1 quantiles. -->
    <test expect_num_outputs="3">
      <param name="bigwig" value="bigwig_sample"/>
      <param name="outbeds" value="outlohi"/>
      <param name="minwin" value="1"/>
      <param name="qhi" value="0.9"/>
      <param name="qlo" value="0.1"/>
      <param name="tableout" value="create"/>
      <output name="bedouthi" value="bedouthi_sample" compare="diff" lines_diff="0"/>
      <output name="bedoutlo" value="bedoutlo_sample" compare="diff" lines_diff="0"/>
      <output name="tableoutfile" value="table3_sample" compare="diff" lines_diff="0"/>
    </test>

    <!-- Two bigwigs, all three bed outputs plus table. -->
    <test expect_num_outputs="4">
      <param name="bigwig" value="bigwig_sample,1.bigwig"/>
      <param name="outbeds" value="outall"/>
      <param name="minwin" value="1"/>
      <param name="qhi" value="0.9"/>
      <param name="qlo" value="0.1"/>
      <param name="tableout" value="create"/>
      <output name="bedouthilo" value="bedouthilo2_sample" compare="diff" lines_diff="0"/>
      <output name="bedoutlo" value="bedoutlo2_sample" compare="diff" lines_diff="0"/>
      <output name="bedouthi" value="bedouthi2_sample" compare="diff" lines_diff="0"/>
      <output name="tableoutfile" value="table2_sample" compare="diff" lines_diff="0"/>
    </test>
  </tests>
  <help><![CDATA[

 **Purpose**
 
 *Combine bigwig outlier regions into bed files*
 
 Bigwigs allow quantitative tracks to be viewed in an interactive genome browser like JBrowse2. 
 Peaks are easy to see. Unusually low regions can be harder to spot, even if they are relatively large, unless the view is zoomed right in.
 Automated methods for combining evidence from multiple bigwigs can be useful for constructing browseable *issues* or other kinds of summary bed format tracks.
 For example, combining coverage outlier regions with the frequency of specific dinucleotide short tandem repeats,
 for evaluating technical sequencing technology effects in the evaluation of a genome assembly described at https://github.com/arangrhie/T2T-Polish

 **What does it produce?**

 Bed format results are output, containing each continuous segment of at least *minwin* base pairs above a cut point, or below another cut point. 
 These can be viewed as features on the reference genome using a genome browser tool like JBrowse2.
 Three kinds of bed files can be created depending on the values included.  
 Both high and low regions in one bed output is the default. This can be displayed in JBrowse2 with colour indicating the high or low status, 
 one less track and a little easier to understand. High and low features can be output as separate bed files.

 **How is it controlled?**

 The cut points are calculated using a user supplied quantile, from each chromosome's bigwig value distribution. 
 The defaults are 0.99 and 0.01 and the default *minwin* is 10.  
 The probability of 10 values at or below the 1st percentile purely by chance is about 0.01**10, so false positives should be
 rare, even in a 3GB genome.  
 This data driven and non-parametric method is preferred for the asymmetrical distributions found in typical bigwigs, such as depth of coverage 
 for genome sequencing reads. Coverage values are truncated at zero, and regions with very high values often form a long sparse right tail. 

 **How do I choose the input data?**

 One or more bigwigs can be selected as inputs.  
 Multiple bigwigs will be combined in bed files, so must share the reference genome to display
 using JBrowse2.
 
 .. class:: warningmark

 **Lower quantile may not behave as expected in bigwigs with large fractions of zero values**

 The lower cut point may be problematic for integer values like coverage if many values are zero. For example, if 5% of bases have zero coverage, the 1st percentile is also zero, 
 but that cut point will include the entire 5% *at or below 0*.

 
  ]]></help>
  <citations>
    <citation type="doi">10.1093/bioinformatics/btae350</citation>
  </citations>
</tool>