view coverageBed.xml @ 39:3e38c9b3214f draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bedtools commit 1e25e8d0bd1ebeb2b94c4bbdff222e56defc1fc2"
author iuc
date Wed, 01 Sep 2021 11:04:15 +0000
parents ce3c7f062223
children 7ab85ac5f64b
line wrap: on
line source

<tool id="bedtools_coveragebed" name="bedtools Compute both the depth and breadth of coverage" version="@TOOL_VERSION@" profile="@PROFILE@">
    <!-- Text displayed after the tool name; together they describe what bedtools coverage reports -->
    <description>of features in file B on the features in file A (bedtools coverage)</description>
    <!-- The bio_tools, requirements and stdio macros (and citations at the end of the file) are not
         defined in this file; they are expected to come from the macros.xml import below. -->
    <expand macro="bio_tools" />
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- samtools is requested as an extra package inside the shared requirements macro.
         NOTE(review): presumably the macro also supplies the bedtools requirement itself; confirm in macros.xml. -->
    <expand macro="requirements">
        <requirement type="package" version="@SAMTOOLS_VERSION@">samtools</requirement>
    </expand>
    <expand macro="stdio" />
    <command><![CDATA[
## Assemble the bedtools coverage call and write its sorted result to the output dataset.
## The d, hist, strandedness, reciprocal_overlap and a_or_b booleans render as their
## flag when checked and as an empty string otherwise (see truevalue/falsevalue in the inputs).
bedtools coverage
$d
$hist
$split
$strandedness
## Minimum overlap fractions are only passed on when a value was supplied.
#if str($overlap_a):
    -f $overlap_a
#end if
#if str($overlap_b):
    -F $overlap_b
#end if
$reciprocal_overlap
$a_or_b
-a '$inputA'
## The "iterate" branch hands over a single B file; the "reduce" branch lists
## every selected B file after one -b flag.
#if str($reduce_or_iterate.reduce_or_iterate_selector) == 'iterate':
    -b '$reduce_or_iterate.inputB'
#else:
    -b
    #for $file in $reduce_or_iterate.inputB
        '$file'
    #end for
#end if
@SORTED@
## Order the result by sequence name, then numerically by start coordinate.
## GFF-type inputs carry the start coordinate in column 4 while the other branch
## sorts on column 2, so the numeric key must both begin and end on field 4
## (a key ending before it starts would be empty and leave the start unsorted).
#if $inputA.is_of_type('gff'):
    | sort -k1,1 -k4,4n
#else:
    | sort -k1,1 -k2,2n
#end if
> '$output'
    ]]></command>
    <inputs>
        <!-- File A: the intervals for which coverage is reported -->
        <param name="inputA"
               argument="-a"
               type="data"
               format="bam,@STD_BEDTOOLS_INPUTS@"
               label="File A (on which coverage is calculated)"
               help="BAM/@STD_BEDTOOLS_INPUT_LABEL@ format"/>

        <!-- File(s) B: either one dataset per run (iterate) or several datasets in one run (reduce) -->
        <conditional name="reduce_or_iterate">
            <param name="reduce_or_iterate_selector"
                   type="select"
                   label="Combined or separate output files">
                <option value="iterate" selected="true">One output file per 'input B' file</option>
                <option value="reduce">Single output containing results for all 'input B' files</option>
            </param>
            <when value="iterate">
                <param name="inputB"
                       argument="-b"
                       type="data"
                       format="bam,@STD_BEDTOOLS_INPUTS@"
                       label="File B (for which coverage is calculated)"
                       help="BAM/@STD_BEDTOOLS_INPUT_LABEL@ format"/>
            </when>
            <when value="reduce">
                <param name="inputB"
                       argument="-b"
                       type="data"
                       format="bam,@STD_BEDTOOLS_INPUTS@"
                       multiple="true"
                       label="File(s) B (for which coverage is calculated)"
                       help="BAM/@STD_BEDTOOLS_INPUT_LABEL@ format"/>
            </when>
        </conditional>

        <!-- The split parameter comes from a shared macro that is not defined in this file -->
        <expand macro="split" />

        <!-- Boolean switches: each one contributes its flag to the command line only when checked -->
        <param name="strandedness"
               argument="-s"
               type="boolean"
               truevalue="-s"
               falsevalue=""
               checked="false"
               label="Force strandedness"
               help="Only report hits in B that overlap A on the same strand. By default, overlaps are reported without respect to strand"/>
        <param argument="-d"
               type="boolean"
               truevalue="-d"
               falsevalue=""
               checked="false"
               label="Report the depth at each position in each A feature"
               help="Positions reported are one based. Each position and depth follow the complete B feature"/>
        <param argument="-hist"
               type="boolean"
               truevalue="-hist"
               falsevalue=""
               checked="false"
               label="Report a histogram of coverage for each feature in A as well as a summary histogram for all features in A"
               help="Additional columns after each feature in A: 1) depth 2) # bases at depth 3) size of A 4) % of A at depth"/>

        <!-- Overlap thresholds for -f (overlap_a) and -F (overlap_b); the overlap macro itself is not defined in this file -->
        <expand macro="overlap" name="overlap_a" />
        <expand macro="overlap" name="overlap_b" argument="-F" fracof="B" />
        <param name="reciprocal_overlap"
               argument="-r"
               type="boolean"
               truevalue="-r"
               falsevalue=""
               checked="false"
               label="Require that the fraction overlap be reciprocal for A AND B."
               help="if -f is 0.90 and -r is used, this requires that B overlap 90% of A and A _also_ overlaps 90% of B"/>
        <param name="a_or_b"
               argument="-e"
               type="boolean"
               truevalue="-e"
               falsevalue=""
               checked="false"
               label="Require that the minimum fraction be satisfied for A OR B."
               help="If -e is used with -f 0.90 and -F 0.10 this requires that either 90% of A is covered OR 10% of  B is covered. Without -e, both fractions would have to be satisfied"/>

        <!-- -sorted -g  -->
        <expand macro="sorted" />
    </inputs>
    <outputs>
        <!-- Single result dataset, typed as BED, taking its metadata from file A -->
        <data name="output"
              format="bed"
              metadata_source="inputA"
              label="Count of overlaps on ${inputA.name}"/>
    </outputs>
    <tests>
        <!-- 1: BED on BED with every option left at its default -->
        <test>
            <param name="inputA" value="coverageBedA.bed" ftype="bed"/>
            <conditional name="reduce_or_iterate">
                <param name="inputB" value="coverageBedB.bed" ftype="bed"/>
            </conditional>
            <output name="output" file="coverageBed_result1.bed" ftype="bed"/>
        </test>
        <!-- 2: BAM as file B, with the sorted option switched on -->
        <test>
            <param name="inputA" value="multiCov1.bed" ftype="bed"/>
            <conditional name="reduce_or_iterate">
                <param name="inputB" value="srma_in3.bam" ftype="bam"/>
            </conditional>
            <param name="sorted" value="true"/>
            <output name="output" file="multicov1_by_srma_in3.cov.bed" ftype="bed"/>
        </test>
        <!-- 3: same data as test 2 through the "reduce" branch; the same result file is expected -->
        <test>
            <param name="inputA" value="multiCov1.bed" ftype="bed"/>
            <conditional name="reduce_or_iterate">
                <param name="reduce_or_iterate_selector" value="reduce"/>
                <param name="inputB" value="srma_in3.bam" ftype="bam"/>
            </conditional>
            <param name="sorted" value="true"/>
            <output name="output" file="multicov1_by_srma_in3.cov.bed" ftype="bed"/>
        </test>
        <!-- 4: overlap_b (-F) set to 1 -->
        <test>
            <param name="inputA" value="coverageBedA.bed" ftype="bed"/>
            <conditional name="reduce_or_iterate">
                <param name="inputB" value="coverageBedB.bed" ftype="bed"/>
            </conditional>
            <param name="overlap_b" value="1"/>
            <output name="output" file="coverageBed_result2_F1.bed" ftype="bed"/>
        </test>
        <!-- 5: overlap_a (-f) set to 1E-5 combined with the reciprocal switch (-r) -->
        <test>
            <param name="inputA" value="coverageBedA.bed" ftype="bed"/>
            <conditional name="reduce_or_iterate">
                <param name="inputB" value="coverageBedB.bed" ftype="bed"/>
            </conditional>
            <param name="overlap_a" value="1E-5"/>
            <param name="reciprocal_overlap" value="true"/>
            <output name="output" file="coverageBed_result3_f1r.bed" ftype="bed"/>
        </test>
        <!-- 6: "reduce" branch with two BAM files supplied as file B -->
        <test>
            <param name="inputA" value="multiCov1.bed" ftype="bed"/>
            <conditional name="reduce_or_iterate">
                <param name="reduce_or_iterate_selector" value="reduce"/>
                <param name="inputB" value="srma_in3.bam,coverageBed.bam" ftype="bam"/>
            </conditional>
            <param name="sorted" value="true"/>
            <output name="output" file="coverageBed_result4_2bam.bed" ftype="bed"/>
        </test>
        <!-- 7: BAM as file B without setting the sorted option -->
        <test>
            <param name="inputA" value="coverageBedA2.bed" ftype="bed"/>
            <conditional name="reduce_or_iterate">
                <param name="inputB" value="coverageBed.bam" ftype="bam"/>
            </conditional>
            <output name="output" file="coverageBed_result5_unsorted.bed" ftype="bed"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

`bedtools coverage`_ computes both the *depth* and *breadth* of coverage of features in
file B on the features in file A. For example, ``bedtools coverage`` can compute the coverage of sequence alignments
(file B) across 1 kilobase (arbitrary) windows (file A) tiling a genome of interest.
One advantage that ``bedtools coverage`` offers is that it not only *counts* the number of features that
overlap an interval in file A, it also computes the fraction of bases in the interval in A that were overlapped by one or more features.
Thus, ``bedtools coverage`` also computes the *breadth* of coverage for each interval in A.

.. _bedtools coverage: https://bedtools.readthedocs.io/en/latest/content/tools/coverage.html

.. class:: infomark

Each line of the output consists of an interval from A, followed by:

1. The number of features in B that overlapped (by at least one base pair) the A interval.
2. The number of bases in A that had non-zero coverage from features in B.
3. The length of the entry in A.
4. The fraction of bases in A that had non-zero coverage from features in B.

@REFERENCES@
    ]]></help>
    <expand macro="citations" />
</tool>