Mercurial > repos > iuc > bedtools

<tool id="bedtools_closestbed" name="bedtools ClosestBed" version="@TOOL_VERSION@" profile="@PROFILE@">
    <!-- Short description of what the tool does. -->
    <description>find the closest, potentially non-overlapping interval</description>
    <!-- Shared definitions are imported from macros.xml. The @...@ tokens and the
         macros expanded in this file are presumably defined there (not visible here). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Macro expansions named bio_tools, requirements and stdio; their content
         comes from the imported macros and cannot be verified from this file. -->
    <expand macro="bio_tools" />
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!--
        Cheetah template that assembles the closestBed command line.

        * $addition and $io are boolean parameters whose true values are "-d" and
          "-io" and whose false values are empty, so each renders as its flag or
          as nothing. $strand is not declared in this file; it presumably comes
          from the strand2 macro (confirm in macros.xml).
        * "-D <mode>" is written only when addition2_select is non-empty. The
          $addition2.iu and $addition2.id values are emitted in the same branch;
          they presumably come from the closest_D_option macro.
        * "-mdb" and "-t" are always written with the selected values.
        * "-k" is written only when $k renders as a non-empty string.
        * "-b" receives either the path field of the selected data table entry or
          every selected history dataset, each one single-quoted.
        * Standard output is redirected to the output dataset.
    -->
    <command><![CDATA[
closestBed
$strand
$addition
#if $addition2.addition2_select:
    -D $addition2.addition2_select
    $addition2.iu
    $addition2.id
#end if
$io
-mdb $mdb
-t $ties
#if str($k):
    -k $k
#end if
-a '$inputA'
#if str($overlap_with.source) == "data_table":
    -b '$overlap_with.table.fields.path'
#else
    -b
    #for $file in $overlap_with.inputB:
        '$file'
    #end for
#end if
> '$output'
    ]]></command>
    <inputs>
        <!-- File A, passed to closestBed with -a. -->
        <param name="inputA" argument="-a" type="data" format="@STD_BEDTOOLS_INPUTS@" label="@STD_BEDTOOLS_INPUT_LABEL@ file"/>

        <!-- File(s) B, passed with -b: either datasets from the history or one
             entry of the all_gff data table. -->
        <conditional name="overlap_with">
            <param name="source" type="select" label="Overlap with: will you select a @STD_BEDTOOLS_INPUT_LABEL@ file from your history or use a built-in GFF file?" help="Built-in GFF files contain full annotation of a reference genome">
                <option value="history" selected="true">Use a @STD_BEDTOOLS_INPUT_LABEL@ file from the history</option>
                <option value="data_table">Use a built-in GFF file</option>
            </param>
            <when value="data_table">
                <!-- The command template reads the "path" field of the selected entry. -->
                <param name="table" type="select" label="Select reference GFF" help="If your genome of interest is not listed, contact the Galaxy team">
                    <options from_data_table="all_gff">
                        <filter type="sort_by" column="2"/>
                        <validator type="no_options" message="No files are available"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <!-- multiple="true": every selected dataset is appended after -b. -->
                <param name="inputB" argument="-b" type="data" format="@STD_BEDTOOLS_INPUTS@" multiple="true" label="Select a @STD_BEDTOOLS_INPUT_LABEL@ file" />
            </when>
        </conditional>

        <!-- Tie handling; the selected value is passed as the argument of -t. -->
        <param name="ties" argument="-t" type="select"
            label="How ties for closest feature should be handled"
            help="This occurs when two features in B are exactly equally close to a feature in A.">
            <option value="all" selected="true">all - Report all ties (default)</option>
            <option value="first">first - Report the first tie that occurred in the B file</option>
            <option value="last">last - Report the last tie that occurred in the B file</option>
        </param>

        <!-- NOTE(review): presumably defines the "strand" parameter that the
             command template references as $strand; confirm in macros.xml. -->
        <expand macro="strand2" />

        <!-- Renders as "-d" when checked and as an empty string otherwise. -->
        <param name="addition" argument="-d" type="boolean" truevalue="-d" falsevalue="" checked="false"
            label="In addition to the closest feature in B, report its distance to A as an extra column"
            help="The reported distance for overlapping features will be 0" />

        <!-- Signed distance reporting (-D). The empty option disables it; every
             other option expands closest_D_option, which presumably supplies the
             "iu" and "id" parameters used by the command template. -->
        <conditional name="addition2">
            <param name="addition2_select" argument="-D" type="select"
                label="Add additional columns to report distance to upstream feature. Distance definition"
                help="Like -d, report the closest feature in B, and its distance to A as an extra column. However unlike -d, use negative distances to report upstream features">
                <option value="" selected="true">Do not report the distance at all.</option>
                <option value="ref">Report distance with respect to the reference genome. B features with a lower (start, stop) are upstream. (-ref)</option>
                <option value="a">Report distance with respect to A. When A is on the - strand, "upstream" means B has a higher (start,stop). (-a)</option>
                <option value="b">Report distance with respect to B. When B is on the - strand, "upstream" means A has a higher (start,stop). (-b)</option>
            </param>
            <when value="" />
            <when value="ref">
                <expand macro="closest_D_option" />
            </when>
            <when value="a">
                <expand macro="closest_D_option" />
            </when>
            <when value="b">
                <expand macro="closest_D_option" />
            </when>
        </conditional>

        <!-- Optional: when left empty the command template omits -k entirely. -->
        <param argument="-k" type="integer" min="1" value="1" optional="true"
            label="Report the k closest hits" />

        <!-- Renders as "-io" when checked and as an empty string otherwise. -->
        <param argument="-io" type="boolean" truevalue="-io" falsevalue="" checked="false"
            label="Ignore features in B that overlap A"
            help="That is, we want close, yet not touching features only" />

        <!-- Always passed to the command as the argument of -mdb. -->
        <param argument="-mdb" type="select"
            label="How multiple databases are resolved">
            <option value="each" selected="true">Report closest records for each database. (-each)</option>
            <option value="all">Report closest records among all databases. (-all)</option>
        </param>
    </inputs>
    <outputs>
        <!--
            The label relies on ${on_string} because several B datasets may be
            selected, so there is no single B name to show. The single-B variant
            that was originally wanted is kept here for reference:
            <data name="output" format_source="inputA" metadata_source="inputA" label="Closest regions from ${inputB[0].name} to ${inputA.name}"/>
        -->
        <data name="output"
              format_source="inputA"
              metadata_source="inputA"
              label="Closest regions from ${on_string}"/>
    </outputs>
    <tests>
        <!-- One B dataset from the history, all other options at their defaults. -->
        <test>
            <param name="inputA" value="closestBedA.bed" ftype="bed" />
            <param name="source" value="history" />
            <param name="inputB" value="closestBedB.bed" ftype="bed" />
            <output name="output" file="closestBed_result1.bed" ftype="bed" />
        </test>
        <!-- Two B datasets with the distance column (-d) enabled. -->
        <test>
            <param name="inputA" value="closestBed_a.bed" ftype="bed" />
            <param name="source" value="history" />
            <param name="inputB" value="closestBed_b1.bed,closestBed_b2.bed" ftype="bed" />
            <param name="addition" value="true" />
            <output name="output" file="closestBed_result2.bed" ftype="bed" />
        </test>
        <!-- Two B datasets with -d, resolved with "-mdb all". -->
        <test>
            <param name="inputA" value="closestBed_a.bed" ftype="bed" />
            <param name="source" value="history" />
            <param name="inputB" value="closestBed_b1.bed,closestBed_b2.bed" ftype="bed" />
            <param name="addition" value="true" />
            <param name="mdb" value="all" />
            <output name="output" file="closestBed_result3.bed" ftype="bed" />
        </test>
        <!-- Signed distance relative to the reference genome (-D ref). -->
        <test>
            <param name="inputA" value="closestBed_c.bed" ftype="bed" />
            <param name="source" value="history" />
            <param name="inputB" value="closestBed_d.bed" ftype="bed" />
            <param name="addition2_select" value="ref" />
            <output name="output" file="closestBed_result4.bed" ftype="bed" />
        </test>
        <!-- Signed distance relative to A (-D a). -->
        <test>
            <param name="inputA" value="closestBed_c.bed" ftype="bed" />
            <param name="source" value="history" />
            <param name="inputB" value="closestBed_d.bed" ftype="bed" />
            <param name="addition2_select" value="a" />
            <output name="output" file="closestBed_result5.bed" ftype="bed" />
        </test>
        <!-- Three closest hits per A feature (-k 3), B taken from the history. -->
        <test>
            <param name="inputA" value="closestBedA.bed" ftype="bed" />
            <param name="source" value="history" />
            <param name="inputB" value="a.bed" ftype="bed" />
            <param name="k" value="3" />
            <output name="output" file="closestBed_result6.bed" ftype="bed" />
        </test>
        <!-- Same expectation as the previous test, but B is a built-in entry of
             the all_gff data table. The entry is chosen through the "table"
             select of the data_table branch; "inputB" only exists in the history
             branch. NOTE(review): "testid" is assumed to be the value of an
             entry in the test all_gff table; confirm against the test data. -->
        <test>
            <param name="inputA" value="closestBedA.bed" ftype="bed" />
            <param name="source" value="data_table" />
            <param name="table" value="testid" />
            <param name="k" value="3" />
            <output name="output" file="closestBed_result6.bed" ftype="bed" />
        </test>
    </tests>
    <!-- User-facing help text, written in reStructuredText (note the image
         directive). $PATH_TO_IMAGES and @REFERENCES@ are placeholders resolved
         outside this file; presumably by Galaxy and by macros.xml respectively
         (confirm). -->
    <help><![CDATA[
**What it does**

Similar to intersectBed, closestBed searches for overlapping features in A and B. In the event that no feature in B overlaps the current feature in A, closestBed will report the closest (that is, least genomic distance from the start or end of A) feature in B. For example, one might want to find which is the closest gene to a significant GWAS polymorphism. Note that closestBed will report an overlapping feature as the closest—that is, it does not restrict to closest non-overlapping feature.

.. image:: $PATH_TO_IMAGES/closest-glyph.png

@REFERENCES@
    ]]></help>
    <!-- Macro expansion named citations; its content comes from the imported
         macros and is not visible in this file. -->
    <expand macro="citations" />
</tool>
author	iuc
date	Sat, 18 May 2024 23:28:38 +0000
parents	07e8b80f278c
children