view unionBedGraphs.xml @ 4:607c0576c6ab draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bedtools commit 6692e3a4fa1bf6e9a407735afdbb2454ed32b316
author iuc
date Wed, 27 Jan 2016 15:15:59 -0500
parents 82aac94b06c3
children c78cf6fe3018
line wrap: on
line source

<tool id="bedtools_unionbedgraph" name="Merge BedGraph files" version="@WRAPPER_VERSION@.0">
    <description>combines coverage intervals from multiple BEDGRAPH files</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!--
        Cheetah template that builds the unionBedGraphs command line.

        * $header is emitted verbatim; it is supplied by the "print_header"
          macro (defined in macros.xml, not visible here).
        * -filler receives the user-supplied filler text, double-quoted.
        * When the "zero" boolean is true, -empty and -g $genome are added.
        * "tag" mode: every dataset selected in $tag.inputs is passed after a
          single -i, each path wrapped in double quotes (the paths are joined
          with '" "' and the result is wrapped in one more pair of quotes).
          No -names option is emitted in this branch.
        * "custom" mode: -i is followed by the input of every "bedgraphs"
          repeat entry, then -names is followed by the matching custom_name
          values in the same repeat order.
        * Standard output is redirected into $output.
    -->
    <command>
<![CDATA[
    unionBedGraphs
        $header
        -filler "${filler}"
        #if $zero.value == True:
            -empty
            -g $genome
        #end if

        #if str($tag.tag_select) == "tag":
            #set files = '" "'.join( [ str( $file ) for $file in $tag.inputs ] )
            -i "${files}"
        #else:
            -i
            #for $bg in $tag.bedgraphs:
                "${bg.input}"
            #end for
            -names
            #for $bg in $tag.bedgraphs:
                "${bg.custom_name}"
            #end for
        #end if
        > "${output}"
]]>
    </command>
    <inputs>
        <!--
            Selects how the BedGraph files are supplied:
            "tag"    : one multi-select of datasets, no custom names;
            "custom" : a repeat of (dataset, name) pairs.
        -->
        <conditional name="tag">
            <param name="tag_select" type="select" label="Sample name">
                <option value="tag" selected="true">Use input's tag</option>
                <option value="custom">Enter custom name per file</option>
            </param>
            <when value="tag">
                <param name="inputs" format="bedgraph" type="data" multiple="True" label="BedGraph files" />
            </when>
            <when value="custom">
                <!--
                    Exactly one dataset per repeat entry. The command emits one
                    quoted "${bg.input}" and one quoted "${bg.custom_name}" per
                    entry, so a multi-select here would put several datasets
                    into a single -i argument and leave the -names list short.
                -->
                <repeat name="bedgraphs" title="Add BedGraph files" min="2" >
                    <param name="input" format="bedgraph" type="data" label="BedGraph file" />
                    <param name="custom_name" type="text" area="false" label="Custom sample name"/>
                </repeat>
            </when>
        </conditional>
        <!-- Genome file parameter; expanded from macros.xml and passed as -g when "zero" is set. -->
        <expand macro="genome" />
        <param name="zero" type="boolean" checked="true"
            label="Report regions with zero coverage"
            help="If set, regions without any coverage will also be reported. Uses the genome file selected above. (-empty, -g)" />
        <param name="filler" type="text" value="N/A"
            label="Text to use for no-coverage value"
            help="Can be 0.0, N/A, - or any other value. (-filler)" />
        <!-- Header option; expanded from macros.xml and referenced as $header in the command. -->
        <expand macro="print_header" />
    </inputs>
    <outputs>
        <!-- The merged table written by the command's stdout redirection. -->
        <data format="bedgraph" name="output" />
    </outputs>
    <tests>
        <!-- Test 1: "tag" mode, three files, zero-coverage reporting off, header left at its default. -->
        <test>
            <param name="tag_select" value="tag"/>
            <param name="inputs" value="unionBedGraphs1.bg,unionBedGraphs2.bg,unionBedGraphs3.bg" ftype="bedgraph" />
            <param name="zero" value="False"/>
            <output name="output" file="unionBedGraphs_result1.bg" ftype="bedgraph" />
        </test>
        <!-- Test 2: same as test 1 but with the header option switched on. -->
        <test>
            <param name="tag_select" value="tag"/>
            <param name="inputs" value="unionBedGraphs1.bg,unionBedGraphs2.bg,unionBedGraphs3.bg" ftype="bedgraph" />
            <param name="header" value="True"/>
            <param name="zero" value="False"/>
            <output name="output" file="unionBedGraphs_result2.bg" ftype="bedgraph" />
        </test>
        <!-- Test 3: "tag" mode with zero-coverage reporting on, which needs the genome file. -->
        <test>
            <param name="tag_select" value="tag"/>
            <param name="inputs" value="unionBedGraphs1.bg,unionBedGraphs2.bg,unionBedGraphs3.bg" ftype="bedgraph" />
            <param name="zero" value="True"/>
            <param name="genome" value="unionBedGraphs1.len"/>
            <output name="output" file="unionBedGraphs_result3.bg" ftype="bedgraph" />
        </test>
        <!--
            Test 4: "custom" mode, one dataset and one name per repeat entry,
            zero-coverage reporting on. The inputs pin ftype="bedgraph" like the
            other tests so the uploaded datatype does not depend on auto-detection.
        -->
        <test>
            <param name="tag_select" value="custom"/>
            <repeat name="bedgraphs">
                <param name="input" value="unionBedGraphs1.bg" ftype="bedgraph" />
                <param name="custom_name" value="first" />
            </repeat>
            <repeat name="bedgraphs">
                <param name="input" value="unionBedGraphs2.bg" ftype="bedgraph" />
                <param name="custom_name" value="second" />
            </repeat>
            <repeat name="bedgraphs">
                <param name="input" value="unionBedGraphs3.bg" ftype="bedgraph" />
                <param name="custom_name" value="third" />
            </repeat>
            <param name="zero" value="True"/>
            <param name="genome" value="unionBedGraphs1.len"/>
            <output name="output" file="unionBedGraphs_result4.bg" ftype="bedgraph" />
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

This tool merges multiple BedGraph files, allowing direct and fine-scale coverage comparisons among many samples/files. The BedGraph files need not represent the same intervals; the tool will identify both common and file-specific intervals. In addition, the BedGraph values need not be numeric: one can use any text as the BedGraph value and the tool will compare the values from multiple files.

.. image:: http://people.virginia.edu/~arq5x/files/bedtools-galaxy/ubg.png


.. class:: warningmark

This tool requires that each BedGraph file is reference-sorted (chrom, then start) and contains non-overlapping intervals (within a given file).


------

**Example input**::

    # 1.bedgraph
    chr1  1000    1500    10
    chr1  2000    2100    20

    # 2.bedgraph
    chr1  900     1600    60
    chr1  1700    2050    50

    # 3.bedgraph
    chr1  1980    2070    80
    chr1  2090    2100    20


------

**Examples using the Zero Coverage checkbox**

Output example (*without* checking "Report regions with zero coverage")::
    
    chr1     900    1000     0    60     0
    chr1    1000    1500    10    60     0
    chr1    1500    1600     0    60     0
    chr1    1700    1980     0    50     0
    chr1    1980    2000     0    50    80
    chr1    2000    2050    20    50    80
    chr1    2050    2070    20     0    80
    chr1    2070    2090    20     0     0
    chr1    2090    2100    20     0    20


Output example (*with* checking "Report regions with zero coverage"). The lines marked with (*) are not covered in any input file, but are still reported (the asterisk marking does not appear in the file)::
    
    chr1       0          900     0     0     0 (*)
    chr1     900         1000     0    60     0
    chr1    1000         1500    10    60     0
    chr1    1500         1600     0    60     0
    chr1    1600         1700     0     0     0 (*)
    chr1    1700         1980     0    50     0
    chr1    1980         2000     0    50    80
    chr1    2000         2050    20    50    80
    chr1    2050         2070    20     0    80
    chr1    2070         2090    20     0     0
    chr1    2090         2100    20     0    20
    chr1    2100    247249719     0     0     0 (*) 


------

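**Examples adjusting the filler text for intervals without coverage**

bedtools itself uses '0' for intervals without coverage, while this tool's form pre-fills "N/A". Any other text can be entered in "Text to use for no-coverage value".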

Output example with **filler = N/A**::
    
    chr1     900    1000    N/A     60    N/A
    chr1    1000    1500     10     60    N/A
    chr1    1500    1600    N/A     60    N/A
    chr1    1600    1700    N/A    N/A    N/A
    chr1    1700    1980    N/A     50    N/A
    chr1    1980    2000    N/A     50     80
    chr1    2000    2050     20     50     80
    chr1    2050    2070     20    N/A     80
    chr1    2070    2090     20    N/A    N/A
    chr1    2090    2100     20    N/A     20


------

**Examples using the "sample name" labels**::

    chrom   start   end     WT-1    WT-2    KO-1
    chr1    900     1000    N/A     60      N/A
    chr1    1000    1500     10     60      N/A
    chr1    1500    1600    N/A     60      N/A
    chr1    1600    1700    N/A    N/A      N/A
    chr1    1700    1980    N/A     50      N/A
    chr1    1980    2000    N/A     50      80
    chr1    2000    2050     20     50      80
    chr1    2050    2070     20    N/A      80
    chr1    2070    2090     20    N/A      N/A
    chr1    2090    2100     20    N/A      20


------

**Non-numeric values**

The input BedGraph files can contain any kind of value in the fourth column, not necessarily a numeric value.

Input Example::
        
    File-1                           File-2 
    chr1   200   300   Sample1       chr1   100   240   0.75
    chr1   400   450   Sample1       chr1   250   700   0.43
    chr1   530   600   Sample2

Output Example::
    
    chr1   100    200    0         0.75
    chr1   200    240    Sample1   0.75
    chr1   240    250    Sample1   0
    chr1   250    300    Sample1   0.43
    chr1   300    400    0         0.43
    chr1   400    450    Sample1   0.43
    chr1   450    530    0         0.43
    chr1   530    600    Sample2   0.43
    chr1   600    700    0         0.43

@REFERENCES@
]]>
    </help>
    <expand macro="citations" />
</tool>