view mergeBed.xml @ 3:2cd7e321d259 draft

Uploaded
author iuc
date Wed, 27 May 2015 12:54:35 -0400
parents 457b09031d57
children 607c0576c6ab
line wrap: on
line source

<tool id="bedtools_mergebed" name="Merge BED files" version="@WRAPPER_VERSION@.1">
    <description>(mergeBed)</description>
    <!-- Shared definitions are imported from macros.xml. The macros expanded below and
         the @...@ tokens used in this wrapper are expected to be defined there; none of
         them is defined in this file. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- NOTE(review): "requirements" presumably declares the bedtools dependency and
         "stdio" the stderr / exit-code handling. Confirm against macros.xml. -->
    <expand macro="requirements" />
    <expand macro="stdio" />
    <!-- Command template: runs mergeBed on the selected input dataset and redirects
         stdout to the "output" dataset.
           $strand   : the chosen option value of the "strand" select
                       (empty, -s, -S + or -S -), inserted verbatim.
           $distance : integer passed as the value of -d.
           $header and @C_AND_O_ARGUMENT@ are not defined in this file; presumably they
           come from the print_header and c_and_o_argument macros in macros.xml
           (header flag and the -c / -o arguments). Confirm there. -->
    <command>
<![CDATA[
        mergeBed
            -i "${input}"
            $strand
            -d $distance
            $header
            @C_AND_O_ARGUMENT@
            > "${output}"
]]>
    </command>
    <inputs>
        <!-- Interval dataset to merge (passed to mergeBed as -i). As the help section
             states, it must already be sorted by chromosome and then start position. -->
        <param name="input" format="bam,bed,gff,vcf" type="data" label="Merge the following BAM/BED/VCF/GFF file"/>
        <!-- The selected option value is inserted verbatim into the command line;
             the default (empty value) adds no strand flag. -->
        <param name="strand" type="select" label="Calculation based on strandedness?">
            <option value="" selected="True">Overlaps on either strand</option>
            <option value="-s">Force strandedness. That is, only merge features that are the same strand.</option>
            <option value="-S +">Force merge for forward strand only.</option>
            <option value="-S -">Force merge for reverse strand only.</option>
        </param>
        <!-- Passed as the value of -d. With the default of 0 only overlapping or
             book-ended features are merged. -->
        <param name="distance" type="integer" value="0"
            label="Maximum distance between features allowed for features to be merged"
            help="Default is 0. That is, overlapping and/or book-ended features are merged. (-d)"/>
        <!-- NOTE(review): print_header and c_and_o_argument are macros from macros.xml.
             Presumably they add the header switch and a repeat of column/operation
             pairs; the "col" param below is handed to the latter macro. Confirm there. -->
        <expand macro="print_header" />
        <expand macro="c_and_o_argument">
            <param name="col" type="data_column" data_ref="input" label="Specify the column(s) that should be summarized" help="(-c)" />
        </expand>
    </inputs>
    <outputs>
        <!-- Single BED dataset that receives mergeBed's stdout (see the redirect in
             the command). metadata_source points at the "input" dataset. -->
        <data name="output"
              format="bed"
              metadata_source="input"
              label="Merged ${input.name}" />
    </outputs>
    <tests>
        <!-- Default merge: no strand flag, distance 0. -->
        <test>
            <param name="input" value="mergedBed1.bed" ftype="bed" />
            <output name="output" file="mergedBed_result1.bed" ftype="bed" />
        </test>
        <!-- Same-strand merge. The select input is named "strand"; the former name
             "strandedness" matched no input, so -s was never passed.
             NOTE(review): confirm mergedBed_result2.bed reflects a run with -s. -->
        <test>
            <param name="input" value="mergedBed2.bed" ftype="bed" />
            <param name="strand" value="-s" />
            <output name="output" file="mergedBed_result2.bed" ftype="bed" />
        </test>
        <!-- This test used to set "report_number" to -n, but this wrapper has no such
             input, so the setting could not reach the command line. Counting is
             covered by the column/operation test at the end.
             NOTE(review): confirm mergedBed_result3.bed is the output of a default merge. -->
        <test>
            <param name="input" value="mergedBed3.bed" ftype="bed" />
            <output name="output" file="mergedBed_result3.bed" ftype="bed" />
        </test>
        <!-- Merge features up to 1000 bp apart (-d 1000). -->
        <test>
            <param name="input" value="mergedBed4.bed" ftype="bed" />
            <param name="distance" value="1000" />
            <output name="output" file="mergedBed_result4.bed" ftype="bed" />
        </test>
        <!-- -d 1000 combined with one column/operation pair (column 1, count). -->
        <test>
            <param name="input" value="mergedBed4.bed" ftype="bed" />
            <param name="distance" value="1000" />
            <repeat name="c_and_o_argument_repeat">
                <param name="col" value="1"/>
                <param name="operation" value="count"/>
            </repeat>
            <output name="output" file="mergedBed_result5.bed" ftype="bed" />
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

bedtools merge combines overlapping or "book-ended" features in an interval file into a single feature which spans all of the combined features.


.. image:: $PATH_TO_IMAGES/merge-glyph.png


.. class:: warningmark

bedtools merge requires that you presort your data by chromosome and then by start position.


==========================================================================
Default behavior
==========================================================================
By default, ``bedtools merge`` combines overlapping (by at least 1 bp) and/or
bookended intervals into a single, "flattened" or "merged" interval.

::

  $ cat A.bed
  chr1  100  200
  chr1  180  250
  chr1  250  500
  chr1  501  1000

  $ bedtools merge -i A.bed
  chr1  100  500
  chr1  501  1000


==========================================================================
*-s* Enforcing "strandedness"
==========================================================================
The ``-s`` option will only merge intervals that are overlapping/bookended
*and* are on the same strand.

::

  $ cat A.bed
  chr1  100  200   a1  1 +
  chr1  180  250   a2  2 +
  chr1  250  500   a3  3 -
  chr1  501  1000  a4  4 +

  $ bedtools merge -i A.bed -s
  chr1  100  250    +
  chr1  501  1000   +
  chr1  250  500    -


==========================================================================
*-d* Controlling how close two features must be in order to merge 
==========================================================================
By default, only overlapping or book-ended features are combined into a new 
feature. However, one can force ``merge`` to combine more distant features 
with the ``-d`` option. For example, were one to set ``-d`` to 1000, any 
features that overlap or are within 1000 base pairs of one another will be 
combined.

::

  $ cat A.bed
  chr1  100  200
  chr1  501  1000
  
  $ bedtools merge -i A.bed
  chr1  100  200
  chr1  501  1000

  $ bedtools merge -i A.bed -d 1000
  chr1  100  1000


@REFERENCES@
]]>
    </help>
    <expand macro="citations" />
</tool>