view intersectBed.xml @ 1:82aac94b06c3 draft

Uploaded
author iuc
date Thu, 08 Jan 2015 14:25:51 -0500
parents b8348686a0b9
children 607c0576c6ab
line wrap: on
line source

<tool id="bedtools_intersectbed" name="Intersect interval files" version="@WRAPPER_VERSION@.0">
    <!-- No one-line description is supplied for this tool. -->
    <description/>
    <!-- Shared macros and tokens used throughout this wrapper are imported from macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Both elements below are expanded from imported macros; their contents are not visible in this file. -->
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <!-- Cheetah template that assembles the "bedtools intersect" command line from the parameters declared in the inputs section. -->
    <command>
<![CDATA[
        ## Several B datasets may be given. They are joined with '" "' so that, once
        ## wrapped in the outer quotes of the -b argument below, every path is quoted on its own.
        #set inputBs = '" "'.join( [ str( $file ) for $file in $inputB ] )

        ## overlap_mode is a multi-select whose string form is comma-separated; turn it
        ## into space-separated flags. When nothing is selected Galaxy renders the value
        ## as the string "None", which must not reach bedtools as a stray argument.
        #if str($overlap_mode) != "None":
            #set modes = ' '.join( str($overlap_mode).split(',') )
        #else:
            #set modes = ''
        #end if

        bedtools intersect
            ## BAM input for A has to be passed with -abam instead of -a.
            #if $inputA.ext == "bam":
                -abam "${inputA}"
            #else:
                -a "${inputA}"
            #end if

            -b "${inputBs}"
            $split
            $strand
            ## fraction is a free-text field; emit -f only when a value was actually entered.
            #if str($fraction) != "None" and str($fraction):
              -f "${fraction}"
            #end if
            $reciprocal
            $invert
            $once
            ## The count parameter (-c) is declared in the inputs section and has to be emitted here to take effect.
            $count
            $header
            $modes
            > "${output}"
]]>
    </command>
    <inputs>
        <!-- A: the single query dataset. The command template switches to -abam when its extension is "bam". -->
        <param format="bed,bam,vcf,gff,gff3" name="inputA" type="data" label="BED/VCF/GFF/BAM file"/>
        <!-- B: one or more datasets that A is intersected against. -->
        <param format="bed,bam,gff,vcf,gff3" name="inputB" type="data" multiple="True" label="One or more BAM/BED/GFF/VCF file(s)"/>
        <!-- Expanded from an imported macro; presumably defines the parameter referenced as $strand in the command template (verify in macros.xml). -->
        <expand macro="strand2" />
        <!-- Reporting flags; more than one may be selected. Each option value is the literal bedtools flag. -->
        <param name="overlap_mode" type="select" multiple="True" label="What should be written to the output file?">
            <option value="-wa" selected="True">Write the original entry in A for each overlap (-wa)</option>
            <option value="-wb">Write the original entry in B for each overlap. Useful for knowing what A overlaps. Restricted by the fraction- and reciprocal option (-wb)</option>
            <option value="-wo">Write the original A and B entries plus the number of base pairs of overlap between the two features. Only A features with overlap are reported. Restricted by the fraction- and reciprocal option (-wo)</option>
            <option value="-wao">Write the original A and B entries plus the number of base pairs of overlap between the two features. However, A features w/o overlap are also reported with a NULL B feature and overlap = 0. Restricted by the fraction- and reciprocal option (-wao)</option>
            <option value="-loj">Perform a "left outer join". That is, for each feature in A report each overlap with B.  If no overlaps are found, report a NULL feature for B (-loj)</option>
        </param>

        <!-- Expanded from an imported macro; presumably defines the parameter referenced as $split in the command template (verify in macros.xml). -->
        <expand macro="split" />
        <!-- -f: free-text value; the command template passes it only when it is non-empty. -->
        <param name="fraction" type="text"
            label="Minimum overlap required as a fraction of the BAM alignment"
            help="Alignments are only retained if the overlap with an interval in the BED file comprises at least this fraction of the BAM alignment's length.  For example, to require that the overlap affects 50% of the BAM alignment, use 0.50. (-f)"/>
        <!-- -r: expanded from an imported macro (definition not visible in this file). -->
        <expand macro="reciprocal" />
        <!-- -v: the checkbox value is the flag itself when ticked and an empty string otherwise. -->
        <param name="invert" type="boolean" checked="false" truevalue="-v" falsevalue=""
            label="Report only those alignments that **do not** overlap the BED file"
            help="(-v)"/>
        <!-- -u: same flag-or-empty convention as above. -->
        <param name="once" type="boolean" checked="false" truevalue="-u" falsevalue=""
            label="Write the original A entry _once_ if _any_ overlaps found in B."
            help="Just report the fact >=1 hit was found. (-u)" />
        <!-- -c: same flag-or-empty convention as above. -->
        <param name="count" type="boolean" checked="false" truevalue="-c" falsevalue=""
            label="For each entry in A, report the number of overlaps with B."
            help="Reports 0 for A entries that have no overlap with B. (-c)" />
        <!-- Expanded from an imported macro; presumably defines the parameter referenced as $header in the command template (verify in macros.xml). -->
        <expand macro="print_header" />
    </inputs>
    <!-- Single result dataset; its datatype and metadata are both taken from input A. -->
    <outputs>
        <data name="output"
              format_source="inputA"
              metadata_source="inputA" />
    </outputs>
    <!-- Functional tests: the same pair of BED inputs is run with three different option sets. -->
    <tests>
        <!-- Case 1: only the -wa reporting mode is selected. -->
        <test>
            <param name="inputA" ftype="bed" value="intersectBed1.bed"/>
            <param name="inputB" ftype="bed" value="intersectBed2.bed"/>
            <param name="split" value="False"/>
            <param name="overlap_mode" value="-wa"/>
            <output name="output" ftype="bed" file="intersectBed_result1.bed"/>
        </test>
        <!-- Case 2: -wa and -wb are selected together. -->
        <test>
            <param name="inputA" ftype="bed" value="intersectBed1.bed"/>
            <param name="inputB" ftype="bed" value="intersectBed2.bed"/>
            <param name="split" value="False"/>
            <param name="overlap_mode" value="-wa,-wb"/>
            <output name="output" ftype="bed" file="intersectBed_result2.bed"/>
        </test>
        <!-- Case 3: invert is switched on; overlap_mode is not set explicitly. -->
        <test>
            <param name="inputA" ftype="bed" value="intersectBed1.bed"/>
            <param name="inputB" ftype="bed" value="intersectBed2.bed"/>
            <param name="split" value="False"/>
            <param name="invert" value="True"/>
            <output name="output" ftype="bed" file="intersectBed_result3.bed"/>
        </test>
    </tests>
    <!-- User-facing help text, written in reStructuredText (see the ".. image::" and ".. class::" directives).
         @REFERENCES@ is a placeholder token; it is presumably defined in the imported macros.xml (verify there). -->
    <help>
<![CDATA[
**What it does**

By far, the most common question asked of two sets of genomic features is whether or not any of the features in the two sets “overlap” with one another. This is known as feature intersection. bedtools intersect allows one to screen for overlaps between two sets of genomic features. Moreover, it allows one to have fine control as to how the intersections are reported. bedtools intersect works with both BED/GFF/VCF and BAM files as input.

.. image:: $PATH_TO_IMAGES/intersect-glyph.png

.. class:: infomark

Note that each BAM alignment is treated individually. Therefore, if one end of a paired-end alignment overlaps an interval in the BED file, yet the other end does not, the output file will only include the overlapping end.

.. class:: infomark

Note that a BAM alignment will be sent to the output file **once** even if it overlaps more than one interval in the BED file.

@REFERENCES@
]]>
    </help>
    <!-- Citation entries are expanded from an imported macro; the definition is not visible in this file. -->
    <expand macro="citations" />
</tool>