view bamCorrelate.xml @ 30:5231f398d784 draft

planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit 3bc1d1c6f4e28ac7ff8df79fe4e3f00a195938e6-dirty
author bgruening
date Tue, 20 Oct 2015 14:43:12 -0400
parents 3a2aab18a217
children
line wrap: on
line source

<tool id="deeptools_bamCorrelate" name="bamCorrelate" version="@WRAPPER_VERSION@.0">
    <description>correlates pairs of BAM files</description>
    <macros>
        <!-- Local token; its value matches the executable invoked in the command section. -->
        <token name="@BINARY@">bamCorrelate</token>
        <!-- Shared macro file. The macros expanded elsewhere in this tool are not defined in this
             file, so they presumably come from here (TODO confirm against deepTools_macros.xml). -->
        <import>deepTools_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Cheetah template that assembles the bamCorrelate command line from the tool parameters. -->
    <command>
<![CDATA[
        ## Both lists start empty and are joined into the bamfiles/labels arguments
        ## further down; presumably the macro token expanded right below fills them.
        #set files=[]
        #set labels=[]

        @multiple_input_bams@

        bamCorrelate

        ## The selected mode value is emitted verbatim as the first argument.
        $mode.modeOpt

        @THREADS@

        --bamfiles '#echo "' '".join($files)#'
        --labels '#echo "' '".join($labels)#'
        --fragmentLength $fragmentLength
        --corMethod $corMethod

        --plotFile '$outFileName'

        #if $output.showOutputSettings == "yes":
            --plotFileFormat '$output.outFileFormat'
            ## Only request the optional tables the user actually enabled; the
            ## matching output datasets are filtered away otherwise.
            #if $output.saveRawCounts:
                --outRawCounts '$outFileRawCounts'
            #end if
            #if $output.saveCorMatrix:
                --outFileCorMatrix '$outFileCorMatrix'
            #end if
        #else:
            --plotFileFormat 'png'
        #end if

        #if $mode.modeOpt == "bins":
            --binSize '$mode.binSize'
            --distanceBetweenBins '$mode.distanceBetweenBins'
            $mode.doNotRemoveOutliers
        #else:
            --BED '$mode.region_file'
        #end if

        #### options available in both modes
        #if str($mode.region.value) != '':
            --region '$mode.region'
        #end if

        #if $plotTitle and str($plotTitle).strip() != "":
            --plotTitle '$plotTitle'
        #end if
        $plotNumbers
        #if $mode.advancedOpt.showAdvancedOpt == "yes":

            $mode.advancedOpt.doNotExtendPairedEnds
            $mode.advancedOpt.ignoreDuplicates
            $mode.advancedOpt.includeZeros

            #if $mode.advancedOpt.minMappingQuality:
                --minMappingQuality '$mode.advancedOpt.minMappingQuality'
            #end if

            ## Compare the string form instead of relying on truthiness so that an
            ## explicit value of 0 is still passed on. NOTE(review): assumes an unset
            ## parameter renders as an empty string or "None"; confirm in the macro file.
            #if str($mode.advancedOpt.zMin).strip() not in ("", "None"):
                --zMin '$mode.advancedOpt.zMin'
            #end if
            #if str($mode.advancedOpt.zMax).strip() not in ("", "None"):
                --zMax '$mode.advancedOpt.zMax'
            #end if
            --colorMap '$mode.advancedOpt.colorMap'

        #end if
]]>
    </command>

    <inputs>
        <!-- BAM files and their labels; the parameters themselves are declared by the expanded macro. -->
        <expand macro="multiple_input_bams" />

        <param name="fragmentLength" type="integer" value="200" min="1"
            label="Length of the average fragment size"
            help="Reads will be extended to match this length unless they are paired-end,
            in which case they will be extended to match the fragment length.
            *NOTE*: If the BAM files contain mated and unmated paired-end reads, unmated reads will
            be extended to match the fragment length. (--fragmentLength)"/>

        <param name="corMethod" type="select" label="Correlation method">
            <option value="spearman" selected="true">Spearman</option>
            <option value="pearson">Pearson</option>
        </param>

        <!-- Computation mode: fixed-size bins across the genome, or user-supplied BED regions. -->
        <conditional name="mode">
            <param name="modeOpt" type="select" label="Choose computation mode"
                help="In the bins mode, the correlation is computed based on equal
                length bins. In the BED file mode, a list of genomic regions in BED
                format has to be given. For each region in the BED file the number of
                overlapping reads is counted in each of the BAM files.
                Then the correlation is computed.">
                <option value="bins" selected="true">Bins</option>
                <option value="BED-file">Limit correlation to certain regions (BED file)</option>
            </param>
            <when value="bins">
                <param name="binSize" type="integer" value="10000" min="1"
                    label="Bin size in bp"
                    help="Length in base pairs for a window used to sample the genome. (--binSize)"/>

                <param name="distanceBetweenBins" type="integer" value="0" min="0"
                    label="Distance between bins"
                    help="By default, bamCorrelate considers consecutive bins of
                        the specified 'Bin size'. However, to reduce the
                        computation time, a larger distance between bins can
                        be given. Larger distances result in fewer bins being
                        considered. (--distanceBetweenBins)"/>

                <param name="doNotRemoveOutliers" type="boolean"
                    truevalue="--doNotRemoveOutliers" falsevalue="" label="Do not filter outliers"
                    help="By default, bins with very large counts are removed.
                        By setting this option, outliers will not be
                        removed. Bins with unusually large counts normally
                        correspond to regions in the genome that accumulate
                        a lot of reads like satellite regions. If outliers are not
                        removed the Pearson correlation will wrongly report a
                        very high correlation; that's why, by default,
                        bamCorrelate tries to remove outliers using
                        the median absolute deviation (MAD) method applying a
                        threshold of 200 to only consider extremely large
                        deviations from the median. (--doNotRemoveOutliers)"/>

                <!-- The command template reads mode.region and mode.advancedOpt, which are not
                     declared in this file; presumably this macro provides them (TODO confirm). -->
                <expand macro="bamCorrelate_mode_actions" />
            </when>
            <when value="BED-file">
                <param name="region_file" type="data" format="bed"
                    label="Region file in BED format"
                    help="Correlation is computed for the number of reads that overlap such regions. (--BED)"/>
                <expand macro="bamCorrelate_mode_actions" />
            </when>
        </conditional>
        <expand macro="plotTitle" />
        <expand macro="plotNumbers" />
        <!-- Optional output settings; the two booleans below drive the filters on the
             outFileRawCounts and outFileCorMatrix outputs. -->
        <conditional name="output">
            <param name="showOutputSettings" type="select" label="Show advanced output settings">
                <option value="no" selected="true">no</option>
                <option value="yes">yes</option>
            </param>
            <when value="no" />
            <when value="yes">
                <expand macro="input_image_file_format"/>
                <param name="saveRawCounts" type="boolean" label="Save the bin counts"
                    help="Creates an additional tabular output with the read counts. (--outRawCounts)"/>
                <param name="saveCorMatrix" type="boolean" label="Save the correlation matrix"
                    help="Creates an additional tabular output with the pairwise correlation values. (--outFileCorMatrix)"/>
            </when>
        </conditional>

    </inputs>
    <outputs>
        <!-- Plot output; the data element comes from the expanded macro (presumably the
             outFileName dataset that the command template and the test refer to). -->
        <expand macro="output_image_file_format" />
        <!-- Optional table of bin counts: kept only when the advanced output settings are
             shown and the saveRawCounts box is ticked. -->
        <data name="outFileRawCounts" format="tabular" label="${tool.name} on ${on_string}: bin counts">
            <filter>output['showOutputSettings'] == 'yes' and output['saveRawCounts'] is True</filter>
        </data>
        <!-- Optional correlation matrix: kept only when the advanced output settings are
             shown and the saveCorMatrix box is ticked. -->
        <data name="outFileCorMatrix" format="tabular" label="${tool.name} on ${on_string}: correlation matrix">
            <filter>output['showOutputSettings'] == 'yes' and output['saveCorMatrix'] is True</filter>
        </data>
    </outputs>
    <tests>
        <!-- Bins mode with default output settings: the same BAM file is supplied twice under
             different labels and the resulting PNG is compared by size only. -->
        <test>
            <param name="modeOpt" value="bins" />
            <param name="binSize" value="10" />
            <param name="showOutputSettings" value="no" />
            <repeat name="input_files">
                <param name="bamfile" ftype="bam" value="bowtie2-test1.bam" />
                <param name="label" value="first BAM file" />
            </repeat>
            <repeat name="input_files">
                <param name="bamfile" ftype="bam" value="bowtie2-test1.bam" />
                <param name="label" value="second BAM file" />
            </repeat>
            <output name="outFileName" ftype="png" file="bamCorrelate_result1.png" compare="sim_size" />
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

This tool is useful to assess the overall similarity of different BAM files. A typical application
is to check the correlation between replicates or published data sets.

The tool splits the genomes into bins of given length. For each bin, the number of reads
found in each BAM file is counted and a correlation (either Pearson or Spearman) is computed for all
pairs of BAM files. Finally, a heatmap is drawn based on the similarity of the samples.


.. image:: $PATH_TO_IMAGES/QC_bamCorrelate_humanSamples.png
   :alt: Heatmap of RNA Polymerase II ChIP-seq


You can find more details on the bamCorrelate wiki page: https://github.com/fidelram/deepTools/wiki/QC#wiki-bamCorrelate


**Output files**:

- **diagnostic plot**: clustered heatmap displaying the values for each pair-wise correlation, see above for an example
- data matrix (optional): if you want to plot the correlation values using a different program, e.g. R, this matrix can be used


-----

@REFERENCES@
]]>
    </help>
    <expand macro="citations" />
</tool>