view plotCorrelation.xml @ 21:e97f5de6b2da draft

planemo upload for repository https://github.com/deeptools/deepTools/tree/master/galaxy/wrapper/ commit 3024062b63fdc502b46e4f328083493c2274182a
author bgruening
date Wed, 22 Aug 2018 16:30:22 -0400
parents 68c06ccce7bf
children 32e451def88c
line wrap: on
line source

<tool id="deeptools_plot_correlation" name="plotCorrelation" version="@WRAPPER_VERSION@.0" profile="18.01">
    <description>Create a heatmap or scatterplot of correlation scores between different samples</description>
    <macros>
        <token name="@BINARY@">plotCorrelation</token>
        <import>deepTools_macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command>
<![CDATA[
        ## Assemble the plotCorrelation command line from the tool parameters.
        @BINARY@
            --corData '$corData'
            --plotFile '$outFileName'
            --corMethod '$corMethod'
            --whatToPlot '$plotting_type.whatToPlot'
            #if str($plotting_type.whatToPlot) == 'heatmap':
                @HEATMAP_OPTIONS@
                --plotWidth $plotting_type.plotWidth
                --plotHeight $plotting_type.plotHeight
            #else:
                --plotTitle '$plotting_type.plotTitle'
                ## The axis ranges are entered as "min max" in one text field but
                ## must reach the program as two separate arguments, so the text
                ## is split on whitespace and every value is quoted on its own.
                ## A blank (or whitespace-only) field leaves the option unset.
                #if str($plotting_type.xRange).strip() != '':
                    --xRange
                    #for $bound in str($plotting_type.xRange).split():
                        '$bound'
                    #end for
                #end if
                #if str($plotting_type.yRange).strip() != '':
                    --yRange
                    #for $bound in str($plotting_type.yRange).split():
                        '$bound'
                    #end for
                #end if
                $plotting_type.log1p
            #end if
            $skipZeros
            --plotFileFormat '$outFileFormat'
            $removeOutliers
            ## Only request the tabular matrix when the user asked for it.
            #if $outFileCorMatrix:
                --outFileCorMatrix '$matrix'
            #end if

]]>
    </command>
    <inputs>
        <!-- Score matrix to correlate; handed to the command as the corData option. -->
        <param name="corData" format="deeptools_coverage_matrix" type="data" label="Matrix file from the multiBamSummary or multiBigwigSummary tool"/>
        <expand macro="corMethod" />

        <!-- The plot type decides which of the two option groups below is shown. -->
        <conditional name="plotting_type" >
            <param argument="--whatToPlot" type="select" label="Plotting type">
                <option value="heatmap" selected="True">Heatmap</option>
                <option value="scatterplot">Scatterplot</option>
            </param>
            <when value="heatmap">
                <expand macro="heatmap_options" />
                <expand macro="plotWidthHeight" PLOTWIDTH="11.0" PLOTHEIGHT="9.5" />
            </when>
            <when value="scatterplot">
                <expand macro="plotTitle" />
                <!-- Both range fields are optional; the validator accepts an empty
                     value or exactly two whitespace-separated integers. -->
                <param argument="--xRange" type="text" value="" optional="true"
                    label="X axis range"
                    help="X axis range, the default scales these such that the full range of dots is displayed. The input should be two integers separated by a space.">
                    <validator type="regex" message="Enter two integers separated by a space, or leave the field empty.">^\s*(-?\d+\s+-?\d+)?\s*$</validator>
                </param>
                <param argument="--yRange" type="text" value="" optional="true"
                    label="Y axis range"
                    help="Y axis range, the default scales these such that the full range of dots is displayed. The input should be two integers separated by a space.">
                    <validator type="regex" message="Enter two integers separated by a space, or leave the field empty.">^\s*(-?\d+\s+-?\d+)?\s*$</validator>
                </param>
                <param argument="--log1p" type="boolean" truevalue="--log1p" falsevalue=""
                    label="Log transform"
                    help="Plot the natural log of the scatter plot after adding 1. Note that this is ONLY for plotting, the correlation is unaffected." />
            </when>
        </conditional>

        <expand macro="skipZeros" />

        <expand macro="input_image_file_format" />

        <!-- The URL in the help text is kept on a single line: line breaks inside
             an attribute value turn into spaces and would split the address. -->
        <param argument="--removeOutliers" type="boolean"
            truevalue="--removeOutliers" falsevalue="" label="Remove regions with very large counts"
            help="If set, bins with very large counts are removed. Bins
                with abnormally high read counts artificially
                increase the Pearson correlation; to counter this,
                plotCorrelation removes outliers using the median
                absolute deviation (MAD) method, applying a threshold
                of 200 to only consider extremely large deviations
                from the median. The ENCODE blacklist page
                (https://sites.google.com/site/anshulkundaje/projects/blacklists)
                contains useful information about regions with
                unusually high counts."/>

        <!-- When checked, the command also writes the tabular correlation matrix. -->
        <param name="outFileCorMatrix" type="boolean" label="Save the matrix of values underlying the heatmap"/>

    </inputs>
    <outputs>
        <!-- Image output comes from the shared macro (presumably the
             outFileName dataset the command writes via the plotFile option). -->
        <expand macro="output_image_file_format_not_nested" />
        <!-- Tabular correlation matrix; only created when the
             outFileCorMatrix checkbox is ticked. -->
        <data name="matrix"
              format="tabular"
              label="${tool.name} on ${on_string}: Correlation matrix">
            <filter>outFileCorMatrix is True</filter>
        </data>
    </outputs>
    <tests>
        <!-- Default plot type (heatmap), additionally requesting the matrix table. -->
        <test>
            <param name="corData"
                   ftype="deeptools_coverage_matrix"
                   value="multiBamSummary_result1.npz" />
            <param name="outFileFormat" value="png" />
            <param name="outFileCorMatrix" value="True" />
            <output name="matrix"
                    ftype="tabular"
                    file="plotCorrelation_result1.tabular" />
            <output name="outFileName"
                    ftype="png"
                    file="plotCorrelation_result1.png"
                    compare="sim_size"
                    delta="400" />
        </test>
        <!-- Scatterplot with outlier removal and a custom title; the image is
             compared by file size only. -->
        <test>
            <param name="corData"
                   ftype="deeptools_coverage_matrix"
                   value="multiBamSummary_result1.npz" />
            <param name="outFileFormat" value="png" />
            <param name="whatToPlot" value="scatterplot" />
            <param name="removeOutliers" value="True" />
            <param name="plotTitle" value="Test Plot" />
            <output name="outFileName"
                    ftype="png"
                    file="plotCorrelation_result2.png"
                    compare="sim_size"
                    delta="300" />
        </test>
    </tests>
    <help>
<![CDATA[
What it does
------------

This tool takes the default output of ``multiBamSummary`` or ``multiBigwigSummary``, and computes the pairwise correlation among samples.
Results can be visualized as **scatterplots** or as a **heatmap** of correlation coefficients (see below for examples).

Theoretical Background
----------------------

The result of the correlation computation is a **table of correlation coefficients** that indicates how "strong" the relationship between two samples is and it will consist of numbers between -1 and 1. (-1 indicates perfect anti-correlation, 1 perfect correlation.)

We offer two different functions for the correlation computation: *Pearson* or *Spearman*.

The *Pearson method* measures the **metric differences** between samples and is therefore influenced by outliers.
The *Spearman method* is based on **rankings**.

Output
------

The default output is a **diagnostic plot** -- either a scatterplot or a clustered heatmap displaying the values for each pairwise correlation (see below for example plots).

Optionally, you can also obtain a table of the pairwise correlation coefficients.

.. image:: $PATH_TO_IMAGES/plotCorrelation_output.png
    :width: 600
    :height: 271

Example plots
-------------

The following is the output of ``plotCorrelation`` with our test ChIP-Seq datasets (to be found under "Shared Data" --> "Data Library").

Average coverages were computed over 10 kb bins for chromosome X,
from bigWig files using ``multiBigwigSummary``. This was then used with ``plotCorrelation`` to make a heatmap of Spearman correlation coefficients.

.. image:: $PATH_TO_IMAGES/plotCorrelation_galaxy_bw_heatmap_output.png
    :width: 600
    :height: 518

The scatterplot could look like this:

.. image:: $PATH_TO_IMAGES/plotCorrelation_scatterplot_PearsonCorr_bigwigScores.png
    :width: 600
    :height: 600

-----

@REFERENCES@
]]>
    </help>
    <expand macro="citations" />
</tool>