view hicAggregateContacts.xml @ 11:ee370ade24cd draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 2a0943e78bdc8ebb13f181399206a9eea37ed78f"
author iuc
date Tue, 16 Mar 2021 15:21:53 +0000
parents ce16a538ab46
children e30485c5e93b
line wrap: on
line source

<tool id="hicexplorer_hicaggregatecontacts" name="@BINARY@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>allow plotting of aggregated Hi-C contacts between regions specified in a file</description>
    <!-- Tool-local token plus the shared macro file.
         @BINARY@ is defined here and substituted into the tool name and the command line.
         Macros and tokens used in this file but not defined in it (requirements,
         matrix_h5_cooler_macro, range, colormap, citations, @TOOL_VERSION@,
         @VERSION_SUFFIX@) are presumably supplied by macros.xml; verify there. -->
    <macros>
        <token name="@BINARY@">hicAggregateContacts</token>
        <import>macros.xml</import>
    </macros>
    <!-- Expands the "requirements" macro (not defined in this file). -->
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
    ## Link the input matrix under a name that carries its datatype extension;
    ## the same extension is reused further down for the obs/exp output file.
    ln -s '$matrix_h5_cooler' 'matrix.$matrix_h5_cooler.ext' &&
    @BINARY@
    --matrix 'matrix.$matrix_h5_cooler.ext'
    --BED '$BED'
    #if $BED2:
        --BED2 '$BED2'
    #end if

    --range '$range_min:$range_max'

    ## numberOfBins is declared optional, so only pass it when a value was
    ## entered; otherwise an empty quoted argument would reach the program.
    #if str($numberOfBins) != '':
        --numberOfBins '$numberOfBins'
    #end if

    --transform $transform
    --operationType $operationType
    --mode $mode
    $row_wise
    $perChr
    --largeRegionsOperation $largeRegionsOperation

    ## Optional outputs: each selected entry writes files with a fixed prefix
    ## into the working directory.
    #if $outputs and 'PrefixMatrix' in $outputs:
        --outFilePrefixMatrix 'matrix_values'
    #end if

    #if $outputs and 'outFileContactPairs' in $outputs:
        --outFileContactPairs 'contact_positions'
    #end if

    #if $outputs and 'HeatmapFile' in $outputs:
        --diagnosticHeatmapFile 'heatmap'
    #end if

    #if $outputs and 'outFileObsExp' in $outputs:
        --outFileObsExp 'ObsExp.$matrix_h5_cooler.ext'
    #end if

    ## The select value of clustering is itself the command line flag.
    #if $clustering:
        $clustering
    #end if

    #if $howToCluster:
        --howToCluster $howToCluster
    #end if

    ## Each chromosome is quoted on its own and the items are separated by a
    ## plain space, so that every name reaches the program as a separate argument.
    #if $chromosomes:
        --chromosomes #echo " ".join([ "'%s'" % $chrom.chromosome for $chrom in $chromosomes ])#
    #end if

    #if $plotType:
        --plotType $plotType
    #end if

    #if $colormap:
        --colorMap $colormap
    #end if

    ## Compare the string form instead of testing truthiness, so that an
    ## explicit value of 0 is still passed on.
    #if str($vMin) != '':
        --vMin $vMin
    #end if

    #if str($vMax) != '':
        --vMax $vMax
    #end if

    ## The plot is written with the chosen image extension and then renamed to
    ## the fixed name collected by the plot output.
    --outFileName plot.$image_file_format
    &&
    mv plot.$image_file_format plot
    #if $outputs and 'outFileObsExp' in $outputs:
        && mv 'ObsExp.$matrix_h5_cooler.ext' matrix
    #end if

]]>
    </command>
    <!-- Tool form: input matrix and BED regions, aggregation settings,
         optional clustering, plot appearance and the choice of extra outputs. -->
    <inputs>
        <expand macro='matrix_h5_cooler_macro' />
        <param argument="--BED" type="data" format="bed" label="Interactions between regions in this BED file are plotted" />
        <param argument="--BED2" type="data" format="bed" optional="true" label="Interactions between regions in first and second BED file are plotted" />

        <expand macro='range' />
        <param argument="--mode" type="select" label="Regions to consider.">
            <option value="inter-chr">inter-chr</option>
            <option value="intra-chr">intra-chr</option>
            <option value="all" selected="true">all</option>
        </param>

        <param argument="--row_wise" type="boolean" truevalue="--row_wise" falsevalue="" label="Row wise" help="If given, the interactions between each row of the BED file and its
                corresponding row of the BED2 file are computed. If intra-chromosomal
                contacts are computed, the rows with different chromosomes are ignored.
                If inter-chromosomal, the rows with same chromosomes are ignored.
                It keeps all the rows if `all`." />

        <param argument="--perChr" type="boolean" truevalue="--perChr" falsevalue="" label="Per chromosome" help="If set, it generates a plot per chromosome. It only has an effect if intra-chromosomal contacts are of interest." />

        <!-- One text field per chromosome; the command joins them into the chromosomes argument. -->
        <repeat name="chromosomes" title="List of chromosomes to plot" min="0">
            <param name="chromosome" type="text" label="chromosome (one per field)">
                <validator type="empty_field" />
            </param>
        </repeat>

        <param argument="--numberOfBins" type="integer" optional="true" label="Number of bins to include in the submatrix" help="The bed regions will be centered between -half number of bins and +half number of bins indicated." />

        <param argument="--transform" type="select" label="Type of transformation for the matrix" help="If total counts are selected, then the sub-matrix values
            are divided by the total counts for normalization. If
            z-score or obs/exp are selected, then the Hi-C matrix is
            converted into a z-score or observed / expected matrix.">
            <option value="none" selected="true">none</option>
            <option value="total-counts">total-counts</option>
            <option value="z-score">z-score</option>
            <option value="obs/exp">obs/exp</option>
        </param>

        <param argument="--operationType" type="select" label="Type of operation to compute final matrix">
            <option value="median" selected="true">median</option>
            <option value="mean">mean</option>
            <option value="sum">sum</option>
        </param>

        <param argument="--largeRegionsOperation" type="select" label="Large regions operation" help="If a given coordinate in the bed file is larger than
                           a bin of the input matrix, by default only the first bin
                           is taken into account. However there are more possibilities
                           to handle such a case. Users can ask for the last bin or
                           for center of the region. As an example if a region falls into bins [4,5,6]
                           and `--numberOfBins = 2` then if first, bins [3,4,5] are kept.
                           If last: [5,6,7] and if center: [4,5,6].">
            <option value="first" selected="true">first</option>
            <option value="last">last</option>
            <option value="center">center</option>
        </param>

        <!-- The option values are the literal command line flags.
             NOTE(review): the command emits the selected flag without any value, while the
             label speaks of a number of clusters; confirm whether the kmeans / hclust /
             spectral flags of hicAggregateContacts expect a cluster count. -->
        <param name="clustering" type="select" optional="true" label="Number of clusters per chromosome" help="When this option is set, then the matrix is split into
                  clusters using the hierarchical clustering algorithm,
                  using 'ward linkage'. hclust could be very slow if
                  you have >1000 submatrices per chromosome. In those
                  cases, you might prefer kmeans.">
            <option value="--kmeans">kmeans</option>
            <option value="--hclust">hclust (#clusters per chromosome)</option>
            <option value="--spectral">spectral</option>
        </param>

        <param argument="--howToCluster" type="select" optional="true" label="How to cluster" help="Options are 'full', 'center' and 'diagonal'. The full clustering
                takes all values of each submatrix for clustering. center takes only a square of
                length 3x3 from each submatrix and uses only these values for clustering. With the
                diagonal option the clustering is only carried out based on the submatrix diagonal
                (representing values at the same distance to each other).">
            <option value="full">full</option>
            <option value="center">center</option>
            <option value="diagonal">diagonal</option>
        </param>
        <param argument="--plotType" type="select" optional="true" label="Plot type">
            <option value="2d">2D</option>
            <option value="3d">3D</option>
        </param>
        <expand macro="colormap" />
        <param argument="--vMin" type="float" optional="true" label="vMin" help="Minimum value of the plotted score." />
        <param argument="--vMax" type="float" optional="true" label="vMax" help="Maximum value of the plotted score." />
        <!-- Used as the file extension of the plot and to pick the output datatype. -->
        <param name="image_file_format" type="select" label="Image output format">
            <option value="png" selected="true">png</option>
            <option value="svg">svg</option>
            <option value="pdf">pdf</option>
        </param>

        <!-- Each selected entry switches on one extra output argument in the command. -->
        <param name="outputs" type="select" optional="true" multiple="true" label="Optional output files">
            <option value="PrefixMatrix">Save values underlying the final matrix</option>
            <option value="outFileContactPairs">Save the position of the contacts</option>
            <option value="HeatmapFile">Heatmap file per chromosome</option>
            <option value="outFileObsExp">Save obs/exp matrix</option>
        </param>

    </inputs>
    <outputs>
        <!-- Aggregate plot; the command renames it to "plot", and the datatype follows the chosen image format. -->
        <data name="outFileName" from_work_dir="plot" format="png" label="${tool.name} on ${on_string}: Plot">
            <change_format>
                <when input="image_file_format" value="svg" format="svg" />
                <when input="image_file_format" value="pdf" format="pdf" />
            </change_format>
        </data>
        <!-- Optional collections: each one is only created when its entry in the "outputs"
             select is chosen. The "outputs and ..." guard covers the case where nothing is
             selected. Files are discovered by the fixed prefixes the command passes to the tool. -->
        <collection name="matrix_values" type="list" label="${tool.name} on ${on_string}: Matrix values">
            <filter>outputs and 'PrefixMatrix' in outputs</filter>
            <discover_datasets pattern="matrix_values_(?P&lt;designation&gt;.*)\..*" directory="./" format="tabular" />
        </collection>
        <collection name="contact_positions" type="list" label="${tool.name} on ${on_string}: Contact positions">
            <filter>outputs and 'outFileContactPairs' in outputs</filter>
            <discover_datasets pattern="contact_positions_(?P&lt;designation&gt;.*)\..*" directory="./" format="tabular" />
        </collection>
        <!-- NOTE(review): heatmap files are collected as tabular; confirm the actual file type written by the tool. -->
        <collection name="heatmap" type="list" label="${tool.name} on ${on_string}: Heatmaps">
            <filter>outputs and 'HeatmapFile' in outputs</filter>
            <discover_datasets pattern="heatmap_(?P&lt;designation&gt;.*)\..*" directory="./" format="tabular" />
        </collection>
        <!-- Obs/exp matrix; the command renames it to "matrix". Defaults to h5 and switches to cool for cool input. -->
        <data name="matrix_obs_exp_output" from_work_dir="matrix" format="h5" label="${tool.name} MATRIX on ${on_string}">
            <filter>outputs and 'outFileObsExp' in outputs</filter>
            <change_format>
                <when input="matrix_h5_cooler.ext" value="cool" format="cool" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Single smoke test: cool matrix plus one BED file, 30 bins, range 50000 to 900000,
             PNG output. No optional outputs are requested; the plot is compared by file size only. -->
        <test>
            <param name="matrix_h5_cooler" value="Li_et_al_2015.cool" ftype="cool" />
            <param name="BED" value="hicAggregateContacts/test_regions.bed" ftype="bed" />
            <param name="numberOfBins" value="30" />
            <param name="range_max" value="900000" />
            <param name="range_min" value="50000" />
            <param name="howToCluster" value="center" />
            <param name="image_file_format" value="png" />
            <output name="outFileName" value="hicAggregateContacts_results1.png" compare="sim_size" />
        </test>
    </tests>
    <help><![CDATA[

Aggregation of Hi-C contacts
============================

**hicAggregateContacts** allows plotting of aggregated Hi-C sub-matrices of a specified list of positions. Positions of interest can, for example, be binding sites of a specific protein that were determined by ChIP-seq, or genetic elements such as transcription start sites of active genes.

_________________

Usage
-----

This tool must be used on Hi-C matrices corrected by ``hicCorrectMatrix``. One should also consider bigger bins than restriction enzyme resolution bins using ``hicMergeMatrixBins``.

_________________

Optional parameters
-------------------

Optional data output can be selected:

    - **Save values underlying the final matrix:** if this option is given, then the values underlying the final matrix will be saved to tab-delimited tables (one per chromosome) using the indicated prefix, for example TSS_to_TSS_chrX.tab. If clustering is performed, then the values are saved including the cluster_id, as in TSS_to_TSS_chrX_cluster_1.tab

    - **Save the position of the contacts:** if this option is given, then the position of the contacts is saved as (chrom1, start1, end1, chrom2, start2, end2) where chrom_n, start_n, end_n correspond to the pair of positions used to compute the submatrix. The data is saved per chromosome and per cluster separately (one file each).

    - **Heatmap file per chromosome:** if given, a heatmap file (per chromosome) is saved. Each row in the heatmap contains the diagonal of each of the submatrices centered on the bed file. This file is useful to get an idea of the values that are used for the aggregate matrix and to determine the fraction of submatrices that are aggregated that may have an enrichment at the center.

_________________

Output
------

**hicAggregateContacts** outputs a plot of aggregated contacts.

Below, you can find an example of an aggregate Hi-C matrix obtained from *Drosophila melanogaster* Hi-C data. The interactions are plotted at binding sites of a protein that were determined by ChIP-seq. We plot sub-matrices of 30 bins (1.5 kb bin size, 45 kb in total). The regions specified in the BED file will be centered between minus half the number of bins and plus half the number of bins. The considered range is 300-1000 kb. The range should be adjusted and only contain contacts larger than TAD size to reduce background interactions.

.. image:: $PATH_TO_IMAGES/hicAggregateContacts.png
   :width: 80 %

This example was calculated using mean interactions of an observed vs. expected transformed Hi-C matrix. Additional options for the matrix transformation are total-counts or z-score. Aggregate contacts can be plotted in 2D or 3D.

_________________

| For more information about HiCExplorer please consider our documentation on readthedocs.io_

.. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html
.. _Colormaps: https://matplotlib.org/examples/color/colormaps_reference.html
]]>    </help>
    <expand macro="citations" />
</tool>