view hicTransform.xml @ 16:f36828ebdc54 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 07802a6bd441d9da888cfb8283f8c2135704f7c9
author iuc
date Wed, 18 Oct 2023 10:24:58 +0000
parents 21b3ef2a6a80
children 27fe5cd1729c
line wrap: on
line source

<tool id="hicexplorer_hictransform" name="@BINARY@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>transform a matrix to obs/exp, pearson and covariance matrices</description>
    <macros>
        <!-- @BINARY@ holds the HiCExplorer executable name; this file uses it for the tool's display name and in the command line. -->
        <token name="@BINARY@">hicTransform</token>
        <!-- Shared definitions. Presumably supplies the macros expanded in this file (requirements, matrix_h5_cooler_macro, citations) and the version tokens, none of which are defined here. TODO confirm against macros.xml. -->
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        ## Link the input under a name that carries its datatype extension, so the
        ## file name passed to the tool ends in the dataset's extension.
        ln -s '$matrix_h5_cooler' 'matrix.$matrix_h5_cooler.ext' &&
        @BINARY@ --matrix 'matrix.$matrix_h5_cooler.ext'
        --method $method_selector
        ## Both booleans render as their flag when checked and as an empty string otherwise.
        $ligation_factor
        $perChromosome
        ## Quote every chromosome name from the repeat; the option is omitted when the repeat is empty.
        #set chroms = ' '.join([ '\'' + str($var.chromosome) + '\'' for $var in $chromosomeOrder ])
        #if chroms:
            --chromosomes $chroms
        #end if
        ## Write to a path distinct from the input symlink so the result is never
        ## written through the link into the input dataset.
        --outFileName 'transformed.$matrix_h5_cooler.ext'

        ## The output is collected from the working directory under the name "matrix".
        && mv 'transformed.$matrix_h5_cooler.ext' matrix
]]>
    </command>
    <inputs>
        <!-- Input contact matrix; the parameter itself is defined by the shared macro. -->
        <expand macro="matrix_h5_cooler_macro" />
        <!-- The selected value is passed verbatim as the method argument of the command. -->
        <param name="method_selector" type="select" label="Choose method to apply">
            <option value="obs_exp" selected="True">obs / exp</option>
            <option value="obs_exp_lieberman">obs / exp (Lieberman-Aiden 2009)</option>
            <option value="obs_exp_non_zero">obs / exp (exclude zeros from exp)</option>
            <option value="pearson">pearson</option>
            <option value="covariance">covariance</option>
        </param>
        <!-- Declared through "argument" like the flag below; the derived parameter name is still ligation_factor. -->
        <param argument="--ligation_factor" type="boolean" truevalue="--ligation_factor" falsevalue="" checked="false" label="Multiplies a scaling factor to each entry of the expected matrix to take care of the proximity ligation" />
        <param argument="--perChromosome" type="boolean" truevalue="--perChromosome" falsevalue="" checked="false" label="Computation per chromosome" />
        <!-- Optional chromosome list; the command quotes each name and passes them together in one option. -->
        <repeat name="chromosomeOrder" min="0" title="Chromosomes to include in the computation">
            <param name="chromosome" type="text" label="Chromosome name">
                <validator type="empty_field" />
                <validator type="expression" message="Only alphanumeric characters and the underscore can be used in chromosome names">value.replace('_', '').isalnum()</validator>
            </param>
        </repeat>
    </inputs>
    <outputs>
        <!-- Collected from the work-dir file "matrix". Typed as cool by default and as h5 when the input dataset's extension is h5. -->
        <data name="matrix_out" format="cool" from_work_dir="matrix" label="${tool.name} on ${matrix_h5_cooler.name} [${on_string}]: $method_selector">
            <change_format>
                <when format="h5" input_dataset="matrix_h5_cooler" attribute="ext" value="h5" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Default flags: the selected method is on the command line, the ligation factor flag is not, and the h5 input yields an h5 output. -->
        <test>
            <param name="matrix_h5_cooler" value="small_test_matrix.h5" />
            <param name="method_selector" value="obs_exp_lieberman" />
            <output name="matrix_out" ftype="h5">
                <assert_contents>
                    <has_h5_keys keys="intervals,matrix" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--method obs_exp_lieberman" />
                <not_has_text text="--ligation_factor" />
            </assert_command>
        </test>
        <!-- Same input with the ligation factor enabled: the flag must reach the command line. -->
        <test>
            <param name="matrix_h5_cooler" value="small_test_matrix.h5" />
            <param name="method_selector" value="obs_exp_lieberman" />
            <param name="ligation_factor" value="True" />
            <output name="matrix_out" ftype="h5">
                <assert_contents>
                    <has_h5_keys keys="intervals,matrix" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--method obs_exp_lieberman" />
                <has_text text="--ligation_factor" />
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
Transformation of matrix for plotting
=====================================

**hicTransform** computes a matrix based on one Hi-C contact matrix as input:

- An **observed/expected matrix** obtained "by dividing each entry in the contact matrix by the genome-wide average contact probability for loci at that genomic distance" (`Lieberman-Aiden et al. (2009)`_). This transformation makes it easier to assess long-range interactions.
- An **observed/expected norm matrix** which computes the expected matrix as EXP_i,j = sum(diagonal(i-j)) * sum(row(j)) * sum(row(i)) / sum(matrix)
- An **observed/expected non-zero values matrix** which computes the expected matrix as the sum per genomic distance j divided by the number of non-zero contacts: sum(diagonal(j)) / number of non-zero elements in diagonal(j)
- A **Pearson correlation matrix** obtained by computing the Pearson correlation between each bin based on observed/expected values. This matrix transformation makes it easier to identify the bins that do, or do not, come into contact with each other at long range, and thus helps to define compartments in the nucleus (``hicPCA``).
- A **covariance matrix**, which is used as a basis for the Principal Component Analysis (PCA) to compute the eigenvectors output by **hicTransform**.

These matrices can be used with ``hicPlotMatrix`` or ``pyGenomeTracks`` for a visualization of the A / B compartment analysis.

_________________

Output
------

From one Hi-C contact matrix, **hicTransform** outputs a matrix with the selected method applied.

_________________

| For more information about HiCExplorer please consider our documentation on readthedocs.io_

.. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html
.. _`Lieberman-Aiden et al. (2009)`: https://pubmed.ncbi.nlm.nih.gov/19815776/
]]>    </help>
    <expand macro="citations" />
</tool>