view hicFindTADs.xml @ 2:a9c1d76b90c4 draft

planemo upload for repository https://github.com/maxplanck-ie/HiCExplorer/tree/master/galaxy/wrapper/ commit 4d61b6bf2fed275ab38c226d0c4390b095a38251
author bgruening
date Thu, 02 Nov 2017 11:13:55 -0400
parents aab371aa615e
children 8b60271e7e54
line wrap: on
line source

<tool id="hicexplorer_hicfindtads" name="@BINARY@" version="@WRAPPER_VERSION@.0">
    <description>find minimum cuts that correspond to boundaries</description>
    <macros>
        <!-- @BINARY@ is substituted into the name attribute of the tool element. -->
        <token name="@BINARY@">hicFindTADs</token>
        <!-- Shared macro file. Presumably it defines the "requirements" and "citations"
             macros and the @WRAPPER_VERSION@ and @THREADS@ tokens used in this wrapper;
             TODO confirm against macros.xml. -->
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        ## Run hicFindTADs on the selected matrix. Every result file is written
        ## with the fixed prefix "galaxy_tad_prefix" so that the outputs section
        ## can pick the files up through from_work_dir.
        hicFindTADs
                --matrix '$matrix'

                ## delta is declared optional: only emit the flag when a value was
                ## supplied, otherwise a bare "--delta" would reach the command line.
                ## The string comparison keeps an explicit 0 from being dropped.
                #if str($delta) != '':
                --delta $delta
                #end if

                #if $minBoundaryDistance:
                --minBoundaryDistance $minBoundaryDistance
                #end if
                --minDepth $minDepth
                --maxDepth $maxDepth
                --step $step

                ## The selector values (fdr, bonferroni, None) are handed to
                ## --correctForMultipleTesting unchanged, so all three modes use the
                ## same flag. A threshold parameter only exists for the two
                ## correcting modes.
                #set $correction = str($multiple_comparison_conditional.multiple_comparison_selector)
                --correctForMultipleTesting $correction
                #if $correction != 'None':
                    --threshold $multiple_comparison_conditional.threshold
                #end if

                --numberOfProcessors @THREADS@
                --outPrefix galaxy_tad_prefix
    ]]></command>
    <inputs>
        <!-- Hi-C matrix the TAD calling is run on. -->
        <param argument="--matrix" type="data" format="h5" label="Corrected Hi-C matrix to use for the computations"/>

        <!-- Window sizes (in bp) scanned around every bin: from minDepth up to maxDepth. -->
        <param argument="--minDepth" type="integer" value="40000"
                label="Minimum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
                help="This number should be at least 3 times as large as the bin size of the Hi-C matrix."/>
        <param argument="--maxDepth" type="integer" value="100000"
                label="Maximum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
                help="This number should be around 6-10 times as large as the bin size of the Hi-C matrix."/>
        <param argument="--step" type="integer" value="10000"
                label="Step size when moving from minDepth to maxDepth"
                help="The step size grows exponentially as minDepth + (step * int(x**1.5)) for x in [0, 1, ...]
                until  it reaches maxDepth. For example, selecting step=10,000, minDepth=20,000
                and maxDepth=150,000 will compute TAD-scores for window sizes:
                20,000, 30,000, 40,000, 70,000 and 100,000"/>

        <!-- Multiple-testing correction. The option values are forwarded to the command
             template; a threshold is only requested for the two correcting modes. -->
        <conditional name="multiple_comparison_conditional">
            <param name="multiple_comparison_selector" type="select" label="Multiple Testing Corrections" >
                <option value="fdr" selected="True">False discovery rate</option>
                <option value="bonferroni">Bonferroni correction</option>
                <option value="None">No correction</option>
            </param>
            <when value="fdr">
                <param name="threshold" type="float" value="0.01" label="q-value" />
            </when>
            <when value="bonferroni">
                <param name="threshold" type="float" value="0.01" label="p-value" />
            </when>
            <when value="None" />
        </conditional>

        <!-- Optional: may be left empty by the user. -->
        <param argument="--delta" type="float" value="0.001" optional="True"
                label="Minimum threshold of the difference between the TAD-separation score of a putative boundary and the mean of the TAD-sep. score of surrounding bins."
                help="The delta value reduces spurious boundaries that are shallow, which usually
                        occur at the center of large TADs when the TAD-sep. score is flat. Higher
                        delta threshold values produce more conservative boundary estimations. By
                        default, multiple delta thresholds are saved for the following delta
                        values: 0.001, 0.01, 0.03, 0.05, 0.1. Other single or multiple values
                        can be given."/>

        <!-- Optional and empty by default; the command template only passes it when set. -->
        <param argument="--minBoundaryDistance" type="integer" value="" optional="True"
                label="Minimum distance between boundaries (in bp)."
                help="This parameter can be used to reduce spurious boundaries caused by noise. "/>

    </inputs>
    <outputs>
        <!-- Each dataset is collected from the job working directory. The file names
             start with "galaxy_tad_prefix", the outPrefix value set in the command. -->
        <data
            name="boundaries"
            format="bed"
            from_work_dir="galaxy_tad_prefix_boundaries.bed"
            label="${tool.name} on ${on_string}: Boundary positions" />
        <data
            name="score"
            format="bedgraph"
            from_work_dir="galaxy_tad_prefix_score.bedgraph"
            label="${tool.name} on ${on_string}: Matrix with multi-scale TAD scores" />
        <data
            name="domains"
            format="bed"
            from_work_dir="galaxy_tad_prefix_domains.bed"
            label="${tool.name} on ${on_string}: TAD domains" />
        <data
            name="boundaries_bin"
            format="gff"
            from_work_dir="galaxy_tad_prefix_boundaries.gff"
            label="${tool.name} on ${on_string}: Boundary information plus score" />
        <!-- The .bm file is registered with the bedgraph datatype. -->
        <data
            name="tad_score"
            format="bedgraph"
            from_work_dir="galaxy_tad_prefix_tad_score.bm"
            label="${tool.name} on ${on_string}: TAD information in bm file" />
        <data
            name="matrix_output"
            format="h5"
            from_work_dir="galaxy_tad_prefix_zscore_matrix.h5"
            label="${tool.name} on ${on_string}: Z-score matrix in h5" />
    </outputs>
    <tests>
        <!-- FDR-corrected run on the small test matrix. Every output is compared to its
             reference file by size only (sim_size) within the given byte delta. -->
        <test>
            <param name="matrix" value="small_test_matrix.h5" ftype="h5" />
            <param name="minDepth" value="60000"/>
            <param name="maxDepth" value="180000"/>
            <param name="step" value="20000"/>
            <param name="minBoundaryDistance" value="20000" />
            <conditional name="multiple_comparison_conditional">
                <param name="multiple_comparison_selector" value="fdr"/>
                <param name="threshold" value="0.1" />
            </conditional>
            <output name="boundaries" ftype="bed"
                    file="find_TADs/multiFDR_boundaries.bed"
                    compare="sim_size" delta="35000" />
            <output name="boundaries_bin" ftype="gff"
                    file="find_TADs/multiFDR_boundaries.gff"
                    compare="sim_size" delta="35000" />
            <output name="domains" ftype="bed"
                    file="find_TADs/multiFDR_domains.bed"
                    compare="sim_size" delta="35000" />
            <output name="score" ftype="bedgraph"
                    file="find_TADs/multiFDR_score.bedgraph"
                    compare="sim_size" delta="35000" />
            <output name="tad_score" ftype="bedgraph"
                    file="find_TADs/multiFDR_tad_score.bm"
                    compare="sim_size" delta="35000" />
            <output name="matrix_output" ftype="h5"
                    file="find_TADs/multiFDR_zscore_matrix.h5"
                    compare="sim_size" delta="50000" />
        </test>
    </tests>
    <help><![CDATA[

**What it does**

Uses the graph clustering measure "conductance" to find minimum cuts that correspond to boundaries.

]]></help>
    <expand macro="citations" />
</tool>