view hicFindTADs.xml @ 3:f569cd2afecb draft

planemo upload for repository commit 1beb50fdb3f5f8fd60e3b9c3d44964d70075616d
author iuc
date Mon, 27 Nov 2017 11:12:30 -0500
parents a9c1d76b90c4
children 8b60271e7e54
line wrap: on
line source

<tool id="hicexplorer_hicfindtads" name="@BINARY@" version="@WRAPPER_VERSION@.0">
    <!--
        Galaxy wrapper around the hicFindTADs executable (see the @BINARY@ token).
        It takes one h5 Hi-C matrix plus window/threshold settings and collects
        six result files that the program writes under the fixed output prefix
        "galaxy_tad_prefix" in the job working directory.
    -->
    <description>find minimum cuts that correspond to boundaries</description>
    <macros>
        <token name="@BINARY@">hicFindTADs</token>
        <!-- NOTE(review): the "requirements" and "citations" macros and the
             @THREADS@ / @WRAPPER_VERSION@ tokens are not defined in this file;
             they are assumed to come from the shared macros file of the
             repository. Confirm the file name. -->
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        ## The executable name is substituted from the @BINARY@ token.
        @BINARY@
            --matrix '$matrix'

            ## delta is an optional input: emit the flag only when a value was entered,
            ## otherwise the flag would be left without an argument.
            #if str($delta) != '':
                --delta $delta
            #end if

            #if $minBoundaryDistance:
                --minBoundaryDistance $minBoundaryDistance
            #end if

            --minDepth $minDepth
            --maxDepth $maxDepth
            --step $step

            ## Exactly one correction mode is passed; a threshold only exists for
            ## the fdr and bonferroni cases of the conditional.
            #if $multiple_comparison_conditional.multiple_comparison_selector == 'fdr':
                --correctForMultipleTesting fdr
                --threshold $multiple_comparison_conditional.threshold
            #elif $multiple_comparison_conditional.multiple_comparison_selector == 'bonferroni':
                --correctForMultipleTesting bonferroni
                --threshold $multiple_comparison_conditional.threshold
            #else:
                --correctForMultipleTesting None
            #end if

            --numberOfProcessors @THREADS@

            ## Fixed prefix: every from_work_dir in the outputs section relies on it.
            --outPrefix galaxy_tad_prefix
    ]]></command>
    <inputs>
        <param argument="--matrix" type="data" format="h5" label="Corrected Hi-C matrix to use for the computations"/>
        <param argument="--minDepth" type="integer" value="40000"
                label="Minimum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
                help="This number should be at least 3 times as large as the bin size of the Hi-C matrix."/>
        <param argument="--maxDepth" type="integer" value="100000"
                label="Maximum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
                help="This number should be around 6-10 times as large as the bin size of the Hi-C matrix."/>
        <param argument="--step" type="integer" value="10000"
                label="Step size when moving from minDepth to maxDepth"
                help="The step size grows exponentially as minDepth + (step * int(x**1.5)) for x in [0, 1, ...]
                until it reaches maxDepth. For example, selecting step=10,000, minDepth=20,000
                and maxDepth=150,000 will compute TAD-scores for window sizes:
                20,000, 30,000, 40,000, 70,000 and 100,000"/>
        <!-- The selector decides which correction mode the command passes; the
             threshold is a q-value for fdr and a p-value for bonferroni. -->
        <conditional name="multiple_comparison_conditional">
            <param name="multiple_comparison_selector" type="select" label="Multiple Testing Corrections" >
                <option value="fdr" selected="True">False discovery rate</option>
                <option value="bonferroni">Bonferroni correction</option>
                <option value="None">No correction</option>
            </param>
            <when value="fdr">
                <param name="threshold" type="float" value="0.01" label="q-value" />
            </when>
            <when value="bonferroni">
                <param name="threshold" type="float" value="0.01" label="p-value" />
            </when>
            <when value="None" />
        </conditional>
        <param argument="--delta" type="float" value="0.001" optional="True"
                label="Minimum threshold of the difference between the TAD-separation score of a putative boundary and the mean of the TAD-sep. score of surrounding bins."
                help="The delta value reduces spurious boundaries that are shallow, which usually
                        occur at the center of large TADs when the TAD-sep. score is flat. Higher
                        delta threshold values produce more conservative boundary estimations. By
                        default, multiple delta thresholds are saved for the following delta
                        values: 0.001, 0.01, 0.03, 0.05, 0.1. Other single or multiple values
                        can be given."/>

        <param argument="--minBoundaryDistance" type="integer" value="" optional="True"
                label="Minimum distance between boundaries (in bp)."
                help="This parameter can be used to reduce spurious boundaries caused by noise. "/>
    </inputs>
    <outputs>
        <!-- Each dataset is picked up from the job working directory; the file
             names start with the prefix given to the outPrefix option above. -->
        <data name="boundaries" from_work_dir="galaxy_tad_prefix_boundaries.bed" format="bed"
            label="${tool.name} on ${on_string}: Boundary positions" />
        <data name="score" from_work_dir="galaxy_tad_prefix_score.bedgraph" format="bedgraph"
            label="${tool.name} on ${on_string}: Matrix with multi-scale TAD scores" />
        <data name="domains" from_work_dir="galaxy_tad_prefix_domains.bed" format="bed"
            label="${tool.name} on ${on_string}: TAD domains" />
        <data name="boundaries_bin" from_work_dir="galaxy_tad_prefix_boundaries.gff"
            format="gff" label="${tool.name} on ${on_string}: Boundary information plus score" />
        <!-- NOTE(review): file name inferred from the output prefix and the
             "bm file" wording of the label; confirm against the files that
             hicFindTADs actually writes. -->
        <data name="tad_score" from_work_dir="galaxy_tad_prefix_tad_score.bm"
            format="bedgraph" label="${tool.name} on ${on_string}: TAD information in bm file" />

        <data name="matrix_output" from_work_dir="galaxy_tad_prefix_zscore_matrix.h5"
            format="h5" label="${tool.name} on ${on_string}: Z-score matrix in h5" />
    </outputs>
    <tests>
        <!-- FDR run on the small test matrix; outputs are compared by size only. -->
        <test>
            <param name="matrix" value="small_test_matrix.h5" ftype="h5" />
            <param name="minDepth" value="60000"/>
            <param name="maxDepth" value="180000"/>
            <param name="step" value="20000"/>
            <param name="minBoundaryDistance" value="20000" />
            <conditional name="multiple_comparison_conditional">
                <param name="multiple_comparison_selector" value="fdr"/>
                <param name="threshold" value="0.1" />
            </conditional>
            <output name="boundaries" file="find_TADs/multiFDR_boundaries.bed" ftype="bed" compare="sim_size" delta="35000" />
            <output name="boundaries_bin" file="find_TADs/multiFDR_boundaries.gff" ftype="gff" compare="sim_size" delta="35000" />
            <output name="domains" file="find_TADs/multiFDR_domains.bed" ftype="bed" compare="sim_size" delta="35000" />
            <output name="score" file="find_TADs/multiFDR_score.bedgraph" ftype="bedgraph" compare="sim_size" delta="35000" />
            <!-- NOTE(review): expected file name inferred from the naming of the
                 sibling test files; confirm it exists in test-data. -->
            <output name="tad_score" file="find_TADs/multiFDR_tad_score.bm" ftype="bedgraph" compare="sim_size" delta="35000" />
            <output name="matrix_output" file="find_TADs/multiFDR_zscore_matrix.h5" ftype="h5" compare="sim_size" delta="50000" />
        </test>
    </tests>
    <help><![CDATA[

**What it does**

Uses the graph clustering measure "conductance" to find minimum cuts that correspond to boundaries.

]]></help>
    <expand macro="citations" />
</tool>