diff hicFindTADs.xml @ 2:a9c1d76b90c4 draft

planemo upload for repository https://github.com/maxplanck-ie/HiCExplorer/tree/master/galaxy/wrapper/ commit 4d61b6bf2fed275ab38c226d0c4390b095a38251
author bgruening
date Thu, 02 Nov 2017 11:13:55 -0400
parents aab371aa615e
children 8b60271e7e54
line wrap: on
line diff
--- a/hicFindTADs.xml	Mon Apr 03 07:29:57 2017 -0400
+++ b/hicFindTADs.xml	Thu Nov 02 11:13:55 2017 -0400
@@ -5,133 +5,110 @@
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements" />
-    <command><![CDATA[
-        #if $mode.mode_selector == 'TAD_score':
-            ln -s '$mode.matrix' matrix.npz.h5 &&
-        #end if
-
-        #if $mode.mode_selector == 'find_TADs':
-            ln -s '$tad_score_zscore_matrix' ./tadScoreFile.tabular_zscore_matrix.h5 &&
-        #end if
-
+    <command detect_errors="exit_code"><![CDATA[
+        
         hicFindTADs
-            $mode.mode_selector
+                --matrix '$matrix'
+               
+                --delta $delta
 
-            #if $mode.mode_selector == 'find_TADs':
-                --tadScoreFile '$tadScoreFile'
-                --outPrefix galaxy_tad_prefix
-                ##--maxThreshold $mode.maxThreshold
-                --delta $mode.delta
-                --pvalue $mode.pvalue
-                #if $mode.minBoundaryDistance:
-                    --minBoundaryDistance $mode.minBoundaryDistance
+                #if $minBoundaryDistance:
+                --minBoundaryDistance $minBoundaryDistance
+                #end if
+                --minDepth $minDepth
+                --maxDepth $maxDepth
+                --step $step
+                #if $multiple_comparison_conditional.multiple_comparison_selector == 'fdr':
+                    --correctForMultipleTesting fdr
+                    --threshold $multiple_comparison_conditional.threshold
+                #elif $multiple_comparison_conditional.multiple_comparison_selector == 'bonferroni':
+                    --correctForMultipleTesting bonferroni
+                    --threshold $multiple_comparison_conditional.threshold
+                #else:
+                    --correctForMultipleTesting None
                 #end if
 
-            #else:
-                --matrix matrix.npz.h5
-                --maxDepth $mode.maxDepth
-                --minDepth $mode.minDepth
-                --outFileName ./tadScoreFile.tabular
-
-                --step $mode.step
-            #end if
-
+                --numberOfProcessors @THREADS@
+                --outPrefix galaxy_tad_prefix
     ]]></command>
     <inputs>
-
-        <conditional name="mode">
-            <param name="mode_selector" type="select" label="Range restriction (in bp)" argument="--range">
-                <option value="find_TADs">find TADs</option>
-                <option value="TAD_score">TAD score</option>
+        <param argument="--matrix" type="data" format="h5" label="Corrected Hi-C matrix to use for the computations"/>
+        <param argument="--minDepth" type="integer" value="40000"
+                label="Minimum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
+                help="This number should be at least 3 times as large as the bin size of the Hi-C matrix."/>
+        <param argument="--maxDepth" type="integer" value="100000"
+                label="Maximum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
+                help="This number should be around 6-10 times as large as the bin size of the Hi-C matrix."/>
+        <param argument="--step" type="integer" value="10000"
+                label="Step size when moving from minDepth to maxDepth"
+                help="The step size grows exponentially as minDepth + (step * int(x**1.5)) for x in [0, 1, ...]
+                until  it reaches maxDepth. For example, selecting step=10,000, minDepth=20,000
+                and maxDepth=150,000 will compute TAD-scores for window sizes:
+                20,000, 30,000, 40,000, 70,000 and 100,000"/>
+        <conditional name="multiple_comparison_conditional">
+            <param name="multiple_comparison_selector" type="select" label="Multiple Testing Corrections" >
+                <option value="fdr" selected="True">False discovery rate</option>
+                <option value="bonferroni">Bonferroni correction</option>
+                <option value="None">No correction</option>
             </param>
-            <when value="find_TADs">
-                <param argument="--tadScoreFile" type="data" format="tabular" label="TAD score file"/>
-                <param name="tad_score_zscore_matrix" type="data" format="h5" label="TAD score Matrix file"/>
+            <when value="fdr">
+                <param name="threshold" type="float" value="0.01" label="q-value" />
+            </when>
+            <when value="bonferroni">
+                <param name="threshold" type="float" value="0.01" label="p-value" />
+            </when>
+            <when value="None" />
+        </conditional>
+        <param argument="--delta" type="float" value="0.001" optional="True"
+                label="Minimum threshold of the difference between the TAD-separation score of a putative boundary and the mean of the TAD-sep. score of surrounding bins."
+                help="The delta value reduces spurious boundaries that are shallow, which usually
+                        occur at the center of large TADs when the TAD-sep. score is flat. Higher
+                        delta threshold values produce more conservative boundary estimations. By
+                        default, multiple delta thresholds are saved for the following delta
+                        values: 0.001, 0.01, 0.03, 0.05, 0.1. Other single or multiple values
+                        can be given."/>
 
-                <param argument="--delta" type="float" value="0.001" optional="True"
-                       label="Minimum threshold of the difference between the TAD-separation score of a putative boundary and the mean of the TAD-sep. score of surrounding bins."
-                        help="The delta value reduces spurious boundaries that are shallow, which usually
-                              occur at the center of large TADs when the TAD-sep. score is flat. Higher
-                              delta threshold values produce more conservative boundary estimations. By
-                              default, multiple delta thresholds are saved for the following delta
-                              values: 0.001, 0.01, 0.03, 0.05, 0.1. Other single or multiple values
-                              can be given."/>                <param argument="--pvalue" type="float" value="0.01" 
-                    label="P-value threshold"
-                    help="The probability of a local minima to be a boundary is estimated by comparing the distribution (Wilcoxon ranksum) of the
-                            zscores between the left and right regions (diamond) at the local minimum with the matrix zscores for a diamond at
-                            --minDepth to the left and a diamond --minDepth to the right.
-                            The reported pvalue is the Bonferroni correction all pvalues."/>
-                <param argument="--minBoundaryDistance" type="integer" value="" optional="True"
-                    label="Minimum distance between boundaries (in bp)."
-                    help="This parameter can be used to reduce spurious boundaries caused by noise. "/>
-
-            </when>
-            <when value="TAD_score">
-
-                <param argument="--matrix" type="data" format="h5" label="Corrected Hi-C matrix to use for the computations"/>
-                <param argument="--minDepth" type="integer" value="30000"
-                    label="Minimum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
-                    help="This number should be at least 3 times as large as the bin size of the Hi-C matrix."/>
-                <param argument="--maxDepth" type="integer" value="100000"
-                    label="Maximum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
-                    help="This number should around 6-10 times as large as the bin size of the Hi-C matrix."/>
-                <param argument="--step" type="integer" value="10000"
-                    label="Step size when moving from minDepth to maxDepth"
-                    help="The step size grows exponentially as `maxDeph + (step * int(x)**1.5) for x in [0, 1, ...]`
-                    until  it reaches `maxDepth`. For example, selecting step=10,000, minDepth=20,000
-                    and maxDepth=150,000 will compute TAD-scores for window sizes:
-                    20,000, 30,000, 40,000, 70,000 and 100,000"/>
-            </when>
-        </conditional>
-
+        <param argument="--minBoundaryDistance" type="integer" value="" optional="True"
+                label="Minimum distance between boundaries (in bp)."
+                help="This parameter can be used to reduce spurious boundaries caused by noise. "/>   
 
     </inputs>
     <outputs>
-        <data name="outFileName" from_work_dir="tadScoreFile.tabular" format="tabular">
-            <filter>mode['mode_selector'] == "TAD_score"</filter>
-        </data>
-        <data name="tad_score_zscore_matrix" from_work_dir="tadScoreFile.tabular_zscore_matrix.h5" format="h5">
-            <filter>mode['mode_selector'] == "TAD_score"</filter>
-        </data>
+    
         <data name="boundaries" from_work_dir="galaxy_tad_prefix_boundaries.bed" format="bed"
-            label="${tool.name} on ${on_string}: Boundary positions">
-            <filter>mode['mode_selector'] == "find_TADs"</filter>
-        </data>
+            label="${tool.name} on ${on_string}: Boundary positions" />
+        
         <data name="score" from_work_dir="galaxy_tad_prefix_score.bedgraph" format="bedgraph"
-            label="${tool.name} on ${on_string}: Matrix with multi-scale TAD scores">
-            <filter>mode['mode_selector'] == "find_TADs"</filter>
-        </data>
+            label="${tool.name} on ${on_string}: Matrix with multi-scale TAD scores" />
         <data name="domains" from_work_dir="galaxy_tad_prefix_domains.bed" format="bed"
-            label="${tool.name} on ${on_string}: TAD domains">
-            <filter>mode['mode_selector'] == "find_TADs"</filter>
-        </data>
+            label="${tool.name} on ${on_string}: TAD domains" />
         <data name="boundaries_bin" from_work_dir="galaxy_tad_prefix_boundaries.gff"
-            format="bed" label="${tool.name} on ${on_string}: Boundary information plus score">
-            <filter>mode['mode_selector'] == "find_TADs"</filter>
-        </data>
+            format="gff" label="${tool.name} on ${on_string}: Boundary information plus score" />
+        
+        <data name="tad_score" from_work_dir="galaxy_tad_prefix_tad_score.bm"
+            format="bedgraph" label="${tool.name} on ${on_string}: TAD information in bm file" />
+
+        <data name="matrix_output" from_work_dir="galaxy_tad_prefix_zscore_matrix.h5"
+            format="h5" label="${tool.name} on ${on_string}: Z-score matrix in h5" />
     </outputs>
     <tests>
-        <!--test>
-            <param name="matrix" value="hicBuildMatrix_result1.h5" ftype="h5"/>
-            <param name="mode_selector" value="TAD_score"/>
-            <param name="minDepth" value="20000"/>
-            <param name="maxDepth" value="60000"/>
-            <param name="step" value="100000"/>
-            <output name="outFileName" file="hicFindTADs_TAD_score.tabular" ftype="tabular"/>
-            <output name="tad_score_zscore_matrix" file="tadScoreFile.tabular_zscore_matrix.h5" ftype="h5" compare="sim_size"/>
+        <test>
+            <param name="matrix" value="small_test_matrix.h5" ftype="h5" />
+            <param name="minDepth" value="60000"/>
+            <param name="maxDepth" value="180000"/>
+            <param name="step" value="20000"/>
+            <param name="minBoundaryDistance" value="20000" />
+            <conditional name="multiple_comparison_conditional">
+                <param name="multiple_comparison_selector" value="fdr"/>
+                <param name="threshold" value="0.1" />
+            </conditional>
+            <output name="boundaries" file="find_TADs/multiFDR_boundaries.bed" ftype="bed" compare="sim_size" delta="35000" />
+            <output name="boundaries_bin" file="find_TADs/multiFDR_boundaries.gff" ftype="gff" compare="sim_size" delta="35000" />
+            <output name="domains" file="find_TADs/multiFDR_domains.bed" ftype="bed" compare="sim_size" delta="35000" />
+            <output name="score" file="find_TADs/multiFDR_score.bedgraph" ftype="bedgraph" compare="sim_size" delta="35000" />
+            <output name="tad_score" file="find_TADs/multiFDR_tad_score.bm" ftype="bedgraph" compare="sim_size" delta="35000" />
+            <output name="matrix_output" file="find_TADs/multiFDR_zscore_matrix.h5" ftype="h5" compare="sim_size" delta="50000" />
         </test>
-        <test>
-            <param name="tadScoreFile" value="hicFindTADs_TAD_score.tabular" ftype="tabular"/>
-            <param name="tad_score_zscore_matrix" value="tadScoreFile.tabular_zscore_matrix.h5" ftype="h5"/>
-            <param name="mode_selector" value="find_TADs"/>
-            <param name="delta" value="0.002"/>
-            <param name="pvalue" value="0.01"/>
-
-            <output name="boundaries" file="hicFindTADs_find_boundaries.bed" ftype="bed"/>
-            <output name="score" file="hicFindTADs_find_score.bedgraph" ftype="bedgraph"/>
-            <output name="domains" file="hicFindTADs_find_domains.bed" ftype="bed"/>
-            <output name="boundaries_bin" file="hicFindTADs_find_boundaries.gff" ftype="gff"/>
-        </test-->
     </tests>
     <help><![CDATA[