diff hicFindTADs.xml @ 12:6b7987d22eab draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 3b41d687ff30583540d055f6995de00530cca81d-dirty"
author bgruening
date Mon, 16 Dec 2019 15:48:31 -0500
parents b05f292d220c
children 1d9b575fe97d
line wrap: on
line diff
--- a/hicFindTADs.xml	Mon Dec 16 10:36:22 2019 -0500
+++ b/hicFindTADs.xml	Mon Dec 16 15:48:31 2019 -0500
@@ -6,17 +6,28 @@
     </macros>
     <expand macro="requirements" />
     <command detect_errors="exit_code"><![CDATA[
-        hicFindTADs
-                --matrix '$matrix_h5_cooler'
+        ln -s '$matrix_h5_cooler' 'matrix.$matrix_h5_cooler.ext' &&
+        
+        #if $precomputedZscore_conditional.precomputedZscore_selector == 'precomputed':
+            ln -s '$precomputedZscore_conditional.scoreFile' 'prefix_tad_score.bm' && 
+            ln -s '$precomputedZscore_conditional.zscoreMatrix' 'prefix_zscore_matrix.h5' &&
+        #end if
+        @BINARY@
+                --matrix 'matrix.$matrix_h5_cooler.ext'
 
                 --delta $delta
 
                 #if $minBoundaryDistance:
                 --minBoundaryDistance $minBoundaryDistance
                 #end if
-                --minDepth $minDepth
-                --maxDepth $maxDepth
-                --step $step
+
+                #if $precomputedZscore_conditional.precomputedZscore_selector == 'scratch':
+                    --minDepth $precomputedZscore_conditional.minDepth
+                    --maxDepth $precomputedZscore_conditional.maxDepth
+                    --step $precomputedZscore_conditional.step
+                #elif $precomputedZscore_conditional.precomputedZscore_selector == 'precomputed':
+                    --TAD_sep_score_prefix prefix
+                #end if
                 #if $multiple_comparison_conditional.multiple_comparison_selector == 'fdr':
                     --correctForMultipleTesting fdr
                     --threshold $multiple_comparison_conditional.threshold
@@ -29,22 +40,39 @@
 
                 --numberOfProcessors @THREADS@
                 --outPrefix galaxy_tad_prefix
+                #if $chromosomes:
+                    ## Each name is single-quoted on its own and separated by a bare space, so the shell passes one argument per chromosome.
+                    --chromosomes #echo " ".join([ "'%s'" % $chrom.chromosome for $chrom in $chromosomes ])#
+                #end if
     ]]></command>
     <inputs>
         <expand macro='matrix_h5_cooler_macro' />
 
-        <param argument="--minDepth" type="integer" value="40000"
-                label="Minimum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
-                help="This number should be at least 3 times as large as the bin size of the Hi-C matrix."/>
-        <param argument="--maxDepth" type="integer" value="100000"
-                label="Maximum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
-                help="This number should around 6-10 times as large as the bin size of the Hi-C matrix."/>
-        <param argument="--step" type="integer" value="10000"
-                label="Step size when moving from minDepth to maxDepth"
-                help="The step size grows exponentially as maxDeph + (step * int(x)**1.5) for x in [0, 1, ...]
-                until  it reaches maxDepth. For example, selecting step=10,000, minDepth=20,000
-                and maxDepth=150,000 will compute TAD-scores for window sizes:
-                20,000, 30,000, 40,000, 70,000 and 100,000"/>
+        <conditional name='precomputedZscore_conditional'>
+            <param name='precomputedZscore_selector' type="select" label="Compute from scratch or use precomputed data">
+                <option value='scratch' selected='True'>From scratch</option>
+                <option value='precomputed'>Precomputed z-score matrix</option>
+            </param>
+            <when value='scratch'>
+
+                <param argument="--minDepth" type="integer" value="5000"
+                        label="Minimum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
+                        help="This number should be at least 3 times as large as the bin size of the Hi-C matrix."/>
+                <param argument="--maxDepth" type="integer" value="10000"
+                        label="Maximum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
+                        help="This number should be around 6-10 times as large as the bin size of the Hi-C matrix."/>
+                <param argument="--step" type="integer" value="10000"
+                        label="Step size when moving from minDepth to maxDepth"
+                        help="The step size grows exponentially as minDepth + (step * int(x)**1.5) for x in [0, 1, ...]
+                        until  it reaches maxDepth. For example, selecting step=10,000, minDepth=20,000
+                        and maxDepth=150,000 will compute TAD-scores for window sizes:
+                        20,000, 30,000, 40,000, 70,000 and 100,000"/>
+            </when>
+            <when value='precomputed'>
+                <param name="scoreFile" type="data" format='bedgraph'  label="Precomputed TAD score file (bm)"/>
+                <param name="zscoreMatrix" type="data" format='h5'  label="Precomputed z-score matrix"/>
+            </when>
+        </conditional>
+        
         <conditional name="multiple_comparison_conditional">
             <param name="multiple_comparison_selector" type="select" label="Multiple Testing Corrections" >
                 <option value="fdr" selected="True">False discovery rate</option>
@@ -71,7 +99,11 @@
         <param argument="--minBoundaryDistance" type="integer" value="" optional="True"
                 label="Minimum distance between boundaries (in bp)."
                 help="This parameter can be used to reduce spurious boundaries caused by noise. "/>
-
+        <repeat name="chromosomes" title="List of chromosomes to be included in the TAD calling" min="0">
+            <param name="chromosome" type="text" label='chromosome (one per field)'>
+                <validator type="empty_field" />
+            </param>
+        </repeat>
     </inputs>
     <outputs>
 
@@ -86,19 +118,51 @@
             format="gff" label="${tool.name} on ${on_string}: Boundary information plus score" />
 
         <data name="tad_score" from_work_dir="galaxy_tad_prefix_tad_score.bm"
-            format="bedgraph" label="${tool.name} on ${on_string}: TAD information in bm file" />
+            format="bedgraph" label="${tool.name} on ${on_string}: TAD information in bm file" >
+            <filter>precomputedZscore_conditional.precomputedZscore_selector == 'scratch'</filter>
+
+        </data>
 
         <data name="matrix_output" from_work_dir="galaxy_tad_prefix_zscore_matrix.h5"
-            format="h5" label="${tool.name} on ${on_string}: Z-score matrix in h5" />
+            format="h5" label="${tool.name} on ${on_string}: Z-score matrix in h5">
+            <filter>precomputedZscore_conditional.precomputedZscore_selector == 'scratch'</filter>
+        </data>
     </outputs>
     <tests>
         <test>
             <param name="matrix_h5_cooler" value="small_test_matrix.h5"/>
+            <conditional name="precomputedZscore_conditional">
+                <param name="precomputedZscore_selector" value="scratch"/>
+                <param name="minDepth" value="15000"/>
+                <param name="maxDepth" value="30000"/>
+                <param name="step" value="15000"/>
+            </conditional>
+            
+            <param name="minBoundaryDistance" value="5000" />
+            <conditional name="multiple_comparison_conditional">
+                <param name="multiple_comparison_selector" value="fdr"/>
+                <param name="threshold" value="0.8" />
+            </conditional>
+            <output name="boundaries" file="find_TADs/multiFDR_boundaries.bed" ftype="bed" compare="sim_size" delta="35000" />
+            <output name="boundaries_bin" file="find_TADs/multiFDR_boundaries.gff" ftype="gff" compare="sim_size" delta="35000" />
+            <output name="domains" file="find_TADs/multiFDR_domains.bed" ftype="bed" compare="sim_size" delta="35000" />
+            <output name="score" file="find_TADs/multiFDR_score.bedgraph" ftype="bedgraph" compare="sim_size" delta="35000" />
+            <output name="tad_score" file="find_TADs/multiFDR_tad_score.bm" ftype="bedgraph" compare="sim_size" delta="35000" />
+            <output name="matrix_output" ftype="h5">
+                <assert_contents>
+                    <has_h5_keys keys='intervals,matrix'/>
+                </assert_contents>
+            </output>
 
-            <param name="minDepth" value="60000"/>
-            <param name="maxDepth" value="180000"/>
-            <param name="step" value="20000"/>
-            <param name="minBoundaryDistance" value="20000" />
+        </test>
+        <test>
+            <param name="matrix_h5_cooler" value="small_test_matrix.h5"/>
+            <conditional name="precomputedZscore_conditional">
+                <param name="precomputedZscore_selector" value="precomputed"/>
+                <param name="scoreFile" value="find_TADs/multiFDR_tad_score.bm" />
+                <param name="zscoreMatrix" value="find_TADs/multiFDR_zscore_matrix.h5"/>
+            </conditional>
+            <param name="minBoundaryDistance" value="5000" />
             <conditional name="multiple_comparison_conditional">
                 <param name="multiple_comparison_selector" value="fdr"/>
                 <param name="threshold" value="0.1" />
@@ -107,7 +171,6 @@
             <output name="boundaries_bin" file="find_TADs/multiFDR_boundaries.gff" ftype="gff" compare="sim_size" delta="35000" />
             <output name="domains" file="find_TADs/multiFDR_domains.bed" ftype="bed" compare="sim_size" delta="35000" />
             <output name="score" file="find_TADs/multiFDR_score.bedgraph" ftype="bedgraph" compare="sim_size" delta="35000" />
-            <output name="tad_score" file="find_TADs/multiFDR_tad_score.bm" ftype="bedgraph" compare="sim_size" delta="35000" />
         </test>
     </tests>
     <help><![CDATA[
@@ -152,7 +215,7 @@
 - TAD boundaries positions with delta, p-value and TAD separation score as GFF.
 - TAD domains as a BED file.
 - TAD seperation score as bigwig (bw), bedgraph and numpy array (npz) format. These files can be used to plot the so-called TAD insulation score or TAD separation score along the genome or at specific regions. This score is much more reliable across samples than the number of TADs or the TADs width that can vary depending on the sequencing depth because of the lack of information at certain bins, and depending on the parameters used with this tool.
-- Matrix with multi-scale TAD scores as a bed-matrix (bm) file that can be plotted inside ``hicPlotTADs`` to nicely display TAD insulation score alongside Hi-C heatmap and other datasets.
+- Matrix with multi-scale TAD scores as a bed-matrix (bm) file that can be plotted inside ``pyGenomeTracks`` to nicely display TAD insulation score alongside Hi-C heatmap and other datasets.
 - Z-score matrix in h5 format that is useful to quickly test the --thresholdComparisons, --delta and --correctForMultipleTesting parameters by using the --TAD_sep_score_prefix option pointing to this zscore_matrix.h5 file (will be added in a future update).
 
 _________________
@@ -160,7 +223,7 @@
 Usage hints
 -----------
 
-It is mandatory to test multiple parameters of TAD calling with **hicFindTADs** before making conclusions about the number of TADs in a given sample or before comparing TAD calling between multiple conditions. In order to compare numerous TAD calling parameters at once, it is recommended to use ``hicPlotTADs``, below you can find a plot where multiple TAD calling parameters are displayed for *Drosophila melanogaster* embryos:
+It is mandatory to test multiple parameters of TAD calling with **hicFindTADs** before making conclusions about the number of TADs in a given sample or before comparing TAD calling between multiple conditions. In order to compare numerous TAD calling parameters at once, it is recommended to use ``pyGenomeTracks``, below you can find a plot where multiple TAD calling parameters are displayed for *Drosophila melanogaster* embryos:
 
 .. image:: $PATH_TO_IMAGES/hicFindTADs_TAD_calling_comparison.png
    :width: 65 %