Mercurial > repos > bgruening > hicexplorer_hicfindtads
diff hicFindTADs.xml @ 2:a9c1d76b90c4 draft
planemo upload for repository https://github.com/maxplanck-ie/HiCExplorer/tree/master/galaxy/wrapper/ commit 4d61b6bf2fed275ab38c226d0c4390b095a38251
author | bgruening |
---|---|
date | Thu, 02 Nov 2017 11:13:55 -0400 |
parents | aab371aa615e |
children | 8b60271e7e54 |
line wrap: on
line diff
--- a/hicFindTADs.xml Mon Apr 03 07:29:57 2017 -0400 +++ b/hicFindTADs.xml Thu Nov 02 11:13:55 2017 -0400 @@ -5,133 +5,110 @@ <import>macros.xml</import> </macros> <expand macro="requirements" /> - <command><![CDATA[ - #if $mode.mode_selector == 'TAD_score': - ln -s '$mode.matrix' matrix.npz.h5 && - #end if - - #if $mode.mode_selector == 'find_TADs': - ln -s '$tad_score_zscore_matrix' ./tadScoreFile.tabular_zscore_matrix.h5 && - #end if - + <command detect_errors="exit_code"><![CDATA[ + hicFindTADs - $mode.mode_selector + --matrix '$matrix' + + --delta $delta - #if $mode.mode_selector == 'find_TADs': - --tadScoreFile '$tadScoreFile' - --outPrefix galaxy_tad_prefix - ##--maxThreshold $mode.maxThreshold - --delta $mode.delta - --pvalue $mode.pvalue - #if $mode.minBoundaryDistance: - --minBoundaryDistance $mode.minBoundaryDistance + #if $minBoundaryDistance: + --minBoundaryDistance $minBoundaryDistance + #end if + --minDepth $minDepth + --maxDepth $maxDepth + --step $step + #if $multiple_comparison_conditional.multiple_comparison_selector == 'fdr': + --correctForMultipleTesting fdr + --threshold $multiple_comparison_conditional.threshold + #elif $multiple_comparison_conditional.multiple_comparison_selector == 'bonferroni': + --correctForMultipleTesting bonferroni + --threshold $multiple_comparison_conditional.threshold + #else: + --multipleComparisons None #end if - #else: - --matrix matrix.npz.h5 - --maxDepth $mode.maxDepth - --minDepth $mode.minDepth - --outFileName ./tadScoreFile.tabular - - --step $mode.step - #end if - + --numberOfProcessors @THREADS@ + --outPrefix galaxy_tad_prefix ]]></command> <inputs> - - <conditional name="mode"> - <param name="mode_selector" type="select" label="Range restriction (in bp)" argument="--range"> - <option value="find_TADs">find TADs</option> - <option value="TAD_score">TAD score</option> + <param argument="--matrix" type="data" format="h5" label="Corrected Hi-C matrix to use for the computations"/> + <param argument="--minDepth" type="integer" value="40000" + label="Minimum window length (in bp) to be considered to the left and to the right of each Hi-C bin." + help="This number should be at least 3 times as large as the bin size of the Hi-C matrix."/> + <param argument="--maxDepth" type="integer" value="100000" + label="Maximum window length (in bp) to be considered to the left and to the right of each Hi-C bin." + help="This number should around 6-10 times as large as the bin size of the Hi-C matrix."/> + <param argument="--step" type="integer" value="10000" + label="Step size when moving from minDepth to maxDepth" + help="The step size grows exponentially as maxDeph + (step * int(x)**1.5) for x in [0, 1, ...] + until it reaches maxDepth. For example, selecting step=10,000, minDepth=20,000 + and maxDepth=150,000 will compute TAD-scores for window sizes: + 20,000, 30,000, 40,000, 70,000 and 100,000"/> + <conditional name="multiple_comparison_conditional"> + <param name="multiple_comparison_selector" type="select" label="Multiple Testing Corrections" > + <option value="fdr" selected="True">False discovery rate</option> + <option value="bonferroni">Bonferroni correction</option> + <option value="None">No correction</option> </param> - <when value="find_TADs"> - <param argument="--tadScoreFile" type="data" format="tabular" label="TAD score file"/> - <param name="tad_score_zscore_matrix" type="data" format="h5" label="TAD score Matrix file"/> + <when value="fdr"> + <param name="threshold" type="float" value="0.01" label="q-value" /> + </when> + <when value="bonferroni"> + <param name="threshold" type="float" value="0.01" label="p-value" /> + </when> + <when value="None" /> + </conditional> + <param argument="--delta" type="float" value="0.001" optional="True" + label="Minimum threshold of the difference between the TAD-separation score of a putative boundary and the mean of the TAD-sep. score of surrounding bins." + help="The delta value reduces spurious boundaries that are shallow, which usually + occur at the center of large TADs when the TAD-sep. score is flat. Higher + delta threshold values produce more conservative boundary estimations. By + default, multiple delta thresholds are saved for the following delta + values: 0.001, 0.01, 0.03, 0.05, 0.1. Other single or multiple values + can be given."/> - <param argument="--delta" type="float" value="0.001" optional="True" - label="Minimum threshold of the difference between the TAD-separation score of a putative boundary and the mean of the TAD-sep. score of surrounding bins." - help="The delta value reduces spurious boundaries that are shallow, which usually - occur at the center of large TADs when the TAD-sep. score is flat. Higher - delta threshold values produce more conservative boundary estimations. By - default, multiple delta thresholds are saved for the following delta - values: 0.001, 0.01, 0.03, 0.05, 0.1. Other single or multiple values - can be given."/> <param argument="--pvalue" type="float" value="0.01" - label="P-value threshold" - help="The probability of a local minima to be a boundary is estimated by comparing the distribution (Wilcoxon ranksum) of the - zscores between the left and right regions (diamond) at the local minimum with the matrix zscores for a diamond at - --minDepth to the left and a diamond --minDepth to the right. - The reported pvalue is the Bonferroni correction all pvalues."/> - <param argument="--minBoundaryDistance" type="integer" value="" optional="True" - label="Minimum distance between boundaries (in bp)." - help="This parameter can be used to reduce spurious boundaries caused by noise. "/> - - </when> - <when value="TAD_score"> - - <param argument="--matrix" type="data" format="h5" label="Corrected Hi-C matrix to use for the computations"/> - <param argument="--minDepth" type="integer" value="30000" - label="Minimum window length (in bp) to be considered to the left and to the right of each Hi-C bin." - help="This number should be at least 3 times as large as the bin size of the Hi-C matrix."/> - <param argument="--maxDepth" type="integer" value="100000" - label="Maximum window length (in bp) to be considered to the left and to the right of each Hi-C bin." - help="This number should around 6-10 times as large as the bin size of the Hi-C matrix."/> - <param argument="--step" type="integer" value="10000" - label="Step size when moving from minDepth to maxDepth" - help="The step size grows exponentially as `maxDeph + (step * int(x)**1.5) for x in [0, 1, ...]` - until it reaches `maxDepth`. For example, selecting step=10,000, minDepth=20,000 - and maxDepth=150,000 will compute TAD-scores for window sizes: - 20,000, 30,000, 40,000, 70,000 and 100,000"/> - </when> - </conditional> - + <param argument="--minBoundaryDistance" type="integer" value="" optional="True" + label="Minimum distance between boundaries (in bp)." + help="This parameter can be used to reduce spurious boundaries caused by noise. "/> </inputs> <outputs> - <data name="outFileName" from_work_dir="tadScoreFile.tabular" format="tabular"> - <filter>mode['mode_selector'] == "TAD_score"</filter> - </data> - <data name="tad_score_zscore_matrix" from_work_dir="tadScoreFile.tabular_zscore_matrix.h5" format="h5"> - <filter>mode['mode_selector'] == "TAD_score"</filter> - </data> + <data name="boundaries" from_work_dir="galaxy_tad_prefix_boundaries.bed" format="bed" - label="${tool.name} on ${on_string}: Boundary positions"> - <filter>mode['mode_selector'] == "find_TADs"</filter> - </data> + label="${tool.name} on ${on_string}: Boundary positions" /> + <data name="score" from_work_dir="galaxy_tad_prefix_score.bedgraph" format="bedgraph" - label="${tool.name} on ${on_string}: Matrix with multi-scale TAD scores"> - <filter>mode['mode_selector'] == "find_TADs"</filter> - </data> + label="${tool.name} on ${on_string}: Matrix with multi-scale TAD scores" /> <data name="domains" from_work_dir="galaxy_tad_prefix_domains.bed" format="bed" - label="${tool.name} on ${on_string}: TAD domains"> - <filter>mode['mode_selector'] == "find_TADs"</filter> - </data> + label="${tool.name} on ${on_string}: TAD domains" /> <data name="boundaries_bin" from_work_dir="galaxy_tad_prefix_boundaries.gff" - format="bed" label="${tool.name} on ${on_string}: Boundary information plus score"> - <filter>mode['mode_selector'] == "find_TADs"</filter> - </data> + format="gff" label="${tool.name} on ${on_string}: Boundary information plus score" /> + + <data name="tad_score" from_work_dir="galaxy_tad_prefix_tad_score.bm" + format="bedgraph" label="${tool.name} on ${on_string}: TAD information in bm file" /> + + <data name="matrix_output" from_work_dir="galaxy_tad_prefix_zscore_matrix.h5" + format="h5" label="${tool.name} on ${on_string}: Z-score matrix in h5" /> </outputs> <tests> - <!--test> - <param name="matrix" value="hicBuildMatrix_result1.h5" ftype="h5"/> - <param name="mode_selector" value="TAD_score"/> - <param name="minDepth" value="20000"/> - <param name="maxDepth" value="60000"/> - <param name="step" value="100000"/> - <output name="outFileName" file="hicFindTADs_TAD_score.tabular" ftype="tabular"/> - <output name="tad_score_zscore_matrix" file="tadScoreFile.tabular_zscore_matrix.h5" ftype="h5" compare="sim_size"/> + <test> + <param name="matrix" value="small_test_matrix.h5" ftype="h5" /> + <param name="minDepth" value="60000"/> + <param name="maxDepth" value="180000"/> + <param name="step" value="20000"/> + <param name="minBoundaryDistance" value="20000" /> + <conditional name="multiple_comparison_conditional"> + <param name="multiple_comparison_selector" value="fdr"/> + <param name="threshold" value="0.1" /> + </conditional> + <output name="boundaries" file="find_TADs/multiFDR_boundaries.bed" ftype="bed" compare="sim_size" delta="35000" /> + <output name="boundaries_bin" file="find_TADs/multiFDR_boundaries.gff" ftype="gff" compare="sim_size" delta="35000" /> + <output name="domains" file="find_TADs/multiFDR_domains.bed" ftype="bed" compare="sim_size" delta="35000" /> + <output name="score" file="find_TADs/multiFDR_score.bedgraph" ftype="bedgraph" compare="sim_size" delta="35000" /> + <output name="tad_score" file="find_TADs/multiFDR_tad_score.bm" ftype="bedgraph" compare="sim_size" delta="35000" /> + <output name="matrix_output" file="find_TADs/multiFDR_zscore_matrix.h5" ftype="h5" compare="sim_size" delta="50000" /> </test> - <test> - <param name="tadScoreFile" value="hicFindTADs_TAD_score.tabular" ftype="tabular"/> - <param name="tad_score_zscore_matrix" value="tadScoreFile.tabular_zscore_matrix.h5" ftype="h5"/> - <param name="mode_selector" value="find_TADs"/> - <param name="delta" value="0.002"/> - <param name="pvalue" value="0.01"/> - - <output name="boundaries" file="hicFindTADs_find_boundaries.bed" ftype="bed"/> - <output name="score" file="hicFindTADs_find_score.bedgraph" ftype="bedgraph"/> - <output name="domains" file="hicFindTADs_find_domains.bed" ftype="bed"/> - <output name="boundaries_bin" file="hicFindTADs_find_boundaries.gff" ftype="gff"/> - </test--> </tests> <help><![CDATA[