comparison hicFindTADs.xml @ 12:6b7987d22eab draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 3b41d687ff30583540d055f6995de00530cca81d-dirty"
author bgruening
date Mon, 16 Dec 2019 15:48:31 -0500
parents b05f292d220c
children 1d9b575fe97d
comparison
equal deleted inserted replaced
11:6bda631b9239 12:6b7987d22eab
4 <token name="@BINARY@">hicFindTADs</token> 4 <token name="@BINARY@">hicFindTADs</token>
5 <import>macros.xml</import> 5 <import>macros.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements" /> 7 <expand macro="requirements" />
8 <command detect_errors="exit_code"><![CDATA[ 8 <command detect_errors="exit_code"><![CDATA[
9 hicFindTADs 9 ln -s '$matrix_h5_cooler' 'matrix.$matrix_h5_cooler.ext' &&
10 --matrix '$matrix_h5_cooler' 10
11 #if $precomputedZscore_conditional.precomputedZscore_selector == 'precomputed':
12 ln -s '$precomputedZscore_conditional.scoreFile' 'prefix_tad_score.bm' &&
13 ln -s '$precomputedZscore_conditional.zscoreMatrix' 'prefix_zscore_matrix.h5' &&
14 #end if
15 @BINARY@
16 --matrix 'matrix.$matrix_h5_cooler.ext'
11 17
12 --delta $delta 18 --delta $delta
13 19
14 #if $minBoundaryDistance: 20 #if $minBoundaryDistance:
15 --minBoundaryDistance $minBoundaryDistance 21 --minBoundaryDistance $minBoundaryDistance
16 #end if 22 #end if
17 --minDepth $minDepth 23
18 --maxDepth $maxDepth 24 #if $precomputedZscore_conditional.precomputedZscore_selector == 'scratch':
19 --step $step 25 --minDepth $precomputedZscore_conditional.minDepth
26 --maxDepth $precomputedZscore_conditional.maxDepth
27 --step $precomputedZscore_conditional.step
28 #elif $precomputedZscore_conditional.precomputedZscore_selector == 'precomputed':
29 --TAD_sep_score_prefix prefix
30 #end if
20 #if $multiple_comparison_conditional.multiple_comparison_selector == 'fdr': 31 #if $multiple_comparison_conditional.multiple_comparison_selector == 'fdr':
21 --correctForMultipleTesting fdr 32 --correctForMultipleTesting fdr
22 --threshold $multiple_comparison_conditional.threshold 33 --threshold $multiple_comparison_conditional.threshold
23 #elif $multiple_comparison_conditional.multiple_comparison_selector == 'bonferroni': 34 #elif $multiple_comparison_conditional.multiple_comparison_selector == 'bonferroni':
24 --correctForMultipleTesting bonferroni 35 --correctForMultipleTesting bonferroni
27 --multipleComparisons None 38 --multipleComparisons None
28 #end if 39 #end if
29 40
30 --numberOfProcessors @THREADS@ 41 --numberOfProcessors @THREADS@
31 --outPrefix galaxy_tad_prefix 42 --outPrefix galaxy_tad_prefix
43 #if $chromosomes:
44 --chromosomes #echo "' '".join([ "'%s'" % $chrom.chromosome for $chrom in $chromosomes ])#
45 #end if
32 ]]></command> 46 ]]></command>
33 <inputs> 47 <inputs>
34 <expand macro='matrix_h5_cooler_macro' /> 48 <expand macro='matrix_h5_cooler_macro' />
35 49
36 <param argument="--minDepth" type="integer" value="40000" 50 <conditional name='precomputedZscore_conditional'>
37 label="Minimum window length (in bp) to be considered to the left and to the right of each Hi-C bin." 51 <param name='precomputedZscore_selector' type="select" label="Compute from scratch or use precomputed data">
38 help="This number should be at least 3 times as large as the bin size of the Hi-C matrix."/> 52 <option value='scratch' selected='True'>From scratch</option>
39 <param argument="--maxDepth" type="integer" value="100000" 53 <option value='precomputed'>Precomputed z-score matrix</option>
40 label="Maximum window length (in bp) to be considered to the left and to the right of each Hi-C bin." 54 </param>
41 help="This number should be around 6-10 times as large as the bin size of the Hi-C matrix."/> 55 <when value='scratch'>
42 <param argument="--step" type="integer" value="10000" 56
43 label="Step size when moving from minDepth to maxDepth" 57 <param argument="--minDepth" type="integer" value="5000"
44 help="The step size grows exponentially as minDepth + (step * int(x)**1.5) for x in [0, 1, ...] 58 label="Minimum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
45 until it reaches maxDepth. For example, selecting step=10,000, minDepth=20,000 59 help="This number should be at least 3 times as large as the bin size of the Hi-C matrix."/>
46 and maxDepth=150,000 will compute TAD-scores for window sizes: 60 <param argument="--maxDepth" type="integer" value="10000"
47 20,000, 30,000, 40,000, 70,000 and 100,000"/> 61 label="Maximum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
62 help="This number should be around 6-10 times as large as the bin size of the Hi-C matrix."/>
63 <param argument="--step" type="integer" value="10000"
64 label="Step size when moving from minDepth to maxDepth"
65 help="The step size grows exponentially as minDepth + (step * int(x)**1.5) for x in [0, 1, ...]
66 until it reaches maxDepth. For example, selecting step=10,000, minDepth=20,000
67 and maxDepth=150,000 will compute TAD-scores for window sizes:
68 20,000, 30,000, 40,000, 70,000 and 100,000"/>
69 </when>
70 <when value='precomputed'>
71 <param name="scoreFile" type="data" format='bedgraph' label="Precomputed TAD score file (bm)"/>
72 <param name="zscoreMatrix" type="data" format='h5' label="Precomputed z-score matrix"/>
73 </when>
74 </conditional>
75
48 <conditional name="multiple_comparison_conditional"> 76 <conditional name="multiple_comparison_conditional">
49 <param name="multiple_comparison_selector" type="select" label="Multiple Testing Corrections" > 77 <param name="multiple_comparison_selector" type="select" label="Multiple Testing Corrections" >
50 <option value="fdr" selected="True">False discovery rate</option> 78 <option value="fdr" selected="True">False discovery rate</option>
51 <option value="bonferroni">Bonferroni correction</option> 79 <option value="bonferroni">Bonferroni correction</option>
52 <option value="None">No correction</option> 80 <option value="None">No correction</option>
69 can be given."/> 97 can be given."/>
70 98
71 <param argument="--minBoundaryDistance" type="integer" value="" optional="True" 99 <param argument="--minBoundaryDistance" type="integer" value="" optional="True"
72 label="Minimum distance between boundaries (in bp)." 100 label="Minimum distance between boundaries (in bp)."
73 help="This parameter can be used to reduce spurious boundaries caused by noise. "/> 101 help="This parameter can be used to reduce spurious boundaries caused by noise. "/>
74 102 <repeat name="chromosomes" title="List of chromosomes to be included in the TAD calling" min="0">
103 <param name="chromosome" type="text" label='chromosome (one per field)'>
104 <validator type="empty_field" />
105 </param>
106 </repeat>
75 </inputs> 107 </inputs>
76 <outputs> 108 <outputs>
77 109
78 <data name="boundaries" from_work_dir="galaxy_tad_prefix_boundaries.bed" format="bed" 110 <data name="boundaries" from_work_dir="galaxy_tad_prefix_boundaries.bed" format="bed"
79 label="${tool.name} on ${on_string}: Boundary positions" /> 111 label="${tool.name} on ${on_string}: Boundary positions" />
84 label="${tool.name} on ${on_string}: TAD domains" /> 116 label="${tool.name} on ${on_string}: TAD domains" />
85 <data name="boundaries_bin" from_work_dir="galaxy_tad_prefix_boundaries.gff" 117 <data name="boundaries_bin" from_work_dir="galaxy_tad_prefix_boundaries.gff"
86 format="gff" label="${tool.name} on ${on_string}: Boundary information plus score" /> 118 format="gff" label="${tool.name} on ${on_string}: Boundary information plus score" />
87 119
88 <data name="tad_score" from_work_dir="galaxy_tad_prefix_tad_score.bm" 120 <data name="tad_score" from_work_dir="galaxy_tad_prefix_tad_score.bm"
89 format="bedgraph" label="${tool.name} on ${on_string}: TAD information in bm file" /> 121 format="bedgraph" label="${tool.name} on ${on_string}: TAD information in bm file" >
122 <filter>precomputedZscore_conditional.precomputedZscore_selector == 'scratch'</filter>
123
124 </data>
90 125
91 <data name="matrix_output" from_work_dir="galaxy_tad_prefix_zscore_matrix.h5" 126 <data name="matrix_output" from_work_dir="galaxy_tad_prefix_zscore_matrix.h5"
92 format="h5" label="${tool.name} on ${on_string}: Z-score matrix in h5" /> 127 format="h5" label="${tool.name} on ${on_string}: Z-score matrix in h5">
128 <filter>precomputedZscore_conditional.precomputedZscore_selector == 'scratch'</filter>
129 </data>
93 </outputs> 130 </outputs>
94 <tests> 131 <tests>
95 <test> 132 <test>
96 <param name="matrix_h5_cooler" value="small_test_matrix.h5"/> 133 <param name="matrix_h5_cooler" value="small_test_matrix.h5"/>
97 134 <conditional name="precomputedZscore_conditional">
98 <param name="minDepth" value="60000"/> 135 <param name="precomputedZscore_selector" value="scratch"/>
99 <param name="maxDepth" value="180000"/> 136 <param name="minDepth" value="15000"/>
100 <param name="step" value="20000"/> 137 <param name="maxDepth" value="30000"/>
101 <param name="minBoundaryDistance" value="20000" /> 138 <param name="step" value="15000"/>
139 </conditional>
140
141 <param name="minBoundaryDistance" value="5000" />
102 <conditional name="multiple_comparison_conditional"> 142 <conditional name="multiple_comparison_conditional">
103 <param name="multiple_comparison_selector" value="fdr"/> 143 <param name="multiple_comparison_selector" value="fdr"/>
104 <param name="threshold" value="0.1" /> 144 <param name="threshold" value="0.8" />
105 </conditional> 145 </conditional>
106 <output name="boundaries" file="find_TADs/multiFDR_boundaries.bed" ftype="bed" compare="sim_size" delta="35000" /> 146 <output name="boundaries" file="find_TADs/multiFDR_boundaries.bed" ftype="bed" compare="sim_size" delta="35000" />
107 <output name="boundaries_bin" file="find_TADs/multiFDR_boundaries.gff" ftype="gff" compare="sim_size" delta="35000" /> 147 <output name="boundaries_bin" file="find_TADs/multiFDR_boundaries.gff" ftype="gff" compare="sim_size" delta="35000" />
108 <output name="domains" file="find_TADs/multiFDR_domains.bed" ftype="bed" compare="sim_size" delta="35000" /> 148 <output name="domains" file="find_TADs/multiFDR_domains.bed" ftype="bed" compare="sim_size" delta="35000" />
109 <output name="score" file="find_TADs/multiFDR_score.bedgraph" ftype="bedgraph" compare="sim_size" delta="35000" /> 149 <output name="score" file="find_TADs/multiFDR_score.bedgraph" ftype="bedgraph" compare="sim_size" delta="35000" />
110 <output name="tad_score" file="find_TADs/multiFDR_tad_score.bm" ftype="bedgraph" compare="sim_size" delta="35000" /> 150 <output name="tad_score" file="find_TADs/multiFDR_tad_score.bm" ftype="bedgraph" compare="sim_size" delta="35000" />
151 <output name="matrix_output" ftype="h5">
152 <assert_contents>
153 <has_h5_keys keys='intervals,matrix'/>
154 </assert_contents>
155 </output>
156
157 </test>
158 <test>
159 <param name="matrix_h5_cooler" value="small_test_matrix.h5"/>
160 <conditional name="precomputedZscore_conditional">
161 <param name="precomputedZscore_selector" value="precomputed"/>
162 <param name="scoreFile" value="find_TADs/multiFDR_tad_score.bm" />
163 <param name="zscoreMatrix" value="find_TADs/multiFDR_zscore_matrix.h5"/>
164 </conditional>
165 <param name="minBoundaryDistance" value="5000" />
166 <conditional name="multiple_comparison_conditional">
167 <param name="multiple_comparison_selector" value="fdr"/>
168 <param name="threshold" value="0.1" />
169 </conditional>
170 <output name="boundaries" file="find_TADs/multiFDR_boundaries.bed" ftype="bed" compare="sim_size" delta="35000" />
171 <output name="boundaries_bin" file="find_TADs/multiFDR_boundaries.gff" ftype="gff" compare="sim_size" delta="35000" />
172 <output name="domains" file="find_TADs/multiFDR_domains.bed" ftype="bed" compare="sim_size" delta="35000" />
173 <output name="score" file="find_TADs/multiFDR_score.bedgraph" ftype="bedgraph" compare="sim_size" delta="35000" />
111 </test> 174 </test>
112 </tests> 175 </tests>
113 <help><![CDATA[ 176 <help><![CDATA[
114 Calculate Topologically Associating Domains 177 Calculate Topologically Associating Domains
115 =========================================== 178 ===========================================
150 213
151 - TAD boundaries positions as a BED file and TAD separation score. 214 - TAD boundaries positions as a BED file and TAD separation score.
152 - TAD boundaries positions with delta, p-value and TAD separation score as GFF. 215 - TAD boundaries positions with delta, p-value and TAD separation score as GFF.
153 - TAD domains as a BED file. 216 - TAD domains as a BED file.
154 - TAD separation score as bigwig (bw), bedgraph and numpy array (npz) format. These files can be used to plot the so-called TAD insulation score or TAD separation score along the genome or at specific regions. This score is much more reliable across samples than the number of TADs or the TADs width that can vary depending on the sequencing depth because of the lack of information at certain bins, and depending on the parameters used with this tool. 217 - TAD separation score as bigwig (bw), bedgraph and numpy array (npz) format. These files can be used to plot the so-called TAD insulation score or TAD separation score along the genome or at specific regions. This score is much more reliable across samples than the number of TADs or the TADs width that can vary depending on the sequencing depth because of the lack of information at certain bins, and depending on the parameters used with this tool.
155 - Matrix with multi-scale TAD scores as a bed-matrix (bm) file that can be plotted inside ``hicPlotTADs`` to nicely display TAD insulation score alongside Hi-C heatmap and other datasets. 218 - Matrix with multi-scale TAD scores as a bed-matrix (bm) file that can be plotted inside ``pyGenomeTracks`` to nicely display TAD insulation score alongside Hi-C heatmap and other datasets.
156 - Z-score matrix in h5 format that is useful to quickly test the --thresholdComparisons, --delta and --correctForMultipleTesting parameters by using the --TAD_sep_score_prefix option pointing to this zscore_matrix.h5 file (will be added in a future update). 219 - Z-score matrix in h5 format that is useful to quickly test the --thresholdComparisons, --delta and --correctForMultipleTesting parameters by using the --TAD_sep_score_prefix option pointing to this zscore_matrix.h5 file (will be added in a future update).
157 220
158 _________________ 221 _________________
159 222
160 Usage hints 223 Usage hints
161 ----------- 224 -----------
162 225
163 It is mandatory to test multiple parameters of TAD calling with **hicFindTADs** before making conclusions about the number of TADs in a given sample or before comparing TAD calling between multiple conditions. In order to compare numerous TAD calling parameters at once, it is recommended to use ``hicPlotTADs``; below you can find a plot where multiple TAD calling parameters are displayed for *Drosophila melanogaster* embryos: 226 It is mandatory to test multiple parameters of TAD calling with **hicFindTADs** before making conclusions about the number of TADs in a given sample or before comparing TAD calling between multiple conditions. In order to compare numerous TAD calling parameters at once, it is recommended to use ``pyGenomeTracks``; below you can find a plot where multiple TAD calling parameters are displayed for *Drosophila melanogaster* embryos:
164 227
165 .. image:: $PATH_TO_IMAGES/hicFindTADs_TAD_calling_comparison.png 228 .. image:: $PATH_TO_IMAGES/hicFindTADs_TAD_calling_comparison.png
166 :width: 65 % 229 :width: 65 %
167 230
168 We can see that the fourth set of **hicFindTADs** parameters with a threshold of 0.001 gives the best results in terms of TAD calling compared to the corrected Hi-C counts distribution and compared to the enrichment of H3K36me3, which is known to be enriched at TAD boundaries in *Drosophila melanogaster*. 231 We can see that the fourth set of **hicFindTADs** parameters with a threshold of 0.001 gives the best results in terms of TAD calling compared to the corrected Hi-C counts distribution and compared to the enrichment of H3K36me3, which is known to be enriched at TAD boundaries in *Drosophila melanogaster*.