comparison diffbind.xml @ 13:1de83981d43c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit 13485bed6a57ec4a34cab4ec6bb8b36d219e3610
author iuc
date Wed, 30 May 2018 12:25:42 -0400
parents fa56d93f7980
children c97a786e8fb5
comparison
equal deleted inserted replaced
12:fa56d93f7980 13:1de83981d43c
1 <tool id="diffbind" name="DiffBind" version="2.6.6.2"> 1 <tool id="diffbind" name="DiffBind" version="2.6.6.3">
2 <description> differential binding analysis of ChIP-Seq peak data</description> 2 <description> differential binding analysis of ChIP-Seq peak data</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="2.6.6">bioconductor-diffbind</requirement> 4 <requirement type="package" version="2.6.6">bioconductor-diffbind</requirement>
5 <requirement type="package" version="1.20.0">r-getopt</requirement> 5 <requirement type="package" version="1.20.0">r-getopt</requirement>
6 <requirement type="package" version="0.2.15">r-rjson</requirement> 6 <requirement type="package" version="0.2.15">r-rjson</requirement>
104 ]]> 104 ]]>
105 </command> 105 </command>
106 <inputs> 106 <inputs>
107 <repeat name="rep_group" title="Group" min="2" max="2" default="2"> 107 <repeat name="rep_group" title="Group" min="2" max="2" default="2">
108 <param name="groupName" type="text" label="Name" 108 <param name="groupName" type="text" label="Name"
109 help="Name for the Group that the peak and BAM files belong to e.g. Resistant/Responsive (two Groups in total must be specified for DiffBind). NOTE: Please only use letters, numbers or underscores."> 109 help="Name for the Group that the peak and BAM files belong to e.g. Resistant/Responsive (two Groups must be specified for DiffBind). NOTE: Please only use letters, numbers or underscores.">
110 <sanitizer> 110 <sanitizer>
111 <valid initial="string.letters,string.digits"><add value="_" /></valid> 111 <valid initial="string.letters,string.digits"><add value="_" /></valid>
112 </sanitizer> 112 </sanitizer>
113 <validator type="empty_field" /> 113 <validator type="empty_field" />
114 </param> 114 </param>
131 <param name="th" type="float" value="0.05" min="0" max="1" label="FDR Threshold" help="Significance threshold; all sites with FDR less than or equal to this value will be included in the output. A value of 1 will output all binding sites. Default: 0.05"/> 131 <param name="th" type="float" value="0.05" min="0" max="1" label="FDR Threshold" help="Significance threshold; all sites with FDR less than or equal to this value will be included in the output. A value of 1 will output all binding sites. Default: 0.05"/>
132 132
133 <!-- Output Options --> 133 <!-- Output Options -->
134 <section name="out" expanded="false" title="Output Options"> 134 <section name="out" expanded="false" title="Output Options">
135 <param name="format" type="select" label="Output Format"> 135 <param name="format" type="select" label="Output Format">
136 <option value="interval" selected="True">Interval</option>
136 <option value="bed">BED</option> 137 <option value="bed">BED</option>
137 <option value="gff">GFF</option> 138 <option value="tabular">Tabular (tab-separated)</option>
138 <option value="wig">WIG</option>
139 </param> 139 </param>
140 <param name="pdf" type="boolean" truevalue="True" falsevalue="" checked="False" label="Visualising the analysis results" help="output an additional PDF file" /> 140 <param name="pdf" type="boolean" truevalue="True" falsevalue="" checked="False" label="Visualising the analysis results" help="output an additional PDF file" />
141 <param name="binding_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" /> 141 <param name="binding_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" />
142 <param name="rdata" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output RData file?" help="Output all the data used by R to construct the plots and tables, can be loaded into R. Default: No"/> 142 <param name="rdata" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output RData file?" help="Output all the data used by R to construct the plots and tables, can be loaded into R. Default: No"/>
143 <param name="rscript" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used will be provided as a text file in the output. Default: No"/> 143 <param name="rscript" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used will be provided as a text file in the output. Default: No"/>
144 <param name="analysis_info" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output analysis info?" help="If this option is set to Yes, information from the dba.count and dba.analyze commands will be output in a text file. Default: No"/> 144 <param name="analysis_info" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output analysis info?" help="If this option is set to Yes, information from the dba.count and dba.analyze commands will be output in a text file. Default: No"/>
145 </section> 145 </section>
146 </inputs> 146 </inputs>
147 147
148 <outputs> 148 <outputs>
149 <data name="outfile" format="tabular" label="${tool.name} on ${on_string}: Differentially bound sites" /> 149 <data name="outfile" format="interval" label="${tool.name} on ${on_string}: Differentially bound sites">
150 <change_format>
151 <when input="out.format" value="bed" format="bed" />
152 <when input="out.format" value="tabular" format="tabular" />
153 </change_format>
154 </data>
150 <data name="plots" format="pdf" label="${tool.name} on ${on_string}: Plots"> 155 <data name="plots" format="pdf" label="${tool.name} on ${on_string}: Plots">
151 <filter>out['pdf']</filter> 156 <filter>out['pdf']</filter>
152 </data> 157 </data>
153 <data name="binding_matrix" format="tabular" from_work_dir="bmatrix.tab" label="${tool.name} on ${on_string}: Binding matrix"> 158 <data name="binding_matrix" format="tabular" from_work_dir="bmatrix.tab" label="${tool.name} on ${on_string}: Binding matrix">
154 <filter>out['binding_matrix']</filter> 159 <filter>out['binding_matrix']</filter>
163 <filter>out['analysis_info']</filter> 168 <filter>out['analysis_info']</filter>
164 </data> 169 </data>
165 </outputs> 170 </outputs>
166 171
167 <tests> 172 <tests>
173 <!-- Ensure outputs work -->
168 <test expect_num_outputs="6"> 174 <test expect_num_outputs="6">
169 <repeat name="rep_group"> 175 <repeat name="rep_group">
170 <param name="groupName" value="Resistant"/> 176 <param name="groupName" value="Resistant"/>
171 <param name="peaks" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/> 177 <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/>
172 <param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" /> 178 <param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" />
173 </repeat> 179 </repeat>
174 <repeat name="rep_group"> 180 <repeat name="rep_group">
175 <param name="groupName" value="Responsive"/> 181 <param name="groupName" value="Responsive"/>
176 <param name="peaks" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/> 182 <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/>
177 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" /> 183 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" />
178 </repeat> 184 </repeat>
179 <param name="scorecol" value="5" /> 185 <param name="scorecol" value="5" />
186 <param name="format" value="interval"/>
180 <param name="pdf" value="True" /> 187 <param name="pdf" value="True" />
181 <param name="binding_matrix" value="True" /> 188 <param name="binding_matrix" value="True" />
182 <param name="rdata" value="True" /> 189 <param name="rdata" value="True" />
183 <param name="rscript" value="True"/> 190 <param name="rscript" value="True"/>
184 <param name="analysis_info" value="True"/> 191 <param name="analysis_info" value="True"/>
185 <output name="outfile" value="out_diffbind.tab" /> 192 <output name="outfile" ftype="interval" value="out_diffbind.interval" />
186 <output name="plots" value="out_plots.pdf" compare="sim_size" /> 193 <output name="plots" value="out_plots.pdf" compare="sim_size" />
187 <output name="binding_matrix" value="out_binding.matrix" /> 194 <output name="binding_matrix" value="out_binding_matrix.tab" />
188 <output name="rdata" value="DiffBind_analysis.RData" compare="sim_size"/> 195 <output name="rdata" value="DiffBind_analysis.RData" compare="sim_size"/>
189 <output name="rscript" value="out_rscript.txt"/> 196 <output name="rscript">
190 <output name="analysis_info" value="out_analysis_info.txt" compare="sim_size" > 197 <assert_contents>
198 <has_text text="write.table"/>
199 </assert_contents>
200 </output>
201 <output name="analysis_info" compare="sim_size" >
191 <assert_contents> 202 <assert_contents>
192 <has_text text="SessionInfo"/> 203 <has_text text="SessionInfo"/>
193 </assert_contents> 204 </assert_contents>
194 </output> 205 </output>
206 </test>
207 <!-- Ensure BED output works -->
208 <test expect_num_outputs="1">
209 <repeat name="rep_group">
210 <param name="groupName" value="Resistant"/>
211 <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/>
212 <param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" />
213 </repeat>
214 <repeat name="rep_group">
215 <param name="groupName" value="Responsive"/>
216 <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/>
217 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" />
218 </repeat>
219 <param name="scorecol" value="5" />
220 <param name="format" value="bed"/>
221 <output name="outfile" ftype="bed" value="out_diffbind.bed" />
222 </test>
223 <!-- Ensure tabular output works -->
224 <test expect_num_outputs="1">
225 <repeat name="rep_group">
226 <param name="groupName" value="Resistant"/>
227 <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/>
228 <param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" />
229 </repeat>
230 <repeat name="rep_group">
231 <param name="groupName" value="Responsive"/>
232 <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/>
233 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" />
234 </repeat>
235 <param name="scorecol" value="5" />
236 <param name="format" value="tabular"/>
237 <output name="outfile" ftype="tabular" file="out_diffbind.tab" />
195 </test> 238 </test>
196 </tests> 239 </tests>
197 <help><![CDATA[ 240 <help><![CDATA[
198 241
199 .. class:: infomark 242 .. class:: infomark
281 324
282 **Outputs** 325 **Outputs**
283 326
284 This tool outputs 327 This tool outputs
285 328
286 * a table of differentially bound sites 329 * a table of differentially bound sites in Interval, BED or Tabular 0-based format
287 330
288 Optionally, under **Output Options** you can choose to output 331 Optionally, under **Output Options** you can choose to output
289 332
290 * a PDF of plots (Heatmap, PCA, MA, Volcano, Boxplots) 333 * a PDF of plots (Heatmap, PCA, MA, Volcano, Boxplots)
291 * a binding affinity matrix 334 * a binding affinity matrix
293 * an RData file of the R objects generated 336 * an RData file of the R objects generated
294 * a text file with information on the analysis (number of Intervals, FRiP scores, method used) 337 * a text file with information on the analysis (number of Intervals, FRiP scores, method used)
295 338
296 **Differentially Bound Sites** 339 **Differentially Bound Sites**
297 340
341 The default output is Interval format; for information on Interval format, see here_. Alternatively, you can choose to output BED or Tabular 0-based format, as shown below. For an explanation of the 0-based and 1-based coordinate systems, see this `Biostars post`_.
342
343 Example - **Interval format**:
344
345 ====== ====== ====== ======== ===== ====== ===========================================
346 Chrom Start End Name Score Strand **Comment**
347 ====== ====== ====== ======== ===== ====== ===========================================
348 chr18 394599 396513 DiffBind 0 \. 1914|7.15|5.55|7.89|-2.35|7.06e-24|9.84e-21
349 chr18 111566 112005 DiffBind 0 \. 439|5.71|6.53|3.63|2.89|1.27e-08|8.88e-06
350 chr18 346463 347342 DiffBind 0 \. 879|5|5.77|3.24|2.52|6.51e-06|0.00303
351 chr18 399013 400382 DiffBind 0 \. 1369|7.62|7|8.05|-1.04|1.04e-05|0.00364
352 chr18 371109 372102 DiffBind 0 \. 993|4.63|3.07|5.36|-2.3|8.1e-05|0.0226
353 ====== ====== ====== ======== ===== ====== ===========================================
354
355 Columns contain the following data:
356
357 * **Chrom**: Chromosome name
358 * **Start**: Start position of site
359 * **End**: End position of site
360 * **Score**: 0
361 * **Name**: DiffBind
362 * **Strand**: Strand
363 * **Comment**: The pipe ("|") separated values in this column correspond to:
364
365 * *width*: Length of site
366 * *Conc*: Mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted)
367 * *Conc_Group1*: Mean concentration over the first group (e.g. Responsive)
368 * *Conc_Group2*: Mean concentration over the second group (e.g. Resistant)
369 * *Fold*: Fold shows the difference in mean concentrations between the two groups (e.g. Responsive - Resistant), with a positive value indicating increased binding affinity in the first group and a negative value indicating increased binding affinity in the second group.
370 * *p.value*: P-value confidence measure for identifying these sites as differentially bound
371 * *FDR*: a multiple testing corrected FDR p-value
372
373 Example - **BED format**:
374
375 ===== ====== ====== ======== ===== ======
376 Chrom Start End Name Score Strand
377 ===== ====== ====== ======== ===== ======
378 chr18 394599 396513 DiffBind 0 \.
379 chr18 111566 112005 DiffBind 0 \.
380 chr18 346463 347342 DiffBind 0 \.
381 chr18 399013 400382 DiffBind 0 \.
382 chr18 371109 372102 DiffBind 0 \.
383 ===== ====== ====== ======== ===== ======
384
385 Example - **Tabular format**:
386
387 ===== ====== ====== ======== ===== ====== ==== =============== ============== ===== ======== ========
388 Chrom Start End Name Score Strand Conc Conc_Responsive Conc_Resistant Fold p.value FDR
389 ===== ====== ====== ======== ===== ====== ==== =============== ============== ===== ======== ========
390 chr18 394599 396513 DiffBind 0 \. 7.15 5.55 7.89 -2.35 7.06E-24 9.84E-21
391 chr18 111566 112005 DiffBind 0 \. 5.71 6.53 3.63 2.89 1.27E-08 8.88E-06
392 chr18 346463 347342 DiffBind 0 \. 5 5.77 3.24 2.52 6.51E-06 0.00303
393 chr18 399013 400382 DiffBind 0 \. 7.62 7 8.05 -1.04 1.04E-05 0.00364
394 chr18 371109 372102 DiffBind 0 \. 4.63 3.07 5.36 -2.3 8.10E-05 0.0226
395 ===== ====== ====== ======== ===== ====== ==== =============== ============== ===== ======== ========
396
397
398 **Binding Affinity Matrix**
399
400 The final result of counting is a binding affinity matrix containing a (normalized) read count for each sample at every potential binding site. With this matrix, the samples can be re-clustered using affinity, rather than occupancy, data. The binding affinity matrix can be used for QC plotting as well as for subsequent
401 differential analysis. Note that this output is a tabular 0-based format.
402
298 Example: 403 Example:
299 404
300 ======== ====== ====== ===== ====== ===== =============== ============== ====== ======== ========
301 seqnames start end width strand Conc Conc_Responsive Conc_Resistant Fold p.value **FDR**
302 ======== ====== ====== ===== ====== ===== =============== ============== ====== ======== ========
303 chr18 394600 396513 1914 \* 7.15 5.55 7.89 -2.35 7.06e-24 9.84e-21
304 chr18 111567 112005 439 \* 5.71 6.53 3.63 2.89 1.27e-08 8.88e-06
305 chr18 346464 347342 879 \* 5 5.77 3.24 2.52 6.51e-06 0.00303
306 chr18 399014 400382 1369 \* 7.62 7 8.05 -1.04 1.04e-05 0.00364
307 chr18 371110 372102 993 \* 4.63 3.07 5.36 -2.3 8.1e-05 0.0226
308 ======== ====== ====== ===== ====== ===== =============== ============== ====== ======== ========
309
310 Columns contain the following data:
311
312 * **seqnames**: Chromosome name
313 * **start**: Start position of site
314 * **end**: End position of site
315 * **width**: Length of site
316 * **strand**: Strand
317 * **Conc**: Mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted)
318 * **Responsive**: Mean concentration over the first (e.g. Responsive) group
319 * **Resistant**: Mean concentration over second (e.g. Resistant) group
320 * **Fold**: Fold shows the difference in mean concentrations between the two groups (e.g. Responsive - Resistant), with a positive value indicating increased binding affinity in the first group and a negative value indicating increased binding affinity in the second group.
321 * **p.value**: P-value confidence measure for identifying these sites as differentially bound
322 * **FDR**: a multiple testing corrected FDR p-value
323
324
325 **Binding Affinity Matrix**
326
327 The final result of counting is a binding affinity matrix containing a (normalized) read count for each sample at every potential binding site. With this matrix, the samples can be re-clustered using affinity, rather than occupancy, data. The binding affinity matrix can be used for QC plotting as well as for subsequent
328 differential analysis.
329
330 Example:
331
332 ===== ====== ====== ========= ========= ========== ========== 405 ===== ====== ====== ========= ========= ========== ==========
333 CHR START END MCF7_ER_1 MCF7_ER_2 BT474_ER_1 BT474_ER_2 406 Chrom Start End MCF7_ER_1 MCF7_ER_2 BT474_ER_1 BT474_ER_2
334 ===== ====== ====== ========= ========= ========== ========== 407 ===== ====== ====== ========= ========= ========== ==========
335 chr18 111567 112005 137.6152 59.87837 29.41393 19.95945 408 chr18 111567 112005 137.6152 59.87837 29.41393 19.95945
336 chr18 189223 189652 19.95945 12.60597 11.55547 23.11095 409 chr18 189223 189652 19.95945 12.60597 11.55547 23.11095
337 chr18 215232 216063 11.55547 15.75746 31.51493 72.48434 410 chr18 215232 216063 11.55547 15.75746 31.51493 72.48434
338 chr18 311530 312172 17.85846 11.55547 54.62588 43.07040 411 chr18 311530 312172 17.85846 11.55547 54.62588 43.07040
424 497
425 .. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html 498 .. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html
426 .. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html 499 .. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html
427 .. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf 500 .. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf
428 .. _`Bioconductor post`: https://support.bioconductor.org/p/69924/ 501 .. _`Bioconductor post`: https://support.bioconductor.org/p/69924/
502 .. _here: https://galaxyproject.org/learn/datatypes/#interval
503 .. _`Biostars post`: https://www.biostars.org/p/84686/
429 504
430 ]]> 505 ]]>
431 </help> 506 </help>
432 <citations> 507 <citations>
433 <citation type="doi">doi:10.1038/nature10730</citation> 508 <citation type="doi">doi:10.1038/nature10730</citation>