comparison plot.xml @ 0:356839cd89d2 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/checkm commit 2a3b068a98bf0e913dc03e0d5c2182cfd102cf27
author iuc
date Fri, 29 Jul 2022 20:37:57 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:356839cd89d2
1 <tool id="checkm_plot" name="CheckM plot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>
3 for assessing the quality of genome bins
4 </description>
5 <macros>
6 <import>macros.xml</import>
7 <xml name="gff_inputs">
8 <param name="gff" type="data_collection" collection_type="list" format="gff" label="Gene feature files for each bin"/>
9 </xml>
10 <token name="@PLOT_GFF_INPUTS@"><![CDATA[
11 #for $i in $plot.gff
12 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($i.element_identifier))
13 mkdir -p 'inputs/bins/${identifier}' &&
14 ln -s '$i' 'inputs/bins/${identifier}/genes.gff' &&
15 #end for
16 ]]></token>
17 <!-- Single tabular tetranucleotide profile; the command passes it to checkm as one quoted positional argument, so only one dataset may be selected. --> <xml name="tetra_profile">
18 <param name="tetra_profile" type="data" format="tabular" label="Tetranucleotide profiles for each bin" help="This can be generated using the tetra tool"/>
19 </xml>
20 <!-- Percentile of the reference distribution to draw; passed positionally to the gc, coding, tetra and dist plot sub-commands. --> <xml name="dist_value">
21 <param argument="--dist_value" type="integer" min="0" max="100" value="95" label="Reference distribution(s) to plot" help="Percentile of the pre-calculated reference distribution to plot. 95 is a good default."/>
22 </xml>
23 <xml name="gc_params">
24 <param argument="--gc_window_size" type="integer" min="0" value="5000" label="Window size used to calculate GC histogram" />
25 <param argument="--gc_bin_width" type="float" min="0" value="0.01" label="Width of GC bars in histogram" />
26 </xml>
27 <xml name="cd_params">
28 <param argument="--cd_window_size" type="integer" min="0" value="10000" label="Window size used to calculate CD histogram" />
29 <param argument="--cd_bin_width" type="float" min="0" value="0.01" label="Width of CD bars in histogram" />
30 </xml>
31 <xml name="td_params">
32 <param argument="--td_window_size" type="integer" min="0" value="5000" label="Window size used to calculate TD histogram" />
33 <param argument="--td_bin_width" type="float" min="0" value="0.01" label="Width of TD bars in histogram" />
34 </xml>
35 <xml name="fig_padding">
36 <param argument="--fig_padding" type="float" min="0" value="0.2" label="White space to place around figure" help="In inches"/>
37 </xml>
38 <!-- Inputs for the gc_bias_plot sub-command (BAM file plus coverage filters). NOTE(review): no expand of this macro appears in the inputs section of this file and the matching select option is commented out, so it looks unused for now. --> <xml name="gc_bias_plot">
39 <when value="gc_bias_plot">
40 <param name="bam_file" type="data" format="bam" label="BAM file to interrogate for coverage information" help="The file should be sorted"/>
41 <param argument="--window_size" type="integer" min="0" value="5000" label="Window size used to calculate plot statistics" />
42 <param argument="--all_reads" type="boolean" truevalue="--all_reads" falsevalue="" checked="false" label="Use all reads to estimate coverage instead of just those in proper pairs?" />
43 <param argument="--min_align" type="float" min="0" max="1" value="0.98" label="Minimum alignment length as percentage of read length"/>
44 <param argument="--max_edit_dist" type="float" min="0" max="1" value="0.02" label="Maximum edit distance as percentage of read length"/>
45 </when>
46 </xml>
47 </macros>
48 <expand macro="biotools"/>
49 <expand macro="requirements">
50 <requirement type="package" version="1.15.1">samtools</requirement>
51 </expand>
52 <expand macro="version"/>
53 <command detect_errors="exit_code"><![CDATA[
54 @BIN_INPUTS@
55 ## Dispatch on the selected plot command; every parameter declared inside the "plot" conditional is referenced with its plot. prefix.
56 #if $plot.command == 'gc_plot'
57 checkm gc_plot
58 'bins'
59 'output'
60 $plot.dist_value
61 --extension 'fasta'
62 --image_type '$image_type'
63 --dpi $dpi
64 --font_size $font_size
65 --width $width
66 --height $height
67 --gc_window_size $plot.gc_window_size --gc_bin_width $plot.gc_bin_width
68 #else if $plot.command == 'coding_plot'
69 @PLOT_GFF_INPUTS@
70 checkm coding_plot
71 'inputs'
72 'bins'
73 'output'
74 $plot.dist_value
75 --extension 'fasta'
76 --image_type '$image_type'
77 --dpi $dpi
78 --font_size $font_size
79 --width $width
80 --height $height
81 --cd_window_size $plot.cd_window_size
82 --cd_bin_width $plot.cd_bin_width
83
84 #else if $plot.command == 'tetra_plot'
85 @PLOT_GFF_INPUTS@
86 checkm tetra_plot
87 'inputs'
88 'bins'
89 'output'
90 '$plot.tetra_profile'
91 $plot.dist_value
92 --extension 'fasta'
93 --image_type '$image_type'
94 --dpi $dpi
95 --font_size $font_size
96 --width $width
97 --height $height
98 --td_window_size $plot.td_window_size
99 --td_bin_width $plot.td_bin_width
100
101 #else if $plot.command == 'dist_plot'
102 @PLOT_GFF_INPUTS@
103 checkm dist_plot
104 'inputs'
105 'bins'
106 'output'
107 '$plot.tetra_profile'
108 $plot.dist_value
109 --extension 'fasta'
110 --image_type '$image_type'
111 --dpi $dpi
112 --font_size $font_size
113 --width $width
114 --height $height
115 --gc_window_size $plot.gc_window_size
116 --gc_bin_width $plot.gc_bin_width
117 --cd_window_size $plot.cd_window_size
118 --cd_bin_width $plot.cd_bin_width
119 --td_window_size $plot.td_window_size
120 --td_bin_width $plot.td_bin_width
121
122 #else if $plot.command == 'nx_plot'
123 checkm nx_plot
124 'bins'
125 'output'
126 --extension 'fasta'
127 --image_type '$image_type'
128 --dpi $dpi
129 --font_size $font_size
130 --width $width
131 --height $height
132 --step_size $plot.step_size
133
134 #else if $plot.command == 'len_hist'
135 checkm len_hist
136 'bins'
137 'output'
138 --extension 'fasta'
139 --image_type '$image_type'
140 --dpi $dpi
141 --font_size $font_size
142 --width $width
143 --height $height
144
145 #else if $plot.command == 'marker_plot'
146 mkdir -p 'inputs/storage/' &&
147 cp '$plot.marker_gene_stats' 'inputs/storage/marker_gene_stats.tsv' &&
148 cp '$plot.bin_stats_ext' 'inputs/storage/bin_stats_ext.tsv' &&
149 #for $b in $plot.genes_fna
150 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($b.element_identifier))
151 mkdir -p 'inputs/bins/${identifier}' &&
152 cp '$b.file_name' 'inputs/bins/${identifier}/genes.faa' &&
153 #end for
154 checkm marker_plot
155 'inputs'
156 'bins'
157 'output'
158 --extension 'fasta'
159 --image_type '$image_type'
160 --dpi $dpi
161 --font_size $font_size
162 --width $width
163 --height $height
164 --fig_padding $plot.fig_padding
165
166 #else if $plot.command == 'gc_bias_plot'
167 mkdir 'mapping' &&
168 ln -s '$plot.bam_file' 'mapping.bam' &&
169 samtools index 'mapping.bam' 'mapping.bam.bai' &&
170 ## This branch cannot be selected while the gc_bias_plot option stays commented out in the inputs.
171 checkm gc_bias_plot
172 'bins'
173 'output'
174 'mapping.bam'
175 --extension 'fasta'
176 --image_type '$image_type'
177 --dpi $dpi
178 --font_size $font_size
179 --width $width
180 --height $height
181 --window_size $plot.window_size
182 $plot.all_reads
183 --min_align $plot.min_align
184 --max_edit_dist $plot.max_edit_dist
185 --threads \${GALAXY_SLOTS:-1}
186 #end if
187 ]]></command>
188 <inputs>
189 <expand macro="bin_inputs"/>
190 <conditional name="plot">
191 <param name="command" type="select" label="Plot to generate">
192 <option value="gc_plot">Create GC histogram and delta-GC plot</option>
193 <option value="coding_plot">Create coding density (CD) histogram and delta-CD plot</option>
194 <option value="tetra_plot">Create tetranucleotide distance (TD) histogram and delta-TD plot</option>
195 <option value="dist_plot">Create image with GC, coding density (CD), and tetranucleotide distance (TD) distribution plots together</option>
196 <option value="nx_plot">Create Nx-plots</option>
197 <option value="len_hist">Sequence length histogram</option>
198 <option value="marker_plot">Plot position of marker genes on sequences</option>
199 <!--<option value="gc_bias_plot">Plot bin coverage as a function of GC</option>-->
200 </param>
201 <when value="gc_plot">
202 <expand macro="dist_value"/>
203 <expand macro="gc_params"/>
204 </when>
205 <when value="coding_plot">
206 <expand macro="gff_inputs"/>
207 <expand macro="dist_value"/>
208 <expand macro="cd_params"/>
209 </when>
210 <when value="tetra_plot">
211 <expand macro="gff_inputs"/>
212 <expand macro="tetra_profile"/>
213 <expand macro="dist_value"/>
214 <expand macro="td_params"/>
215 </when>
216 <when value="dist_plot">
217 <expand macro="gff_inputs"/>
218 <expand macro="tetra_profile"/>
219 <expand macro="dist_value"/>
220 <expand macro="gc_params"/>
221 <expand macro="cd_params"/>
222 <expand macro="td_params"/>
223 </when>
224 <when value="nx_plot">
225 <param argument="--step_size" type="float" min="0" value="0.05" label="x step size for calculating Nx" />
226 </when>
227 <when value="len_hist">
228 <expand macro="fig_padding" />
229 </when>
230 <when value="marker_plot">
231 <param name="genes_fna" type="data_collection" collection_type="list" format="fasta" label="Nucleotide gene sequences for each bin" help="Optional output of the CheckM tree or lineage_wf tools"/>
232 <param name="marker_gene_stats" type="data" format="tabular" label="Marker gene stats" help="Output of the CheckM qa tool or optional output of the lineage_wf or taxonomy_wf tools"/>
233 <param name="bin_stats_ext" type="data" format="tabular" label="Marker gene bin extensive stats" help="Output of the CheckM qa tool or optional output of the lineage_wf or taxonomy_wf tools"/>
234 <expand macro="fig_padding" />
235 </when>
236 </conditional>
237 <param argument="--image_type" type="select" label="Image type">
238 <option value="eps">EPS</option>
239 <option value="pdf">PDF</option>
240 <option value="png" selected="true">PNG</option>
241 <option value="ps">PS</option>
242 <option value="svg">SVG</option>
243 </param>
244 <param argument="--dpi" type="integer" min="0" value="600" label="DPI of output image" />
245 <param argument="--font_size" type="integer" min="0" value="8" label="Font size" />
246 <param argument="--width" type="float" min="0" value="6.5" label="Width of output image" />
247 <param argument="--height" type="float" min="0" value="3.5" label="Height of output image" />
248 </inputs>
249 <outputs>
250 <collection name="gc_plot" type="list" label="${tool.name} on ${on_string}: GC distribution plot">
251 <filter>plot['command'] == 'gc_plot'</filter>
252 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gc_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
253 </collection>
254 <collection name="coding_plot" type="list" label="${tool.name} on ${on_string}: Coding density (CD) distribution plot">
255 <filter>plot['command'] == 'coding_plot'</filter>
256 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.coding_density_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
257 </collection>
258 <collection name="tetra_plot" type="list" label="${tool.name} on ${on_string}: Tetranucleotide distance (TD) distribution plot">
259 <filter>plot['command'] == 'tetra_plot'</filter>
260 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.tetra_dist_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
261 </collection>
262 <collection name="dist_plot" type="list" label="${tool.name} on ${on_string}: GC, Coding density (CD) and Tetranucleotide distance (TD) distribution plot">
263 <filter>plot['command'] == 'dist_plot'</filter>
264 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.ref_dist_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
265 </collection>
266 <collection name="nx_plot" type="list" label="${tool.name} on ${on_string}: Nx-plot">
267 <filter>plot['command'] == 'nx_plot'</filter>
268 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.nx_plot\.(?P&lt;ext&gt;.+)" directory="output/"/>
269 </collection>
270 <collection name="len_hist" type="list" label="${tool.name} on ${on_string}: Sequence length histogram">
271 <filter>plot['command'] == 'len_hist'</filter>
272 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.len_hist\.(?P&lt;ext&gt;.+)" directory="output/"/>
273 </collection>
274 <collection name="marker_plot" type="list" label="${tool.name} on ${on_string}: Marker gene position plot">
275 <filter>plot['command'] == 'marker_plot'</filter>
276 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.marker_pos_plot\.(?P&lt;ext&gt;.+)" directory="output/"/>
277 </collection>
278 <collection name="gc_bias_plot" type="list" label="${tool.name} on ${on_string}: Bin coverage as a function of GC">
279 <filter>plot['command'] == 'gc_bias_plot'</filter>
280 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gc_bias_plot\.(?P&lt;ext&gt;.+)" directory="output/"/>
281 </collection>
282 </outputs>
283 <tests>
284 <test expect_num_outputs="1">
285 <conditional name="bins">
286 <param name="select" value="collection"/>
287 <param name="bins_coll">
288 <collection type="list">
289 <element name="637000110" ftype="fasta" value="637000110.fna"/>
290 </collection>
291 </param>
292 </conditional>
293 <conditional name="plot">
294 <param name="command" value="gc_plot"/>
295 <param name="dist_value" value="100" />
296 <param name="gc_window_size" value="5000"/>
297 <param name="gc_bin_width" value="0.01"/>
298 </conditional>
299 <param name="image_type" value="eps"/>
300 <param name="dpi" value="600" />
301 <param name="font_size" value="8"/>
302 <param name="width" value="6.5"/>
303 <param name="height" value="3.5"/>
304 <output_collection name="gc_plot" count="1">
305 <element name="637000110" ftype="eps">
306 <assert_contents>
307 <has_size value="46633" delta="10"/>
308 </assert_contents>
309 </element>
310 </output_collection>
311 </test>
312 <test expect_num_outputs="1">
313 <conditional name="bins">
314 <param name="select" value="collection"/>
315 <param name="bins_coll">
316 <collection type="list">
317 <element name="637000110" ftype="fasta" value="637000110.fna"/>
318 </collection>
319 </param>
320 </conditional>
321 <conditional name="plot">
322 <param name="command" value="coding_plot"/>
323 <param name="gff">
324 <collection type="list">
325 <element name="637000110" ftype="gff" value="637000110.gff"/>
326 </collection>
327 </param>
328 <param name="dist_value" value="100" />
329 <param name="cd_window_size" value="10000"/>
330 <param name="cd_bin_width" value="0.01"/>
331 </conditional>
332 <param name="image_type" value="png"/>
333 <param name="dpi" value="600" />
334 <param name="font_size" value="8"/>
335 <param name="width" value="6.5"/>
336 <param name="height" value="3.5"/>
337 <output_collection name="coding_plot" count="1">
338 <element name="637000110" ftype="png">
339 <assert_contents>
340 <has_size value="224295" delta="10"/>
341 </assert_contents>
342 </element>
343 </output_collection>
344 </test>
345 <test expect_num_outputs="1">
346 <conditional name="bins">
347 <param name="select" value="collection"/>
348 <param name="bins_coll">
349 <collection type="list">
350 <element name="637000110" ftype="fasta" value="637000110.fna"/>
351 </collection>
352 </param>
353 </conditional>
354 <conditional name="plot">
355 <param name="command" value="tetra_plot"/>
356 <param name="gff">
357 <collection type="list">
358 <element name="637000110" ftype="gff" value="637000110.gff"/>
359 </collection>
360 </param>
361 <param name="tetra_profile" ftype="tabular" value="tetra"/>
362 <param name="dist_value" value="100" />
363 <param name="td_window_size" value="5000"/>
364 <param name="td_bin_width" value="0.01"/>
365 </conditional>
366 <param name="image_type" value="pdf"/>
367 <param name="dpi" value="600" />
368 <param name="font_size" value="8"/>
369 <param name="width" value="6.5"/>
370 <param name="height" value="3.5"/>
371 <output_collection name="tetra_plot" count="1">
372 <element name="637000110" ftype="pdf">
373 <assert_contents>
374 <has_size value="17443" delta="10"/>
375 </assert_contents>
376 </element>
377 </output_collection>
378 </test>
379 <test expect_num_outputs="1">
380 <conditional name="bins">
381 <param name="select" value="collection"/>
382 <param name="bins_coll">
383 <collection type="list">
384 <element name="637000110" ftype="fasta" value="637000110.fna"/>
385 </collection>
386 </param>
387 </conditional>
388 <conditional name="plot">
389 <param name="command" value="dist_plot"/>
390 <param name="gff">
391 <collection type="list">
392 <element name="637000110" ftype="gff" value="637000110.gff"/>
393 </collection>
394 </param>
395 <param name="tetra_profile" ftype="tabular" value="tetra"/>
396 <param name="dist_value" value="100" />
397 <param name="gc_window_size" value="5000"/>
398 <param name="gc_bin_width" value="0.01"/>
399 <param name="cd_window_size" value="10000"/>
400 <param name="cd_bin_width" value="0.01"/>
401 <param name="td_window_size" value="5000"/>
402 <param name="td_bin_width" value="0.01"/>
403 </conditional>
404 <param name="image_type" value="png"/>
405 <param name="dpi" value="600" />
406 <param name="font_size" value="8"/>
407 <param name="width" value="6.5"/>
408 <param name="height" value="3.5"/>
409 <output_collection name="dist_plot" count="1">
410 <element name="637000110" ftype="png">
411 <assert_contents>
412 <has_size value="387707" delta="10"/>
413 </assert_contents>
414 </element>
415 </output_collection>
416 </test>
417 <test expect_num_outputs="1">
418 <conditional name="bins">
419 <param name="select" value="collection"/>
420 <param name="bins_coll">
421 <collection type="list">
422 <element name="637000110" ftype="fasta" value="637000110.fna"/>
423 </collection>
424 </param>
425 </conditional>
426 <conditional name="plot">
427 <param name="command" value="nx_plot"/>
428 <param name="step_size" value="0.05"/>
429 </conditional>
430 <param name="image_type" value="ps"/>
431 <param name="dpi" value="600" />
432 <param name="font_size" value="8"/>
433 <param name="width" value="6.5"/>
434 <param name="height" value="3.5"/>
435 <output_collection name="nx_plot" count="1">
436 <element name="637000110" ftype="ps">
437 <assert_contents>
438 <has_size value="18835" delta="10"/>
439 </assert_contents>
440 </element>
441 </output_collection>
442 </test>
443 <test expect_num_outputs="1">
444 <conditional name="bins">
445 <param name="select" value="collection"/>
446 <param name="bins_coll">
447 <collection type="list">
448 <element name="637000110" ftype="fasta" value="637000110.fna"/>
449 </collection>
450 </param>
451 </conditional>
452 <conditional name="plot">
453 <param name="command" value="len_hist"/>
454 <param name="fig_padding" value="0.2"/>
455 </conditional>
456 <param name="image_type" value="svg"/>
457 <param name="dpi" value="600" />
458 <param name="font_size" value="8"/>
459 <param name="width" value="6.5"/>
460 <param name="height" value="3.5"/>
461 <output_collection name="len_hist" count="1">
462 <element name="637000110" ftype="svg">
463 <assert_contents>
464 <has_size value="9075" delta="10"/>
465 </assert_contents>
466 </element>
467 </output_collection>
468 </test>
469 <test expect_num_outputs="1">
470 <conditional name="bins">
471 <param name="select" value="collection"/>
472 <param name="bins_coll">
473 <collection type="list">
474 <element name="637000110" ftype="fasta" value="637000110.fna"/>
475 </collection>
476 </param>
477 </conditional>
478 <conditional name="plot">
479 <param name="command" value="marker_plot"/>
480 <param name="genes_fna">
481 <collection type="list">
482 <element name="637000110" ftype="fasta" value="637000110.faa"/>
483 </collection>
484 </param>
485 <param name="marker_gene_stats" ftype="tabular" value="marker_gene_stats.tsv"/>
486 <param name="bin_stats_ext" ftype="tabular" value="bin_stats_ext.tsv"/>
487 <param name="fig_padding" value="0.2"/>
488 </conditional>
489 <param name="image_type" value="png"/>
490 <param name="dpi" value="600" />
491 <param name="font_size" value="8"/>
492 <param name="width" value="6.5"/>
493 <param name="height" value="3.5"/>
494 <output_collection name="marker_plot" count="1">
495 <element name="637000110" ftype="png">
496 <assert_contents>
497 <has_size value="137394" delta="10"/>
498 </assert_contents>
499 </element>
500 </output_collection>
501 </test>
502 <!--<test expect_num_outputs="1">
503 <conditional name="bins">
504 <param name="select" value="collection"/>
505 <param name="bins_coll">
506 <collection type="list">
507 <element name="637000110" ftype="fasta" value="637000110.fna"/>
508 </collection>
509 </param>
510 </conditional>
511 <conditional name="plot">
512 <param name="command" value="gc_bias_plot"/>
513 <param name="bam_file" ftype="bam" value="637000110.bam"/>
514 <param name="window_size" value="5000"/>
515 <param name="all_reads" value="false" />
516 <param name="min_align" value="0.98"/>
517 <param name="max_edit_dist" value="0.02"/>
518 </conditional>
519 <param name="image_type" value="png"/>
520 <param name="dpi" value="600" />
521 <param name="font_size" value="8"/>
522 <param name="width" value="6.5"/>
523 <param name="height" value="3.5"/>
524 <output_collection name="gc_bias_plot" count="1">
525 <element name="637000110" ftype="png">
526 <assert_contents>
527 <has_size value="10000" delta="100"/>
528 </assert_contents>
529 </element>
530 </output_collection>
531 </test>-->
532 </tests>
533 <help><![CDATA[
534 @HELP_HEADER@
535
536 This command produces a number of plots for assessing the quality of genome bins. Here we describe each of these plots and provide an example.
537
538 - gc_plot: Provides a 3 pane plot suitable for assessing the GC distribution of sequences within a genome bin. The first pane is a histogram of the number of non-overlapping 5 kbp windows with a given percent GC. A typical genome will produce a unimodal distribution. The second pane plots each sequence in the genome bin as a function of its deviation from the average GC of the entire genome (x-axis) and sequence length (y-axis). The dashed red lines indicate the expected deviation from the mean GC as a function of length. This expected deviation is pre-calculated from a set of trusted reference genomes and the percentile plotted is provided as an argument to this command. A good default value to use for this distribution parameter is 95.
539 - coding_plot: Provides a plot analogous to the gc_plot suitable for assessing the coding density of sequences within a genome bin.
540 - tetra_plot: Provides a plot analogous to the gc_plot suitable for assessing the tetranucleotide signatures of sequences within a genome bin. The Manhattan distance is used to determine the difference between each sequence's tetranucleotide signature and the tetranucleotide signature of the entire genome bin. This plot requires a file indicating the tetranucleotide signature of all sequences within the genome bins. This file can be created with the tetra command.
541 - dist_plot: Produces a single figure combining the plots produced by gc_plot, coding_plot, and tetra_plot. This plot requires a file indicating the tetranucleotide signature of all sequences within the genome bins. This file can be created with the tetra command.
542 - nx_plot: Produces a plot indicating the Nx value of a genome bin for all values of x. This provides a more comprehensive view of the quality of an assembly than simply considering N50.
543 - len_hist: Produce a histogram of the number of sequences within a genome bin at different sequence length intervals. This provides additional information regarding the quality of an assembled genome.
544 - marker_plot: Plots the position of marker genes on sequences within a genome bin. This provides information regarding the extent to which marker genes are collocated. The number of marker genes within a fixed size window (2.8 kbps in this example) is indicated with different colours. Sequences without any marker genes are not shown.
545 - gc_bias_plot: Plots bin coverage as a function of GC. This plot is currently disabled in this tool.
546 ]]></help>
547 <expand macro="citations"/>
548 </tool>