Mercurial > repos > iuc > checkm_plot
comparison plot.xml @ 0:356839cd89d2 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/checkm commit 2a3b068a98bf0e913dc03e0d5c2182cfd102cf27
author | iuc |
---|---|
date | Fri, 29 Jul 2022 20:37:57 +0000 |
parents | |
children | 9916308301da |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:356839cd89d2 |
---|---|
1 <tool id="checkm_plot" name="CheckM plot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description> | |
3 for assessing the quality of genome bins | |
4 </description> | |
5 <macros> | |
6 <import>macros.xml</import> | |
7 <xml name="gff_inputs"> <!-- List collection of GFF files, one element per bin; iterated as $plot.gff by the PLOT_GFF_INPUTS token --> | |
8 <param name="gff" type="data_collection" collection_type="list" format="gff" label="Gene feature files for each bin"/> | |
9 </xml> <!-- Token below: for every GFF element, replaces each character of the element identifier other than whitespace, word characters, '-' and '.' with '_', then symlinks the file to inputs/bins/IDENTIFIER/genes.gff --> | |
10 <token name="@PLOT_GFF_INPUTS@"><![CDATA[ | |
11 #for $i in $plot.gff | |
12 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($i.element_identifier)) | |
13 mkdir -p 'inputs/bins/${identifier}' && | |
14 ln -s '$i' 'inputs/bins/${identifier}/genes.gff' && | |
15 #end for | |
16 ]]></token> | |
17 <xml name="tetra_profile"> <!-- Tabular tetranucleotide profile input; multiple datasets may be selected --> | |
18 <param name="tetra_profile" type="data" format="tabular" multiple="true" label="Tetranucleotide profiles for each bin" help="This can be generated using the tetra tool"/> | |
19 </xml> | |
20 <xml name="dist_value"> <!-- Integer in the range 0 to 100, declared with an empty default value --> | |
21 <param argument="--dist_value" type="integer" min="0" max="100" value="" label="Reference distribution(s) to plot" /> | |
22 </xml> | |
23 <xml name="gc_params"> <!-- GC histogram settings: window size and bar width --> | |
24 <param argument="--gc_window_size" type="integer" min="0" value="5000" label="Window size used to calculate GC histogram" /> | |
25 <param argument="--gc_bin_width" type="float" min="0" value="0.01" label="Width of GC bars in histogram" /> | |
26 </xml> | |
27 <xml name="cd_params"> <!-- Coding density (CD) histogram settings: window size and bar width --> | |
28 <param argument="--cd_window_size" type="integer" min="0" value="10000" label="Window size used to calculate CD histogram" /> | |
29 <param argument="--cd_bin_width" type="float" min="0" value="0.01" label="Width of CD bars in histogram" /> | |
30 </xml> | |
31 <xml name="td_params"> <!-- Tetranucleotide distance (TD) histogram settings: window size and bar width --> | |
32 <param argument="--td_window_size" type="integer" min="0" value="5000" label="Window size used to calculate TD histogram" /> | |
33 <param argument="--td_bin_width" type="float" min="0" value="0.01" label="Width of TD bars in histogram" /> | |
34 </xml> | |
35 <xml name="fig_padding"> <!-- White space around the figure, expressed in inches --> | |
36 <param argument="--fig_padding" type="float" min="0" value="0.2" label="White space to place around figure" help="In inches"/> | |
37 </xml> | |
38 <xml name="gc_bias_plot"> <!-- NOTE(review): no expand of this macro is visible in this file and the matching select option is commented out, so this branch looks disabled --> | |
39 <when value="gc_bias_plot"> | |
40 <param name="bam_file" type="data" format="bam" label="BAM file to interrogate for coverage information" help="The file should be sorted"/> | |
41 <param argument="--window_size" type="integer" min="0" value="5000" label="Window size used to calculate plot statistics" /> | |
42 <param argument="--all_reads" type="boolean" truevalue="--all_reads" falsevalue="" checked="false" label="Use all reads to estimate coverage instead of just those in proper pairs?" /> | |
43 <param argument="--min_align" type="float" min="0" max="1" value="0.98" label="Minimum alignment length as percentage of read length"/> | |
44 <param argument="--max_edit_dist" type="float" min="0" max="1" value="0.02" label="Maximum edit distance as percentage of read length"/> | |
45 </when> | |
46 </xml> | |
47 </macros> | |
48 <expand macro="biotools"/> | |
49 <expand macro="requirements"> | |
50 <requirement type="package" version="1.15.1">samtools</requirement> | |
51 </expand> | |
52 <expand macro="version"/> | |
53 <command detect_errors="exit_code"><![CDATA[ | |
54 @BIN_INPUTS@ | |
55 ## Run the checkm plot subcommand chosen in the "plot" conditional; its parameters are always read as $plot.NAME. | |
56 #if $plot.command == 'gc_plot' | |
57 checkm gc_plot | |
58 'bins' | |
59 'output' | |
60 $plot.dist_value | |
61 --extension 'fasta' | |
62 --image_type '$image_type' | |
63 --dpi $dpi | |
64 --font_size $font_size | |
65 --width $width | |
66 --height $height | |
67 --gc_window_size $plot.gc_window_size --gc_bin_width $plot.gc_bin_width | |
68 #else if $plot.command == 'coding_plot' | |
69 @PLOT_GFF_INPUTS@ | |
70 checkm coding_plot | |
71 'inputs' | |
72 'bins' | |
73 'output' | |
74 $plot.dist_value | |
75 --extension 'fasta' | |
76 --image_type '$image_type' | |
77 --dpi $dpi | |
78 --font_size $font_size | |
79 --width $width | |
80 --height $height | |
81 --cd_window_size $plot.cd_window_size | |
82 --cd_bin_width $plot.cd_bin_width | |
83 ## tetra_plot: stages the per-bin GFF files like coding_plot and passes the tetranucleotide profile positionally. | |
84 #else if $plot.command == 'tetra_plot' | |
85 @PLOT_GFF_INPUTS@ | |
86 checkm tetra_plot | |
87 'inputs' | |
88 'bins' | |
89 'output' | |
90 '$plot.tetra_profile' | |
91 $plot.dist_value | |
92 --extension 'fasta' | |
93 --image_type '$image_type' | |
94 --dpi $dpi | |
95 --font_size $font_size | |
96 --width $width | |
97 --height $height | |
98 --td_window_size $plot.td_window_size | |
99 --td_bin_width $plot.td_bin_width | |
100 ## dist_plot: combined figure; forwards the GC, CD and TD histogram settings together. | |
101 #else if $plot.command == 'dist_plot' | |
102 @PLOT_GFF_INPUTS@ | |
103 checkm dist_plot | |
104 'inputs' | |
105 'bins' | |
106 'output' | |
107 '$plot.tetra_profile' | |
108 $plot.dist_value | |
109 --extension 'fasta' | |
110 --image_type '$image_type' | |
111 --dpi $dpi | |
112 --font_size $font_size | |
113 --width $width | |
114 --height $height | |
115 --gc_window_size $plot.gc_window_size | |
116 --gc_bin_width $plot.gc_bin_width | |
117 --cd_window_size $plot.cd_window_size | |
118 --cd_bin_width $plot.cd_bin_width | |
119 --td_window_size $plot.td_window_size | |
120 --td_bin_width $plot.td_bin_width | |
121 ## nx_plot: only the bins and the Nx step size are needed. | |
122 #else if $plot.command == 'nx_plot' | |
123 checkm nx_plot | |
124 'bins' | |
125 'output' | |
126 --extension 'fasta' | |
127 --image_type '$image_type' | |
128 --dpi $dpi | |
129 --font_size $font_size | |
130 --width $width | |
131 --height $height | |
132 --step_size $plot.step_size | |
133 ## len_hist: no plot-specific option is forwarded. | |
134 #else if $plot.command == 'len_hist' | |
135 checkm len_hist | |
136 'bins' | |
137 'output' | |
138 --extension 'fasta' | |
139 --image_type '$image_type' | |
140 --dpi $dpi | |
141 --font_size $font_size | |
142 --width $width | |
143 --height $height | |
144 ## marker_plot: copies the QA tables to inputs/storage and each gene file to inputs/bins/IDENTIFIER/genes.faa. | |
145 #else if $plot.command == 'marker_plot' | |
146 mkdir -p 'inputs/storage/' && | |
147 cp '$plot.marker_gene_stats' 'inputs/storage/marker_gene_stats.tsv' && | |
148 cp '$plot.bin_stats_ext' 'inputs/storage/bin_stats_ext.tsv' && | |
149 #for $b in $plot.genes_fna | |
150 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($b.element_identifier)) | |
151 mkdir -p 'inputs/bins/${identifier}' && | |
152 cp '$b.file_name' 'inputs/bins/${identifier}/genes.faa' && | |
153 #end for | |
154 checkm marker_plot | |
155 'inputs' | |
156 'bins' | |
157 'output' | |
158 --extension 'fasta' | |
159 --image_type '$image_type' | |
160 --dpi $dpi | |
161 --font_size $font_size | |
162 --width $width | |
163 --height $height | |
164 --fig_padding $plot.fig_padding | |
165 ## gc_bias_plot: links and indexes the BAM file first; unreachable while its select option stays commented out in the inputs. | |
166 #else if $plot.command == 'gc_bias_plot' | |
167 mkdir 'mapping' && | |
168 ln -s '$plot.bam_file' 'mapping.bam' && | |
169 samtools index 'mapping.bam' 'mapping.bam.bai' && | |
170 | |
171 checkm gc_bias_plot | |
172 'bins' | |
173 'output' | |
174 'mapping.bam' | |
175 --extension 'fasta' | |
176 --image_type '$image_type' | |
177 --dpi $dpi | |
178 --font_size $font_size | |
179 --width $width | |
180 --height $height | |
181 --window_size $plot.window_size | |
182 $plot.all_reads | |
183 --min_align $plot.min_align | |
184 --max_edit_dist $plot.max_edit_dist | |
185 --threads \${GALAXY_SLOTS:-1} | |
186 #end if | |
187 ]]></command> | |
188 <inputs> | |
189 <expand macro="bin_inputs"/> <!-- bin_inputs is not defined in this file; presumably it comes from the imported macros.xml --> | |
190 <conditional name="plot"> <!-- Chooses the plot type; each when-branch declares the parameters of that plot type --> | |
191 <param name="command" type="select" label="Plot to generate"> | |
192 <option value="gc_plot">gc_plot: Create GC histogram and delta-GC plot</option> | |
193 <option value="coding_plot">Create coding density (CD) histogram and delta-CD plot</option> | |
194 <option value="tetra_plot">Create tetranucleotide distance (TD) histogram and delta-TD plot</option> | |
195 <option value="dist_plot">Create image with GC, coding density (CD), and tetranucleotide distance (TD) distribution plots together</option> | |
196 <option value="nx_plot">Create Nx-plots</option> | |
197 <option value="len_hist">Sequence length histogram</option> | |
198 <option value="marker_plot">Plot position of marker genes on sequences</option> | |
199 <!--<option value="gc_bias_plot">Plot bin coverage as a function of GC</option>--> | |
200 </param> | |
201 <when value="gc_plot"> | |
202 <expand macro="dist_value"/> | |
203 <expand macro="gc_params"/> | |
204 </when> | |
205 <when value="coding_plot"> | |
206 <expand macro="gff_inputs"/> | |
207 <expand macro="dist_value"/> | |
208 <expand macro="cd_params"/> | |
209 </when> | |
210 <when value="tetra_plot"> | |
211 <expand macro="gff_inputs"/> | |
212 <expand macro="tetra_profile"/> | |
213 <expand macro="dist_value"/> | |
214 <expand macro="td_params"/> | |
215 </when> | |
216 <when value="dist_plot"> | |
217 <expand macro="gff_inputs"/> | |
218 <expand macro="tetra_profile"/> | |
219 <expand macro="dist_value"/> | |
220 <expand macro="gc_params"/> | |
221 <expand macro="cd_params"/> | |
222 <expand macro="td_params"/> | |
223 </when> | |
224 <when value="nx_plot"> | |
225 <param argument="--step_size" type="float" min="0" value="0.05" label="x step size for calculating Nx" /> | |
226 </when> | |
227 <when value="len_hist"> <!-- NOTE(review): fig_padding is declared here, but the len_hist branch of the command does not pass it to checkm --> | |
228 <expand macro="fig_padding" /> | |
229 </when> | |
230 <when value="marker_plot"> <!-- NOTE(review): genes_fna is labelled as nucleotide sequences, yet the command copies each element to genes.faa; confirm which is intended --> | |
231 <param name="genes_fna" type="data_collection" collection_type="list" format="fasta" label="Nucleotide gene sequences for each bin" help="Optional output of the CheckM tree or lineage_wf tools"/> | |
232 <param name="marker_gene_stats" type="data" format="tabular" label="Marker gene stats" help="Output of the CheckM qa tool or optional output of the lineage_wf or taxonomy_wf tools"/> | |
233 <param name="bin_stats_ext" type="data" format="tabular" label="Marker gene bin extensive stats" help="Output of the CheckM qa tool or optional output of the lineage_wf or taxonomy_wf tools"/> | |
234 <expand macro="fig_padding" /> | |
235 </when> | |
236 </conditional> <!-- The image settings below are passed to checkm by every branch of the command --> | |
237 <param argument="--image_type" type="select" label="Image type"> | |
238 <option value="eps">EPS</option> | |
239 <option value="pdf">PDF</option> | |
240 <option value="png" selected="true">PNG</option> | |
241 <option value="ps">PS</option> | |
242 <option value="svg">SVG</option> | |
243 </param> | |
244 <param argument="--dpi" type="integer" min="0" value="600" label="DPI of output image" /> | |
245 <param argument="--font_size" type="integer" min="0" value="8" label="Font size" /> | |
246 <param argument="--width" type="float" min="0" value="6.5" label="Width of output image" /> | |
247 <param argument="--height" type="float" min="0" value="3.5" label="Height of output image" /> | |
248 </inputs> | |
249 <outputs> <!-- One list collection per plot type; the filter keeps only the collection of the selected command. Files are discovered in output/ by a plot-specific filename suffix. --> | |
250 <collection name="gc_plot" type="list" label="${tool.name} on ${on_string}: GC distribution plot"> | |
251 <filter>plot['command'] == 'gc_plot'</filter> | |
252 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gc_plots\.(?P&lt;ext&gt;.+)" directory="output/"/> | |
253 </collection> | |
254 <collection name="coding_plot" type="list" label="${tool.name} on ${on_string}: Coding density (CD) distribution plot"> | |
255 <filter>plot['command'] == 'coding_plot'</filter> | |
256 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.coding_density_plots\.(?P&lt;ext&gt;.+)" directory="output/"/> | |
257 </collection> | |
258 <collection name="tetra_plot" type="list" label="${tool.name} on ${on_string}: Tetranucleotide distance (TD) distribution plot"> | |
259 <filter>plot['command'] == 'tetra_plot'</filter> | |
260 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.tetra_dist_plots\.(?P&lt;ext&gt;.+)" directory="output/"/> | |
261 </collection> | |
262 <collection name="dist_plot" type="list" label="${tool.name} on ${on_string}: GC, Coding density (CD) and Tetranucleotide distance (TD) distribution plot"> | |
263 <filter>plot['command'] == 'dist_plot'</filter> | |
264 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.ref_dist_plots\.(?P&lt;ext&gt;.+)" directory="output/"/> | |
265 </collection> | |
266 <collection name="nx_plot" type="list" label="${tool.name} on ${on_string}: Nx-plot"> | |
267 <filter>plot['command'] == 'nx_plot'</filter> | |
268 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.nx_plot\.(?P&lt;ext&gt;.+)" directory="output/"/> | |
269 </collection> | |
270 <collection name="len_hist" type="list" label="${tool.name} on ${on_string}: Sequence length histogram"> | |
271 <filter>plot['command'] == 'len_hist'</filter> | |
272 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.len_hist\.(?P&lt;ext&gt;.+)" directory="output/"/> | |
273 </collection> | |
274 <collection name="marker_plot" type="list" label="${tool.name} on ${on_string}: Marker gene position plot"> | |
275 <filter>plot['command'] == 'marker_plot'</filter> | |
276 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.marker_pos_plot\.(?P&lt;ext&gt;.+)" directory="output/"/> | |
277 </collection> | |
278 <collection name="gc_bias_plot" type="list" label="${tool.name} on ${on_string}: Bin coverage as a function of GC"> <!-- Suffix was copied from marker_plot; gc_bias_plot is assumed to be the name checkm writes - TODO confirm when this plot is re-enabled --> | |
279 <filter>plot['command'] == 'gc_bias_plot'</filter> | |
280 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gc_bias_plot\.(?P&lt;ext&gt;.+)" directory="output/"/> | |
281 </collection> | |
282 </outputs> | |
283 <tests> | |
284 <test expect_num_outputs="1"> <!-- gc_plot on a one-element bin collection; expects a single EPS element within 10 bytes of 46633 --> | |
285 <conditional name="bins"> | |
286 <param name="select" value="collection"/> | |
287 <param name="bins_coll"> | |
288 <collection type="list"> | |
289 <element name="637000110" ftype="fasta" value="637000110.fna"/> | |
290 </collection> | |
291 </param> | |
292 </conditional> | |
293 <conditional name="plot"> | |
294 <param name="command" value="gc_plot"/> | |
295 <param name="dist_value" value="100" /> | |
296 <param name="gc_window_size" value="5000"/> | |
297 <param name="gc_bin_width" value="0.01"/> | |
298 </conditional> | |
299 <param name="image_type" value="eps"/> | |
300 <param name="dpi" value="600" /> | |
301 <param name="font_size" value="8"/> | |
302 <param name="width" value="6.5"/> | |
303 <param name="height" value="3.5"/> | |
304 <output_collection name="gc_plot" count="1"> | |
305 <element name="637000110" ftype="eps"> | |
306 <assert_contents> | |
307 <has_size value="46633" delta="10"/> | |
308 </assert_contents> | |
309 </element> | |
310 </output_collection> | |
311 </test> | |
312 <test expect_num_outputs="1"> <!-- coding_plot with a matching one-element GFF collection; expects a single PNG element within 10 bytes of 224295 --> | |
313 <conditional name="bins"> | |
314 <param name="select" value="collection"/> | |
315 <param name="bins_coll"> | |
316 <collection type="list"> | |
317 <element name="637000110" ftype="fasta" value="637000110.fna"/> | |
318 </collection> | |
319 </param> | |
320 </conditional> | |
321 <conditional name="plot"> | |
322 <param name="command" value="coding_plot"/> | |
323 <param name="gff"> | |
324 <collection type="list"> | |
325 <element name="637000110" ftype="gff" value="637000110.gff"/> | |
326 </collection> | |
327 </param> | |
328 <param name="dist_value" value="100" /> | |
329 <param name="cd_window_size" value="10000"/> | |
330 <param name="cd_bin_width" value="0.01"/> | |
331 </conditional> | |
332 <param name="image_type" value="png"/> | |
333 <param name="dpi" value="600" /> | |
334 <param name="font_size" value="8"/> | |
335 <param name="width" value="6.5"/> | |
336 <param name="height" value="3.5"/> | |
337 <output_collection name="coding_plot" count="1"> | |
338 <element name="637000110" ftype="png"> | |
339 <assert_contents> | |
340 <has_size value="224295" delta="10"/> | |
341 </assert_contents> | |
342 </element> | |
343 </output_collection> | |
344 </test> | |
345 <test expect_num_outputs="1"> <!-- tetra_plot with a GFF collection and the "tetra" profile file; expects a single PDF element within 10 bytes of 17443 --> | |
346 <conditional name="bins"> | |
347 <param name="select" value="collection"/> | |
348 <param name="bins_coll"> | |
349 <collection type="list"> | |
350 <element name="637000110" ftype="fasta" value="637000110.fna"/> | |
351 </collection> | |
352 </param> | |
353 </conditional> | |
354 <conditional name="plot"> | |
355 <param name="command" value="tetra_plot"/> | |
356 <param name="gff"> | |
357 <collection type="list"> | |
358 <element name="637000110" ftype="gff" value="637000110.gff"/> | |
359 </collection> | |
360 </param> | |
361 <param name="tetra_profile" ftype="tabular" value="tetra"/> | |
362 <param name="dist_value" value="100" /> | |
363 <param name="td_window_size" value="5000"/> | |
364 <param name="td_bin_width" value="0.01"/> | |
365 </conditional> | |
366 <param name="image_type" value="pdf"/> | |
367 <param name="dpi" value="600" /> | |
368 <param name="font_size" value="8"/> | |
369 <param name="width" value="6.5"/> | |
370 <param name="height" value="3.5"/> | |
371 <output_collection name="tetra_plot" count="1"> | |
372 <element name="637000110" ftype="pdf"> | |
373 <assert_contents> | |
374 <has_size value="17443" delta="10"/> | |
375 </assert_contents> | |
376 </element> | |
377 </output_collection> | |
378 </test> | |
379 <test expect_num_outputs="1"> <!-- dist_plot with GFF collection, tetra profile and all GC, CD and TD settings; expects a single PNG element within 10 bytes of 387707 --> | |
380 <conditional name="bins"> | |
381 <param name="select" value="collection"/> | |
382 <param name="bins_coll"> | |
383 <collection type="list"> | |
384 <element name="637000110" ftype="fasta" value="637000110.fna"/> | |
385 </collection> | |
386 </param> | |
387 </conditional> | |
388 <conditional name="plot"> | |
389 <param name="command" value="dist_plot"/> | |
390 <param name="gff"> | |
391 <collection type="list"> | |
392 <element name="637000110" ftype="gff" value="637000110.gff"/> | |
393 </collection> | |
394 </param> | |
395 <param name="tetra_profile" ftype="tabular" value="tetra"/> | |
396 <param name="dist_value" value="100" /> | |
397 <param name="gc_window_size" value="5000"/> | |
398 <param name="gc_bin_width" value="0.01"/> | |
399 <param name="cd_window_size" value="10000"/> | |
400 <param name="cd_bin_width" value="0.01"/> | |
401 <param name="td_window_size" value="5000"/> | |
402 <param name="td_bin_width" value="0.01"/> | |
403 </conditional> | |
404 <param name="image_type" value="png"/> | |
405 <param name="dpi" value="600" /> | |
406 <param name="font_size" value="8"/> | |
407 <param name="width" value="6.5"/> | |
408 <param name="height" value="3.5"/> | |
409 <output_collection name="dist_plot" count="1"> | |
410 <element name="637000110" ftype="png"> | |
411 <assert_contents> | |
412 <has_size value="387707" delta="10"/> | |
413 </assert_contents> | |
414 </element> | |
415 </output_collection> | |
416 </test> | |
417 <test expect_num_outputs="1"> <!-- nx_plot with step size 0.05; expects a single PS element within 10 bytes of 18835 --> | |
418 <conditional name="bins"> | |
419 <param name="select" value="collection"/> | |
420 <param name="bins_coll"> | |
421 <collection type="list"> | |
422 <element name="637000110" ftype="fasta" value="637000110.fna"/> | |
423 </collection> | |
424 </param> | |
425 </conditional> | |
426 <conditional name="plot"> | |
427 <param name="command" value="nx_plot"/> | |
428 <param name="step_size" value="0.05"/> | |
429 </conditional> | |
430 <param name="image_type" value="ps"/> | |
431 <param name="dpi" value="600" /> | |
432 <param name="font_size" value="8"/> | |
433 <param name="width" value="6.5"/> | |
434 <param name="height" value="3.5"/> | |
435 <output_collection name="nx_plot" count="1"> | |
436 <element name="637000110" ftype="ps"> | |
437 <assert_contents> | |
438 <has_size value="18835" delta="10"/> | |
439 </assert_contents> | |
440 </element> | |
441 </output_collection> | |
442 </test> | |
443 <test expect_num_outputs="1"> <!-- len_hist on a one-element bin collection; expects a single SVG element within 10 bytes of 9075 --> | |
444 <conditional name="bins"> | |
445 <param name="select" value="collection"/> | |
446 <param name="bins_coll"> | |
447 <collection type="list"> | |
448 <element name="637000110" ftype="fasta" value="637000110.fna"/> | |
449 </collection> | |
450 </param> | |
451 </conditional> | |
452 <conditional name="plot"> | |
453 <param name="command" value="len_hist"/> | |
454 <param name="fig_padding" value="0.2"/> | |
455 </conditional> | |
456 <param name="image_type" value="svg"/> | |
457 <param name="dpi" value="600" /> | |
458 <param name="font_size" value="8"/> | |
459 <param name="width" value="6.5"/> | |
460 <param name="height" value="3.5"/> | |
461 <output_collection name="len_hist" count="1"> | |
462 <element name="637000110" ftype="svg"> | |
463 <assert_contents> | |
464 <has_size value="9075" delta="10"/> | |
465 </assert_contents> | |
466 </element> | |
467 </output_collection> | |
468 </test> | |
469 <test expect_num_outputs="1"> <!-- marker_plot with a gene sequence collection plus the two QA tables; expects a single PNG element within 10 bytes of 137394 --> | |
470 <conditional name="bins"> | |
471 <param name="select" value="collection"/> | |
472 <param name="bins_coll"> | |
473 <collection type="list"> | |
474 <element name="637000110" ftype="fasta" value="637000110.fna"/> | |
475 </collection> | |
476 </param> | |
477 </conditional> | |
478 <conditional name="plot"> | |
479 <param name="command" value="marker_plot"/> | |
480 <param name="genes_fna"> | |
481 <collection type="list"> | |
482 <element name="637000110" ftype="fasta" value="637000110.faa"/> | |
483 </collection> | |
484 </param> | |
485 <param name="marker_gene_stats" ftype="tabular" value="marker_gene_stats.tsv"/> | |
486 <param name="bin_stats_ext" ftype="tabular" value="bin_stats_ext.tsv"/> | |
487 <param name="fig_padding" value="0.2"/> | |
488 </conditional> | |
489 <param name="image_type" value="png"/> | |
490 <param name="dpi" value="600" /> | |
491 <param name="font_size" value="8"/> | |
492 <param name="width" value="6.5"/> | |
493 <param name="height" value="3.5"/> | |
494 <output_collection name="marker_plot" count="1"> | |
495 <element name="637000110" ftype="png"> | |
496 <assert_contents> | |
497 <has_size value="137394" delta="10"/> | |
498 </assert_contents> | |
499 </element> | |
500 </output_collection> | |
501 </test> | |
502 <!--<test expect_num_outputs="1"> | |
503 <conditional name="bins"> | |
504 <param name="select" value="collection"/> | |
505 <param name="bins_coll"> | |
506 <collection type="list"> | |
507 <element name="637000110" ftype="fasta" value="637000110.fna"/> | |
508 </collection> | |
509 </param> | |
510 </conditional> | |
511 <conditional name="plot"> | |
512 <param name="command" value="gc_bias_plot"/> | |
513 <param name="bam_file" ftype="bam" value="637000110.bam"/> | |
514 <param name="window_size" value="5000"/> | |
515 <param name="all_reads" value="false" /> | |
516 <param name="min_align" value="0.98"/> | |
517 <param name="max_edit_dist" value="0.02"/> | |
518 </conditional> | |
519 <param name="image_type" value="png"/> | |
520 <param name="dpi" value="600" /> | |
521 <param name="font_size" value="8"/> | |
522 <param name="width" value="6.5"/> | |
523 <param name="height" value="3.5"/> | |
524 <output_collection name="gc_bias_plot" count="1"> | |
525 <element name="637000110" ftype="png"> | |
526 <assert_contents> | |
527 <has_size value="10000" delta="100"/> | |
528 </assert_contents> | |
529 </element> | |
530 </output_collection> | |
531 </test>--> | |
532 </tests> | |
533 <help><![CDATA[ | |
534 @HELP_HEADER@ | |
535 | |
536 This command produces a number of plots for assessing the quality of genome bins. Here we describe each of these plots and provide an example. | |
537 | |
538 - gc_plot: Provides a 3 pane plot suitable for assessing the GC distribution of sequences within a genome bin. The first pane is a histogram of the number of non-overlapping 5 kbp windows with a given percent GC. A typical genome will produce a unimodal distribution. The second pane plots each sequence in the genome bin as a function of its deviation from the average GC of the entire genome (x-axis) and sequence length (y-axis). The dashed red lines indicate the expected deviation from the mean GC as a function of length. This expected deviation is pre-calculated from a set of trusted reference genomes and the percentile plotted is provided as an argument to this command. A good default value to use for this distribution parameter is 95. | |
539 - coding_plot: Provides a plot analogous to the gc_plot suitable for assessing the coding density of sequences within a genome bin. | |
540 - tetra_plot: Provides a plot analogous to the gc_plot suitable for assessing the tetranucleotide signatures of sequences within a genome bin. The Manhattan distance is used to determine the difference between each sequence's tetranucleotide signature and the tetranucleotide signature of the entire genome bin. This plot requires a file indicating the tetranucleotide signature of all sequences within the genome bins. This file can be created with the tetra command. | |
541 - dist_plot: Produces a single figure combining the plots produced by gc_plot, coding_plot, and tetra_plot. This plot requires a file indicating the tetranucleotide signature of all sequences within the genome bins. This file can be created with the tetra command. | |
542 - nx_plot: Produces a plot indicating the Nx value of a genome bin for all values of x. This provides a more comprehensive view of the quality of an assembly than simply considering N50. | |
543 - len_hist: Produce a histogram of the number of sequences within a genome bin at different sequence length intervals. This provides additional information regarding the quality of an assembled genome. | |
544 - marker_plot: Plots the position of marker genes on sequences within a genome bin. This provides information regarding the extent to which marker genes are collocated. The number of marker genes within a fixed size window (2.8 kbps in this example) is indicated with different colours. Sequences without any marker genes are not shown. | |
545 - gc_bias_plot: Plots bin coverage as a function of GC. This plot type is currently not selectable in this tool. | |
546 ]]></help> | |
547 <expand macro="citations"/> | |
548 </tool> |