Mercurial > repos > iuc > checkm_plot
diff plot.xml @ 0:356839cd89d2 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/checkm commit 2a3b068a98bf0e913dc03e0d5c2182cfd102cf27
author | iuc |
---|---|
date | Fri, 29 Jul 2022 20:37:57 +0000 |
parents | |
children | 9916308301da |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plot.xml Fri Jul 29 20:37:57 2022 +0000
@@ -0,0 +1,554 @@
+<tool id="checkm_plot" name="CheckM plot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <!-- Galaxy wrapper around the CheckM plotting sub-commands; the "plot" conditional selects which sub-command is run. -->
+    <description>
+        for assessing the quality of genome bins
+    </description>
+    <macros>
+        <import>macros.xml</import>
+        <xml name="gff_inputs">
+            <param name="gff" type="data_collection" collection_type="list" format="gff" label="Gene feature files for each bin"/>
+        </xml>
+        <!-- Cheetah snippet: links every element of $plot.gff to inputs/bins/<sanitised element identifier>/genes.gff -->
+        <token name="@PLOT_GFF_INPUTS@"><![CDATA[
+#for $i in $plot.gff
+    #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($i.element_identifier))
+mkdir -p 'inputs/bins/${identifier}' &&
+ln -s '$i' 'inputs/bins/${identifier}/genes.gff' &&
+#end for
+]]></token>
+        <xml name="tetra_profile">
+            <param name="tetra_profile" type="data" format="tabular" multiple="true" label="Tetranucleotide profiles for each bin" help="This can be generated using the tetra tool"/>
+        </xml>
+        <xml name="dist_value">
+            <param argument="--dist_value" type="integer" min="0" max="100" value="" label="Reference distribution(s) to plot" />
+        </xml>
+        <xml name="gc_params">
+            <param argument="--gc_window_size" type="integer" min="0" value="5000" label="Window size used to calculate GC histogram" />
+            <param argument="--gc_bin_width" type="float" min="0" value="0.01" label="Width of GC bars in histogram" />
+        </xml>
+        <xml name="cd_params">
+            <param argument="--cd_window_size" type="integer" min="0" value="10000" label="Window size used to calculate CD histogram" />
+            <param argument="--cd_bin_width" type="float" min="0" value="0.01" label="Width of CD bars in histogram" />
+        </xml>
+        <xml name="td_params">
+            <param argument="--td_window_size" type="integer" min="0" value="5000" label="Window size used to calculate TD histogram" />
+            <param argument="--td_bin_width" type="float" min="0" value="0.01" label="Width of TD bars in histogram" />
+        </xml>
+        <xml name="fig_padding">
+            <param argument="--fig_padding" type="float" min="0" value="0.2" label="White space to place around figure" help="In inches"/>
+        </xml>
+        <!-- Inputs for gc_bias_plot; not expanded anywhere below while the matching select option stays commented out -->
+        <xml name="gc_bias_plot">
+            <when value="gc_bias_plot">
+                <param name="bam_file" type="data" format="bam" label="BAM file to interrogate for coverage information" help="The file should be sorted"/>
+                <param argument="--window_size" type="integer" min="0" value="5000" label="Window size used to calculate plot statistics" />
+                <param argument="--all_reads" type="boolean" truevalue="--all_reads" falsevalue="" checked="false" label="Use all reads to estimate coverage instead of just those in proper pairs?" />
+                <param argument="--min_align" type="float" min="0" max="1" value="0.98" label="Minimum alignment length as percentage of read length"/>
+                <param argument="--max_edit_dist" type="float" min="0" max="1" value="0.02" label="Maximum edit distance as percentage of read length"/>
+            </when>
+        </xml>
+    </macros>
+    <expand macro="biotools"/>
+    <expand macro="requirements">
+        <requirement type="package" version="1.15.1">samtools</requirement>
+    </expand>
+    <expand macro="version"/>
+    <command detect_errors="exit_code"><![CDATA[
+@BIN_INPUTS@
+
+#if $plot.command == 'gc_plot'
+checkm gc_plot
+    'bins'
+    'output'
+    $plot.dist_value
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+    --gc_window_size $plot.gc_window_size
+    --gc_bin_width $plot.gc_bin_width
+
+#else if $plot.command == 'coding_plot'
+@PLOT_GFF_INPUTS@
+checkm coding_plot
+    'inputs'
+    'bins'
+    'output'
+    $plot.dist_value
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+    --cd_window_size $plot.cd_window_size
+    --cd_bin_width $plot.cd_bin_width
+
+#else if $plot.command == 'tetra_plot'
+@PLOT_GFF_INPUTS@
+checkm tetra_plot
+    'inputs'
+    'bins'
+    'output'
+    '$tetra_profile'
+    $plot.dist_value
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+    --td_window_size $plot.td_window_size
+    --td_bin_width $plot.td_bin_width
+
+#else if $plot.command == 'dist_plot'
+@PLOT_GFF_INPUTS@
+checkm dist_plot
+    'inputs'
+    'bins'
+    'output'
+    '$tetra_profile'
+    $plot.dist_value
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+    --gc_window_size $plot.gc_window_size
+    --gc_bin_width $plot.gc_bin_width
+    --cd_window_size $plot.cd_window_size
+    --cd_bin_width $plot.cd_bin_width
+    --td_window_size $plot.td_window_size
+    --td_bin_width $plot.td_bin_width
+
+#else if $plot.command == 'nx_plot'
+checkm nx_plot
+    'bins'
+    'output'
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+    --step_size $plot.step_size
+
+#else if $plot.command == 'len_hist'
+checkm len_hist
+    'bins'
+    'output'
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+
+#else if $plot.command == 'marker_plot'
+mkdir -p 'inputs/storage/' &&
+cp '$marker_gene_stats' 'inputs/storage/marker_gene_stats.tsv' &&
+cp '$bin_stats_ext' 'inputs/storage/bin_stats_ext.tsv' &&
+#for $b in $plot.genes_fna
+    #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($b.element_identifier))
+mkdir -p 'inputs/bins/${identifier}' &&
+cp '$b.file_name' 'inputs/bins/${identifier}/genes.faa' &&
+#end for
+checkm marker_plot
+    'inputs'
+    'bins'
+    'output'
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+    --fig_padding $plot.fig_padding
+
+#else if $plot.command == 'gc_bias_plot'
+mkdir 'mapping' &&
+ln -s '$bam_file' 'mapping.bam' &&
+samtools index 'mapping.bam' 'mapping.bam.bai' &&
+
+checkm gc_bias_plot
+    'bins'
+    'output'
+    'mapping.bam'
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+    --window_size $plot.window_size
+    $plot.all_reads
+    --min_align $plot.min_align
+    --max_edit_dist $plot.max_edit_dist
+    --threads \${GALAXY_SLOTS:-1}
+#end if
+    ]]></command>
+    <inputs>
+        <expand macro="bin_inputs"/>
+        <conditional name="plot">
+            <param name="command" type="select" label="Plot to generate">
+                <option value="gc_plot">gc_plot: Create GC histogram and delta-GC plot</option>
+                <option value="coding_plot">Create coding density (CD) histogram and delta-CD plot</option>
+                <option value="tetra_plot">Create tetranucleotide distance (TD) histogram and delta-TD plot</option>
+                <option value="dist_plot">Create image with GC, coding density (CD), and tetranucleotide distance (TD) distribution plots together</option>
+                <option value="nx_plot">Create Nx-plots</option>
+                <option value="len_hist">Sequence length histogram</option>
+                <option value="marker_plot">Plot position of marker genes on sequences</option>
+                <!--<option value="gc_bias_plot">Plot bin coverage as a function of GC</option>-->
+            </param>
+            <when value="gc_plot">
+                <expand macro="dist_value"/>
+                <expand macro="gc_params"/>
+            </when>
+            <when value="coding_plot">
+                <expand macro="gff_inputs"/>
+                <expand macro="dist_value"/>
+                <expand macro="cd_params"/>
+            </when>
+            <when value="tetra_plot">
+                <expand macro="gff_inputs"/>
+                <expand macro="tetra_profile"/>
+                <expand macro="dist_value"/>
+                <expand macro="td_params"/>
+            </when>
+            <when value="dist_plot">
+                <expand macro="gff_inputs"/>
+                <expand macro="tetra_profile"/>
+                <expand macro="dist_value"/>
+                <expand macro="gc_params"/>
+                <expand macro="cd_params"/>
+                <expand macro="td_params"/>
+            </when>
+            <when value="nx_plot">
+                <param argument="--step_size" type="float" min="0" value="0.05" label="x step size for calculating Nx" />
+            </when>
+            <when value="len_hist">
+                <expand macro="fig_padding" />
+            </when>
+            <when value="marker_plot">
+                <param name="genes_fna" type="data_collection" collection_type="list" format="fasta" label="Nucleotide gene sequences for each bin" help="Optional output of the CheckM tree or lineage_wf tools"/>
+                <param name="marker_gene_stats" type="data" format="tabular" label="Marker gene stats" help="Output of the CheckM qa tool or optional output of the lineage_wf or taxonomy_wf tools"/>
+                <param name="bin_stats_ext" type="data" format="tabular" label="Marker gene bin extensive stats" help="Output of the CheckM qa tool or optional output of the lineage_wf or taxonomy_wf tools"/>
+                <expand macro="fig_padding" />
+            </when>
+        </conditional>
+        <param argument="--image_type" type="select" label="Image type">
+            <option value="eps">EPS</option>
+            <option value="pdf">PDF</option>
+            <option value="png" selected="true">PNG</option>
+            <option value="ps">PS</option>
+            <option value="svg">SVG</option>
+        </param>
+        <param argument="--dpi" type="integer" min="0" value="600" label="DPI of output image" />
+        <param argument="--font_size" type="integer" min="0" value="8" label="Font size" />
+        <param argument="--width" type="float" min="0" value="6.5" label="Width of output image" />
+        <param argument="--height" type="float" min="0" value="3.5" label="Height of output image" />
+    </inputs>
+    <outputs>
+        <collection name="gc_plot" type="list" label="${tool.name} on ${on_string}: GC distribution plot">
+            <filter>plot['command'] == 'gc_plot'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gc_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+        <collection name="coding_plot" type="list" label="${tool.name} on ${on_string}: Coding density (CD) distribution plot">
+            <filter>plot['command'] == 'coding_plot'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.coding_density_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+        <collection name="tetra_plot" type="list" label="${tool.name} on ${on_string}: Tetranucleotide distance (TD) distribution plot">
+            <filter>plot['command'] == 'tetra_plot'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.tetra_dist_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+        <collection name="dist_plot" type="list" label="${tool.name} on ${on_string}: GC, Coding density (CD) and Tetranucleotide distance (TD) distribution plot">
+            <filter>plot['command'] == 'dist_plot'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.ref_dist_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+        <collection name="nx_plot" type="list" label="${tool.name} on ${on_string}: Nx-plot">
+            <filter>plot['command'] == 'nx_plot'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.nx_plot\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+        <collection name="len_hist" type="list" label="${tool.name} on ${on_string}: Sequence length histogram">
+            <filter>plot['command'] == 'len_hist'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.len_hist\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+        <collection name="marker_plot" type="list" label="${tool.name} on ${on_string}: Marker gene position plot">
+            <filter>plot['command'] == 'marker_plot'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.marker_pos_plot\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+        <!-- Only produced once the gc_bias_plot select option is re-enabled; the file suffix is assumed to follow the sub-command name (TODO confirm against CheckM output) -->
+        <collection name="gc_bias_plot" type="list" label="${tool.name} on ${on_string}: Bin coverage as a function of GC">
+            <filter>plot['command'] == 'gc_bias_plot'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gc_bias_plot\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="gc_plot"/>
+                <param name="dist_value" value="100" />
+                <param name="gc_window_size" value="5000"/>
+                <param name="gc_bin_width" value="0.01"/>
+            </conditional>
+            <param name="image_type" value="eps"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="gc_plot" count="1">
+                <element name="637000110" ftype="eps">
+                    <assert_contents>
+                        <has_size value="46633" delta="10"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="coding_plot"/>
+                <param name="gff">
+                    <collection type="list">
+                        <element name="637000110" ftype="gff" value="637000110.gff"/>
+                    </collection>
+                </param>
+                <param name="dist_value" value="100" />
+                <param name="cd_window_size" value="10000"/>
+                <param name="cd_bin_width" value="0.01"/>
+            </conditional>
+            <param name="image_type" value="png"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="coding_plot" count="1">
+                <element name="637000110" ftype="png">
+                    <assert_contents>
+                        <has_size value="224295" delta="10"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="tetra_plot"/>
+                <param name="gff">
+                    <collection type="list">
+                        <element name="637000110" ftype="gff" value="637000110.gff"/>
+                    </collection>
+                </param>
+                <param name="tetra_profile" ftype="tabular" value="tetra"/>
+                <param name="dist_value" value="100" />
+                <param name="td_window_size" value="5000"/>
+                <param name="td_bin_width" value="0.01"/>
+            </conditional>
+            <param name="image_type" value="pdf"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="tetra_plot" count="1">
+                <element name="637000110" ftype="pdf">
+                    <assert_contents>
+                        <has_size value="17443" delta="10"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="dist_plot"/>
+                <param name="gff">
+                    <collection type="list">
+                        <element name="637000110" ftype="gff" value="637000110.gff"/>
+                    </collection>
+                </param>
+                <param name="tetra_profile" ftype="tabular" value="tetra"/>
+                <param name="dist_value" value="100" />
+                <param name="gc_window_size" value="5000"/>
+                <param name="gc_bin_width" value="0.01"/>
+                <param name="cd_window_size" value="10000"/>
+                <param name="cd_bin_width" value="0.01"/>
+                <param name="td_window_size" value="5000"/>
+                <param name="td_bin_width" value="0.01"/>
+            </conditional>
+            <param name="image_type" value="png"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="dist_plot" count="1">
+                <element name="637000110" ftype="png">
+                    <assert_contents>
+                        <has_size value="387707" delta="10"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="nx_plot"/>
+                <param name="step_size" value="0.05"/>
+            </conditional>
+            <param name="image_type" value="ps"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="nx_plot" count="1">
+                <element name="637000110" ftype="ps">
+                    <assert_contents>
+                        <has_size value="18835" delta="10"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="len_hist"/>
+                <param name="fig_padding" value="0.2"/>
+            </conditional>
+            <param name="image_type" value="svg"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="len_hist" count="1">
+                <element name="637000110" ftype="svg">
+                    <assert_contents>
+                        <has_size value="9075" delta="10"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="marker_plot"/>
+                <param name="genes_fna">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.faa"/>
+                    </collection>
+                </param>
+                <param name="marker_gene_stats" ftype="tabular" value="marker_gene_stats.tsv"/>
+                <param name="bin_stats_ext" ftype="tabular" value="bin_stats_ext.tsv"/>
+                <param name="fig_padding" value="0.2"/>
+            </conditional>
+            <param name="image_type" value="png"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="marker_plot" count="1">
+                <element name="637000110" ftype="png">
+                    <assert_contents>
+                        <has_size value="137394" delta="10"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <!--<test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="gc_bias_plot"/>
+                <param name="bam_file" ftype="bam" value="637000110.bam"/>
+                <param name="window_size" value="5000"/>
+                <param name="all_reads" value="false" />
+                <param name="min_align" value="0.98"/>
+                <param name="max_edit_dist" value="0.02"/>
+            </conditional>
+            <param name="image_type" value="png"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="gc_bias_plot" count="1">
+                <element name="637000110" ftype="png">
+                    <assert_contents>
+                        <has_size value="10000" delta="100"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>-->
+    </tests>
+    <help><![CDATA[
+@HELP_HEADER@
+
+This command produces a number of plots for assessing the quality of genome bins. Here we describe each of these plots and provide an example.
+
+- gc_plot: Provides a 3 pane plot suitable for assessing the GC distribution of sequences within a genome bin. The first pane is a histogram of the number of non-overlapping 5 kbp windows with a given percent GC. A typical genome will produce a unimodal distribution. The second pane plots each sequence in the genome bin as a function of its deviation from the average GC of the entire genome (x-axis) and sequence length (y-axis). The dashed red lines indicate the expected deviation from the mean GC as a function of length. This expected deviation is pre-calculated from a set of trusted reference genomes and the percentile plotted is provided as an argument to this command. A good default value to use for this distribution parameter is 95.
+- coding_plot: Provides a plot analogous to the gc_plot suitable for assessing the coding density of sequences within a genome bin.
+- tetra_plot: Provides a plot analogous to the gc_plot suitable for assessing the tetranucleotide signatures of sequences within a genome bin. The Manhattan distance is used to determine the difference between each sequence's tetranucleotide signature and the tetranucleotide signature of the entire genome bin. This plot requires a file indicating the tetranucleotide signature of all sequences within the genome bins. This file can be created with the tetra command.
+- dist_plot: Produces a single figure combining the plots produced by gc_plot, coding_plot, and tetra_plot. This plot requires a file indicating the tetranucleotide signature of all sequences within the genome bins. This file can be created with the tetra command.
+- nx_plot: Produces a plot indicating the Nx value of a genome bin for all values of x. This provides a more comprehensive view of the quality of an assembly than simply considering N50.
+- len_hist: Produce a histogram of the number of sequences within a genome bin at different sequence length intervals. This provides additional information regarding the quality of an assembled genome.
+- marker_plot: Plots the position of marker genes on sequences within a genome bin. This provides information regarding the extent to which marker genes are collocated. The number of marker genes within a fixed size window (2.8 kbps in this example) is indicated by different colours. Sequences without any marker genes are not shown.
+- gc_bias_plot: Plots bin coverage as a function of GC.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>