diff plot.xml @ 0:356839cd89d2 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/checkm commit 2a3b068a98bf0e913dc03e0d5c2182cfd102cf27
author iuc
date Fri, 29 Jul 2022 20:37:57 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plot.xml	Fri Jul 29 20:37:57 2022 +0000
@@ -0,0 +1,548 @@
+<tool id="checkm_plot" name="CheckM plot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>
+        for assessing the quality of genome bins
+    </description>
+    <macros>
+        <import>macros.xml</import>
+        <xml name="gff_inputs">
+            <param name="gff" type="data_collection" collection_type="list" format="gff" label="Gene feature files for each bin"/>
+        </xml>
+        <token name="@PLOT_GFF_INPUTS@"><![CDATA[
+#for $i in $plot.gff
+    #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($i.element_identifier))
+mkdir -p 'inputs/bins/${identifier}' &&
+ln -s '$i' 'inputs/bins/${identifier}/genes.gff' &&
+#end for
+]]></token>
+        <xml name="tetra_profile">
+            <param name="tetra_profile" type="data" format="tabular" multiple="true" label="Tetranucleotide profiles for each bin" help="This can be generated using the tetra tool"/>
+        </xml>   
+        <xml name="dist_value">
+            <param argument="--dist_value" type="integer" min="0" max="100" value="" label="Reference distribution(s) to plot" />
+        </xml>
+        <xml name="gc_params">
+            <param argument="--gc_window_size" type="integer" min="0" value="5000" label="Window size used to calculate GC histogram" />
+            <param argument="--gc_bin_width" type="float" min="0" value="0.01" label="Width of GC bars in histogram" />
+        </xml>
+        <xml name="cd_params">
+            <param argument="--cd_window_size" type="integer" min="0" value="10000" label="Window size used to calculate CD histogram" />
+            <param argument="--cd_bin_width" type="float" min="0" value="0.01" label="Width of CD bars in histogram" />
+        </xml>
+        <xml name="td_params">
+            <param argument="--td_window_size" type="integer" min="0" value="5000" label="Window size used to calculate TD histogram" />
+            <param argument="--td_bin_width" type="float" min="0" value="0.01" label="Width of TD bars in histogram" />
+        </xml>
+        <xml name="fig_padding">
+            <param argument="--fig_padding" type="float" min="0" value="0.2" label="White space to place around figure" help="In inches"/>
+        </xml>
+        <xml name="gc_bias_plot">
+            <when value="gc_bias_plot">
+                <param name="bam_file" type="data" format="bam" label="BAM file to interrogate for coverage information" help="The file should be sorted"/>
+                <param argument="--window_size" type="integer" min="0" value="5000" label="Window size used to calculate plot statistics" />
+                <param argument="--all_reads" type="boolean" truevalue="--all_reads" falsevalue="" checked="false" label="Use all reads to estimate coverage instead of just those in proper pairs?" />
+                <param argument="--min_align" type="float" min="0" max="1" value="0.98" label="Minimum alignment length as percentage of read length"/>
+                <param argument="--max_edit_dist" type="float" min="0" max="1" value="0.02" label="Maximum edit distance as percentage of read length"/>
+            </when>
+        </xml>
+    </macros>
+    <expand macro="biotools"/>
+    <expand macro="requirements">
+        <requirement type="package" version="1.15.1">samtools</requirement>
+    </expand>
+    <expand macro="version"/>
+    <command detect_errors="exit_code"><![CDATA[
+@BIN_INPUTS@
+
+#if $plot.command == 'gc_plot'
+checkm gc_plot
+    'bins'
+    'output'
+    $plot.dist_value
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+
+#else if $plot.command == 'coding_plot'
+@PLOT_GFF_INPUTS@
+checkm coding_plot
+    'inputs'
+    'bins'
+    'output'
+    $plot.dist_value
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+    --cd_window_size $plot.cd_window_size
+    --cd_bin_width $plot.cd_bin_width
+
+#else if $plot.command == 'tetra_plot'
+@PLOT_GFF_INPUTS@
+checkm tetra_plot
+    'inputs'
+    'bins'
+    'output'
+    '$tetra_profile'
+    $plot.dist_value
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+    --td_window_size $plot.td_window_size
+    --td_bin_width $plot.td_bin_width
+
+#else if $plot.command == 'dist_plot'
+@PLOT_GFF_INPUTS@
+checkm dist_plot
+    'inputs'
+    'bins'
+    'output'
+    '$tetra_profile'
+    $plot.dist_value
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+    --gc_window_size $plot.gc_window_size
+    --gc_bin_width $plot.gc_bin_width
+    --cd_window_size $plot.cd_window_size
+    --cd_bin_width $plot.cd_bin_width
+    --td_window_size $plot.td_window_size
+    --td_bin_width $plot.td_bin_width
+
+#else if $plot.command == 'nx_plot'
+checkm nx_plot
+    'bins'
+    'output'
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+    --step_size $plot.step_size
+
+#else if $plot.command == 'len_hist'
+checkm len_hist
+    'bins'
+    'output'
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+
+#else if $plot.command == 'marker_plot'
+mkdir -p 'inputs/storage/' &&
+cp '$marker_gene_stats' 'inputs/storage/marker_gene_stats.tsv' &&
+cp '$bin_stats_ext' 'inputs/storage/bin_stats_ext.tsv' &&
+#for $b in $plot.genes_fna
+    #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($b.element_identifier))
+mkdir -p 'inputs/bins/${identifier}' &&
+cp '$b.file_name' 'inputs/bins/${identifier}/genes.faa' &&
+#end for
+checkm marker_plot
+    'inputs'
+    'bins'
+    'output'
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+    --fig_padding $plot.fig_padding
+
+#else if $plot.command == 'gc_bias_plot'
+mkdir 'mapping' &&
+ln -s '$bam_file' 'mapping.bam' &&
+samtools index 'mapping.bam' 'mapping.bam.bai' &&
+
+checkm gc_bias_plot
+    'bins'
+    'output'
+    'mapping.bam'
+    --extension 'fasta'
+    --image_type '$image_type'
+    --dpi $dpi
+    --font_size $font_size
+    --width $width
+    --height $height
+    --window_size $plot.window_size
+    $plot.all_reads       
+    --min_align $plot.min_align
+    --max_edit_dist $plot.max_edit_dist
+    --threads \${GALAXY_SLOTS:-1}
+#end if
+    ]]></command>
+    <inputs>
+        <expand macro="bin_inputs"/>
+        <conditional name="plot">
+            <param name="command" type="select" label="Plot to generate">
+                <option value="gc_plot">gc_plot: Create GC histogram and delta-GC plot</option>
+                <option value="coding_plot">Create coding density (CD) histogram and delta-CD plot</option>
+                <option value="tetra_plot">Create tetranucleotide distance (TD) histogram and delta-TD plot</option>
+                <option value="dist_plot">Create image with GC, coding density (CD), and tetranucleotide distance (TD) distribution plots together</option>
+                <option value="nx_plot">Create Nx-plots</option>
+                <option value="len_hist">Sequence length histogram</option>
+                <option value="marker_plot">Plot position of marker genes on sequences</option>
+                <!--<option value="gc_bias_plot">Plot bin coverage as a function of GC</option>-->
+            </param>
+            <when value="gc_plot">
+                <expand macro="dist_value"/>
+                <expand macro="gc_params"/>
+            </when>
+            <when value="coding_plot">
+                <expand macro="gff_inputs"/>
+                <expand macro="dist_value"/>
+                <expand macro="cd_params"/>
+            </when>
+            <when value="tetra_plot">
+                <expand macro="gff_inputs"/>
+                <expand macro="tetra_profile"/>
+                <expand macro="dist_value"/>
+                <expand macro="td_params"/>
+            </when>
+            <when value="dist_plot">
+                <expand macro="gff_inputs"/>
+                <expand macro="tetra_profile"/>
+                <expand macro="dist_value"/>
+                <expand macro="gc_params"/>
+                <expand macro="cd_params"/>
+                <expand macro="td_params"/>
+            </when>
+            <when value="nx_plot">
+                <param argument="--step_size" type="float" min="0" value="0.05" label="x step size for calculating Nx" />
+            </when>
+            <when value="len_hist">
+                <expand macro="fig_padding" />
+            </when>
+            <when value="marker_plot">
+                <param name="genes_fna" type="data_collection" collection_type="list" format="fasta" label="Nucleotide gene sequences for each bin"  help="Optional output of the CheckM tree or lineage_wf tools"/>
+                <param name="marker_gene_stats" type="data" format="tabular" label="Marker gene stats" help="Output of the CheckM qa tool or optional output of the lineage_wf or taxonomy_wf tools"/>
+                <param name="bin_stats_ext" type="data" format="tabular" label="Marker gene bin extensive stats" help="Output of the CheckM qa tool or optional output of the lineage_wf or taxonomy_wf tools"/>
+                <expand macro="fig_padding" />
+            </when>
+        </conditional>
+        <param argument="--image_type" type="select" label="Image type">
+            <option value="eps">EPS</option>
+            <option value="pdf">PDF</option>
+            <option value="png" selected="true">PNG</option>
+            <option value="ps">PS</option>
+            <option value="svg">SVG</option>
+        </param>
+        <param argument="--dpi" type="integer" min="0" value="600" label="DPI of output image" />
+        <param argument="--font_size" type="integer" min="0" value="8" label="Font size" />
+        <param argument="--width" type="float" min="0" value="6.5" label="Width of output image" />
+        <param argument="--height" type="float" min="0" value="3.5" label="Height of output image" />
+    </inputs>
+    <outputs>
+        <collection name="gc_plot" type="list" label="${tool.name} on ${on_string}: GC distribution plot">
+            <filter>plot['command'] == 'gc_plot'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gc_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+        <collection name="coding_plot" type="list" label="${tool.name} on ${on_string}: Coding density (CD) distribution plot">
+            <filter>plot['command'] == 'coding_plot'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.coding_density_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+        <collection name="tetra_plot" type="list" label="${tool.name} on ${on_string}: Tetranucleotide distance (TD) distribution plot">
+            <filter>plot['command'] == 'tetra_plot'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.tetra_dist_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+        <collection name="dist_plot" type="list" label="${tool.name} on ${on_string}: GC, Coding density (CD) and Tetranucleotide distance (TD) distribution plot">
+            <filter>plot['command'] == 'dist_plot'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.ref_dist_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+        <collection name="nx_plot" type="list" label="${tool.name} on ${on_string}: Nx-plot">
+            <filter>plot['command'] == 'nx_plot'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.nx_plot\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+        <collection name="len_hist" type="list" label="${tool.name} on ${on_string}: Sequence length histogram">
+            <filter>plot['command'] == 'len_hist'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.len_hist\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+        <collection name="marker_plot" type="list" label="${tool.name} on ${on_string}: Marker gene position plot">
+            <filter>plot['command'] == 'marker_plot'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.marker_pos_plot\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+        <collection name="gc_bias_plot" type="list" label="${tool.name} on ${on_string}: Bin coverage as a function of GC">
+            <filter>plot['command'] == 'gc_bias_plot'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.marker_pos_plot\.(?P&lt;ext&gt;.+)" directory="output/"/>
+        </collection>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="gc_plot"/>
+                <param name="dist_value" value="100" />
+                <param name="gc_window_size" value="5000"/>
+                <param name="gc_bin_width" value="0.01"/>
+            </conditional>
+            <param name="image_type" value="eps"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="gc_plot" count="1">
+                <element name="637000110" ftype="eps">
+                    <assert_contents>
+                        <has_size value="46633" delta="10"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="coding_plot"/>
+                <param name="gff">
+                    <collection type="list">
+                        <element name="637000110" ftype="gff" value="637000110.gff"/>
+                    </collection>
+                </param>
+                <param name="dist_value" value="100" />
+                <param name="cd_window_size" value="10000"/>
+                <param name="cd_bin_width" value="0.01"/>
+            </conditional>
+            <param name="image_type" value="png"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="coding_plot" count="1">
+                <element name="637000110" ftype="png">
+                    <assert_contents>
+                        <has_size value="224295" delta="10"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="tetra_plot"/>
+                <param name="gff">
+                    <collection type="list">
+                        <element name="637000110" ftype="gff" value="637000110.gff"/>
+                    </collection>
+                </param>
+                <param name="tetra_profile" ftype="tabular" value="tetra"/>
+                <param name="dist_value" value="100" />
+                <param name="td_window_size" value="5000"/>
+                <param name="td_bin_width" value="0.01"/>
+            </conditional>
+            <param name="image_type" value="pdf"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="tetra_plot" count="1">
+                <element name="637000110" ftype="pdf">
+                    <assert_contents>
+                        <has_size value="17443" delta="10"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="dist_plot"/>
+                <param name="gff">
+                    <collection type="list">
+                        <element name="637000110" ftype="gff" value="637000110.gff"/>
+                    </collection>
+                </param>
+                <param name="tetra_profile" ftype="tabular" value="tetra"/>
+                <param name="dist_value" value="100" />
+                <param name="gc_window_size" value="5000"/>
+                <param name="gc_bin_width" value="0.01"/>
+                <param name="cd_window_size" value="10000"/>
+                <param name="cd_bin_width" value="0.01"/>
+                <param name="td_window_size" value="5000"/>
+                <param name="td_bin_width" value="0.01"/>
+            </conditional>
+            <param name="image_type" value="png"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="dist_plot" count="1">
+                <element name="637000110" ftype="png">
+                    <assert_contents>
+                        <has_size value="387707" delta="10"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="nx_plot"/>
+                <param name="step_size" value="0.05"/>
+            </conditional>
+            <param name="image_type" value="ps"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="nx_plot" count="1">
+                <element name="637000110" ftype="ps">
+                    <assert_contents>
+                        <has_size value="18835" delta="10"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="len_hist"/>
+                <param name="fig_padding" value="0.2"/>
+            </conditional>
+            <param name="image_type" value="svg"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="len_hist" count="1">
+                <element name="637000110" ftype="svg">
+                    <assert_contents>
+                        <has_size value="9075" delta="10"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="marker_plot"/>
+                <param name="genes_fna">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.faa"/>
+                    </collection>
+                </param>
+                <param name="marker_gene_stats" ftype="tabular" value="marker_gene_stats.tsv"/>
+                <param name="bin_stats_ext" ftype="tabular" value="bin_stats_ext.tsv"/>
+                <param name="fig_padding" value="0.2"/>
+            </conditional>
+            <param name="image_type" value="png"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="marker_plot" count="1">
+                <element name="637000110" ftype="png">
+                    <assert_contents>
+                        <has_size value="137394" delta="10"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <!--<test expect_num_outputs="1">
+            <conditional name="bins">
+                <param name="select" value="collection"/>
+                <param name="bins_coll">
+                    <collection type="list">
+                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="plot">
+                <param name="command" value="gc_bias_plot"/>
+                <param name="bam_file" ftype="bam" value="637000110.bam"/>
+                <param name="window_size" value="5000"/>
+                <param name="all_reads" value="false" />
+                <param name="min_align" value="0.98"/>
+                <param name="max_edit_dist" value="0.02"/>
+            </conditional>
+            <param name="image_type" value="png"/>
+            <param name="dpi" value="600" />
+            <param name="font_size" value="8"/>
+            <param name="width" value="6.5"/>
+            <param name="height" value="3.5"/>
+            <output_collection name="gc_bias_plot" count="1">
+                <element name="637000110" ftype="png">
+                    <assert_contents>
+                        <has_size value="10000" delta="100"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>-->
+    </tests>
+    <help><![CDATA[
+@HELP_HEADER@
+
+This command produces a number of plots for assessing the quality of genome bins. Here we describe each of these plots and provide an example.
+
+- gc_plot: Provides a 3 pane plot suitable for assessing the GC distribution of sequences within a genome bin. The first pane is a histogram of the number of non-overlapping 5 kbp windows with a give percent GC. A typical genome will produce a unimodal distribution. The second pane plots each sequence in the genome bin as a function of its deviation from the average GC of the entire genome (x-axis) and sequence length (y-axis). The dashed red lines indicate the expected deviation from the mean GC as a function of length. This expected deviation is pre-calculated from a set of trusted reference genomes and the percentile plotted is provided as an argument to this command. A good default value to use for this distribution parameter is 95.
+- coding_plot: Provides a plot analogous to the gc_plot suitable for assessing the coding density of sequences within a genome bin.
+- tetra_plot: Provides a plot analogous to the gc_plot suitable for assessing the tetranucleotide signatures of sequences within a genome bin. The Manhattan distance is used for determine the different between each sequence's tetranucleotide signature and the tetranucleotide signature of the entire genome bin. This plot requires a file indicating the tetranucleotide signature of all sequences within the genome bins. This file can be creates with the tetra command.
+- dist_plot: Produces a single figure combining the plots produced by gc_plot, coding_plot, and tetra_plot. This plot requires a file indicating the tetranucleotide signature of all sequences within the genome bins. This file can be creates with the tetra command.
+- nx_plot: Produces a plot indicating the Nx value of a genome bin for all values of x. This provides a more comprehensive view of the quality of an assembly than simply considering N50.
+- len_hist: Produce a histogram of the number of sequences within a genome bin at different sequence length intervals. This provides additional information regarding the quality of an assembled genome.
+- marker_plot: Plots the position of marker genes on sequences within a genome bin. This provides information regarding the extent to which marker genes are collocated. The number of marker genes within a fixed size window (2.8 kbps in this example) is indicated by with different colours. Sequences without any marker genes are not shown.
+- gc_bias_plot: 
+    ]]></help>
+    <expand macro="citations"/>
+</tool>