view lineage_wf.xml @ 0:760dc0c0e689 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/checkm commit 2a3b068a98bf0e913dc03e0d5c2182cfd102cf27
author iuc
date Fri, 29 Jul 2022 20:30:08 +0000
parents
children f0107b9f2dc3
line wrap: on
line source

<tool id="checkm_lineage_wf" name="CheckM lineage_wf" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Short summary of what the tool does. -->
    <description>
        Assessing the completeness and contamination of genome bins using lineage-specific marker sets
    </description>
    <!--
        macros.xml is the only import in this file, so the @...@ tokens and every
        <expand> macro referenced below are expected to be defined there.
    -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <expand macro="version"/>
    <!--
        Runs "checkm lineage_wf" with the literal directory names 'bins' and 'output'
        as its two positional arguments. The from_work_dir and discover_datasets paths
        in <outputs> all read from 'output/'.
        @BIN_INPUTS@ is a macro token that is not defined in this file; presumably it
        expands to the shell snippet that populates 'bins' (confirm in macros.xml).
        The summary table is written to $results as a tab-separated file, the bin
        extension is fixed to 'fasta', and both thread options fall back to 1 when
        GALAXY_SLOTS is unset. A non-zero exit code marks the job as failed.
    -->
    <command detect_errors="exit_code"><![CDATA[
@BIN_INPUTS@

checkm lineage_wf
    'bins'
    'output'
    $tree_analyze.reduced_tree
    $tree_analyze.ali
    $tree_analyze.nt
    $tree_analyze.genes
    --unique '$lineage_set.unique'
    --multi '$lineage_set.multi'
    $lineage_set.force_domain
    $lineage_set.no_refinement
    $qa.individual_markers
    $qa.skip_adj_correction
    $qa.skip_pseudogene_correction
    --aai_strain $qa.aai_strain
    $qa.ignore_thresholds
    --e_value $qa.e_value
    --length $qa.length
    --file '$results'
    --tab_table
    --extension 'fasta'
    --threads \${GALAXY_SLOTS:-1}
    --pplacer_threads \${GALAXY_SLOTS:-1}
    ]]></command>
    <inputs>
        <!-- Bin input parameters come from a macro; the tests address them through a conditional named "bins". -->
        <expand macro="bin_inputs" />
        <!--
            The three sections below are filled by macros. Their section names and the
            parameter names inside them must match the $section.param references used
            in <command> (tree_analyze.*, lineage_set.*, qa.*).
        -->
        <section name="tree_analyze" title="Bin placement in the genome tree and marker gene identification">
            <expand macro="tree_params" />
        </section>
        <section name="lineage_set" title="Bin lineage-specific marker set inference">
            <expand macro="lineage_set_params" />
        </section>
        <section name="qa" title="Bin assessment">
            <expand macro="qa_params" />
        </section>
        <!--
            Optional multi-select of additional outputs. Each option value is tested by a
            <filter> in <outputs>, so a value must stay in sync with the name used in the
            matching filter. Further options are contributed by the three
            *_extra_output_options macros, which are not visible in this file.
        -->
        <param name="extra_outputs" type="select" multiple="true" optional="true" label="Extra outputs">
            <option value="phylo_hmm_info">Phylogenetic HMM model info for each bin</option>
            <option value="bin_stats_tree">Phylogenetic bin stats</option>
            <option value="hmmer_tree">Phylogenetic HMM hits to each bin</option>
            <option value="concatenated_tre">Concatenated tree</option>
            <option value="concatenated_fasta">Concatenated masked sequences</option>
            <expand macro="tree_extra_output_options" />
            <option value="marker_file">Marker genes</option>
            <option value="hmmer_analyze">Marker gene HMM hits to each bin</option>
            <option value="bin_stats_analyze">Marker gene bin stats</option>
            <option value="checkm_hmm_info">Marker gene HMM info for each bin</option>
            <expand macro="analyze_extra_output_options" />
            <option value="bin_stats_ext">Marker gene bin extensive stats</option>
            <expand macro="qa_extra_output_options" />
        </param>
    </inputs>
    <outputs>
        <!--
            "results" is always produced. Every other output is gated on the optional
            multi-select "extra_outputs". Each filter starts with "extra_outputs and" so
            that it evaluates to a falsy value instead of raising when nothing is selected
            (the value is assumed to be None in that case; confirm against the Galaxy
            filter documentation).
            Per-bin files are collected from output/bins/<bin>/ and the bin directory
            name becomes the element identifier (the "designation" regex group).
        -->
        <data name="results" format="tabular" label="${tool.name} on ${on_string}: Bin statistics"/>
        <!--tree outputs-->
        <data name="phylo_hmm_info" format="zip" from_work_dir="output/storage/phylo_hmm_info.pkl.gz" label="${tool.name} on ${on_string}: Phylogenetic HMM model info for each bin">
            <filter>extra_outputs and 'phylo_hmm_info' in extra_outputs</filter>
        </data>
        <data name="bin_stats_tree" format="tabular" from_work_dir="output/storage/bin_stats.tree.tsv" label="${tool.name} on ${on_string}: Phylogenetic bin stats">
            <filter>extra_outputs and 'bin_stats_tree' in extra_outputs</filter>
        </data>
        <collection name="hmmer_tree" type="list" label="${tool.name} on ${on_string}: Phylogenetic HMM hits to each bin">
            <filter>extra_outputs and 'hmmer_tree' in extra_outputs</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)/hmmer\.tree\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/>
        </collection>
        <data name="concatenated_fasta" format="fasta" from_work_dir="output/storage/tree/concatenated.fasta" label="${tool.name} on ${on_string}: Concatenated masked sequences">
            <filter>extra_outputs and 'concatenated_fasta' in extra_outputs</filter>
        </data>
        <data name="concatenated_tre" format="phyloxml" from_work_dir="output/storage/tree/concatenated.tre" label="${tool.name} on ${on_string}: Concatenated tree">
            <filter>extra_outputs and 'concatenated_tre' in extra_outputs</filter>
        </data>
        <!-- Alignment files are only requested when the "ali" flag of the tree_analyze section is set. -->
        <collection name="hmmer_tree_ali" type="list" label="${tool.name} on ${on_string}: Phylogenetic HMMER alignment file for each bin">
            <filter>tree_analyze['ali'] and extra_outputs and 'hmmer_tree_ali' in extra_outputs</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)/hmmer\.tree\.ali\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/>
        </collection>
        <!--
            The filter value now matches the output name; it previously tested
            'concatenate_pplacer_json' (missing "d"). NOTE(review): the option itself is
            expected to come from the tree_extra_output_options macro; confirm that its
            value is spelled "concatenated_pplacer_json".
        -->
        <data name="concatenated_pplacer_json" format="json" from_work_dir="output/storage/tree/concatenated.pplacer.json" label="${tool.name} on ${on_string}: Concatenated pplacer JSON">
            <filter>extra_outputs and 'concatenated_pplacer_json' in extra_outputs</filter>
        </data>
        <!-- genes.fna is gated on the "nt" flag and holds nucleotide sequences; genes.faa holds the protein sequences. -->
        <collection name="genes_fna" type="list" label="${tool.name} on ${on_string}: Nucleotide gene sequences for each bin">
            <filter>not tree_analyze['genes'] and tree_analyze['nt'] and extra_outputs and 'genes_fna' in extra_outputs</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)/genes\.fna" format="fasta" directory="output/bins/" recurse="true" match_relative_path="true"/>
        </collection>
        <collection name="genes_faa" type="list" label="${tool.name} on ${on_string}: Protein gene sequences for each bin">
            <filter>extra_outputs and 'genes_faa' in extra_outputs</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)/genes\.faa" format="fasta" directory="output/bins/" recurse="true" match_relative_path="true"/>
        </collection>
        <collection name="genes_gff" type="list" label="${tool.name} on ${on_string}: Gene feature files for each bin">
            <filter>not tree_analyze['genes'] and extra_outputs and 'genes_gff' in extra_outputs</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)/genes\.gff" format="gff" directory="output/bins/" recurse="true" match_relative_path="true"/>
        </collection>
        <!--lineage_set outputs-->
        <data name="marker_file" format="tabular" from_work_dir="output/lineage.ms" label="${tool.name} on ${on_string}: Marker genes">
            <filter>extra_outputs and 'marker_file' in extra_outputs</filter>
        </data>
        <!--analyze outputs-->
        <collection name="hmmer_analyze" type="list" label="${tool.name} on ${on_string}: Marker gene HMM hits to each bin">
            <filter>extra_outputs and 'hmmer_analyze' in extra_outputs</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)/hmmer\.analyze\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/>
        </collection>
        <data name="bin_stats_analyze" format="tabular" from_work_dir="output/storage/bin_stats.analyze.tsv" label="${tool.name} on ${on_string}: Marker gene bin stats">
            <filter>extra_outputs and 'bin_stats_analyze' in extra_outputs</filter>
        </data>
        <data name="checkm_hmm_info" format="zip" from_work_dir="output/storage/checkm_hmm_info.pkl.gz" label="${tool.name} on ${on_string}: Marker gene HMM info for each bin">
            <filter>extra_outputs and 'checkm_hmm_info' in extra_outputs</filter>
        </data>
        <collection name="hmmer_analyze_ali" type="list" label="${tool.name} on ${on_string}: HMMER alignment file for each bin">
            <filter>tree_analyze['ali'] and extra_outputs and 'hmmer_analyze_ali' in extra_outputs</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)/hmmer\.analyze\.ali\.txt" format="txt" directory="output/bins/" recurse="true" match_relative_path="true"/>
        </collection>
        <!--qa outputs-->
        <data name="bin_stats_ext" format="tabular" from_work_dir="output/storage/bin_stats_ext.tsv" label="${tool.name} on ${on_string}: Marker gene bin extensive stats">
            <filter>extra_outputs and 'bin_stats_ext' in extra_outputs</filter>
        </data>
        <!-- Remaining qa outputs are declared by a macro that is not visible in this file. -->
        <expand macro="qa_extra_outputs" />

    </outputs>
    <tests>
        <!--
            Minimal run: one individual FASTA bin, reduced tree, no extra outputs selected.
            Only the always-present "results" table is expected (expect_num_outputs is 1).
        -->
        <test expect_num_outputs="1">
            <conditional name="bins">
                <param name="select" value="individual"/>
                <param name="bins_ind" ftype="fasta" value="637000110.fna"/>
            </conditional>
            <section name="tree_analyze">
                <param name="reduced_tree" value="true"/>
                <param name="ali" value="false"/>
                <param name="nt" value="false"/>
                <param name="genes" value="false"/>
            </section>
            <section name="lineage_set">
                <param name="unique" value="10"/>
                <param name="multi" value="10"/>
                <param name="force_domain" value="false"/>
                <param name="no_refinement" value="false"/>
            </section>
            <section name="qa">
                <param name="individual_markers" value="false"/>
                <param name="skip_adj_correction" value="false"/>
                <param name="skip_pseudogene_correction" value="false"/>
                <param name="aai_strain" value="0.9"/>
                <param name="ignore_thresholds" value="false"/>
                <param name="e_value" value="1e-10"/>
                <param name="length" value="0.7"/>
            </section>
            <param name="extra_outputs" value=""/>
            <output name="results" ftype="tabular">
                <assert_contents>
                    <has_text text="637000110"/>
                    <has_text text="Marker lineage"/>
                    <has_text text="k__Bacteria"/>
                </assert_contents>
            </output>
        </test>
        <!--
            Collection input with the "ali" flag enabled and eleven extra outputs requested;
            together with "results" that gives the twelve expected outputs.
            The two *_ali collections are not selected, so they are not expected even
            though "ali" is true.
            "marker_gene_stats" is not declared in this file; presumably it is provided by
            the qa_extra_outputs macro (confirm in macros.xml).
        -->
        <test expect_num_outputs="12">
            <conditional name="bins">
                <param name="select" value="collection"/>
                <param name="bins_coll">
                    <collection type="list">
                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
                    </collection>
                </param>
            </conditional>
            <section name="tree_analyze">
                <param name="reduced_tree" value="true"/>
                <param name="ali" value="true"/>
                <param name="nt" value="false"/>
                <param name="genes" value="false"/>
            </section>
            <section name="lineage_set">
                <param name="unique" value="10"/>
                <param name="multi" value="10"/>
                <param name="force_domain" value="false"/>
                <param name="no_refinement" value="false"/>
            </section>
            <section name="qa">
                <param name="individual_markers" value="false"/>
                <param name="skip_adj_correction" value="false"/>
                <param name="skip_pseudogene_correction" value="false"/>
                <param name="aai_strain" value="0.9"/>
                <param name="ignore_thresholds" value="false"/>
                <param name="e_value" value="1e-10"/>
                <param name="length" value="0.7"/>
            </section>
            <param name="extra_outputs" value="phylo_hmm_info,bin_stats_tree,hmmer_tree,concatenated_tre,concatenated_fasta,marker_file,hmmer_analyze,bin_stats_analyze,bin_stats_ext,checkm_hmm_info,marker_gene_stats"/>
            <output name="results" ftype="tabular">
                <assert_contents>
                    <has_text text="637000110"/>
                    <has_text text="Marker lineage"/>
                    <has_text text="k__Bacteria"/>
                </assert_contents>
            </output>
            <!-- Binary pickle archives are only checked by approximate size. -->
            <output name="phylo_hmm_info" ftype="zip">
                <assert_contents>
                    <has_size value="1575" delta="10"/>
                </assert_contents>
            </output>
            <output name="bin_stats_tree" ftype="tabular">
                <assert_contents>
                    <has_text text="637000110"/>
                    <has_text text="Mean scaffold length"/>
                    <has_text text="Translation table"/>
                </assert_contents>
            </output>
            <output_collection name="hmmer_tree" count="1">
                <element name="637000110" ftype="txt">
                    <assert_contents>
                        <has_text text="target name"/>
                        <has_text text="AC_000091_79"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="concatenated_fasta" ftype="fasta">
                <assert_contents>
                    <has_text text="637000110"/>
                    <has_text text="MLKAGVHFGHQ"/>
                </assert_contents>
            </output>
            <output name="concatenated_tre" ftype="phyloxml">
                <assert_contents>
                    <has_text text="IMG_646564547"/>
                    <has_text text="g__Methanocaldococcus"/>
                </assert_contents>
            </output>
            <output name="marker_file" ftype="tabular">
                <assert_contents>
                    <has_text text="Lineage Marker File"/>
                    <has_text text="637000110"/>
                    <has_text text="k__Bacteria"/>
                </assert_contents>
            </output>
            <output_collection name="hmmer_analyze" count="1">
                <element name="637000110" ftype="txt">
                    <assert_contents>
                        <has_text text="target name"/>
                        <has_text text="AC_000091_859"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="bin_stats_analyze" ftype="tabular">
                <assert_contents>
                    <has_text text="637000110"/>
                    <has_text text="GC"/>
                    <has_text text="GC std"/>
                </assert_contents>
            </output>
            <output name="bin_stats_ext" ftype="tabular">
                <assert_contents>
                    <has_text text="637000110"/>
                    <has_text text="marker lineage"/>
                </assert_contents>
            </output>
            <output name="checkm_hmm_info" ftype="zip">
                <assert_contents>
                    <has_size value="17052" delta="200"/>
                </assert_contents>
            </output>
            <output name="marker_gene_stats" ftype="tabular">
                <assert_contents>
                    <has_text text="637000110"/>
                    <has_text text="AC_000091_79"/>
                    <has_text text="PF00318.15"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

This command runs the recommended workflow for assessing the completeness and contamination of genome bins using lineage-specific marker sets.
This workflow consists of 4 mandatory (M) steps and 1 recommended (R) step:

- (M) The tree command places genome bins into a reference genome tree
- (R) The tree_qa command indicates the number of phylogenetically informative marker genes found in each genome bin along with a taxonomic string indicating its approximate placement in the tree. 

    If desired, genome bins with few phylogenetically informative marker genes may be removed in order to reduce the computational requirements of the following commands.
    Alternatively, if only genomes from a particular taxonomic group are of interest these can be moved to a new directory and analyzed separately. 

- (M) The lineage_set command creates a marker file indicating lineage-specific marker sets suitable for evaluating each genome. 
- (M) The analyze command identifies marker genes and estimates the completeness and contamination of each genome bin. 
- (M) The qa command can be used to produce different tables summarizing the quality of each genome bin.

    ]]></help>
    <!-- Citation entries are supplied by the "citations" macro. -->
    <expand macro="citations"/>
</tool>