Mercurial > repos > iuc > dram_distill

<tool id="dram_distill" name="DRAM distill" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
    <!-- Short description of the tool. -->
    <description>metagenome-assembled-genomes (MAGs)</description>
    <!-- Import macros.xml; the @...@ tokens and the macros expanded in this file are presumably defined there (not visible here). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Expand the macros named bio_tools, requirements and stdio; their content is not visible in this file. -->
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <command detect_errors="exit_code"><![CDATA[
## Run DRAM distill, writing its results into the directory 'output_dir'
## (a path relative to the current working directory).
DRAM.py distill
--input_file '$input_file'
--output_dir 'output_dir'
## Optional datasets are passed only when one was selected.
#if $rrna_path:
    --rrna_path '$rrna_path'
#end if
#if $trna_path:
    --trna_path '$trna_path'
#end if
--groupby_column '$groupby_column'
#if $custom_distillate:
    --custom_distillate '$custom_distillate'
#end if
## Boolean parameter: expands to the flag itself or to an empty string.
$distillate_gene_names
--genomes_per_product $genomes_per_product
## Move each produced file onto its output dataset. A missing file is only
## reported on stdout. An explicit if/else is used instead of
## "test && mv || echo" so that a failing mv is not reported as a missing
## file and its non-zero exit status is not replaced by that of echo.
&& if [ -f 'output_dir/genome_stats.tsv' ]; then mv 'output_dir/genome_stats.tsv' '$output_genome_stats'; else echo 'No genome_stats.tsv output produced'; fi
&& if [ -f 'output_dir/metabolism_summary.xlsx' ]; then mv 'output_dir/metabolism_summary.xlsx' '$output_metabolism_summary'; else echo 'No metabolism_summary.xlsx output produced'; fi
&& if [ -f 'output_dir/product.html' ]; then mv 'output_dir/product.html' '$output_product_html'; else echo 'No product.html output produced'; fi
&& if [ -f 'output_dir/product.tsv' ]; then mv 'output_dir/product.tsv' '$output_product_tsv'; else echo 'No product.tsv output produced'; fi
    ]]></command>
    <inputs>
        <!-- Expands the input_file_param macro, which presumably defines the input_file parameter used by the command (macro not visible here). -->
        <expand macro="input_file_param"/>
        <!-- Optional datasets: the command adds the matching option only when a dataset is selected. -->
        <param argument="--rrna_path" type="data" format="tabular" optional="true" label="rRNA file" help="Produced by the DRAM annotate tool (optional, leave blank to ignore)"/>
        <param argument="--trna_path" type="data" format="tabular" optional="true" label="tRNA file" help="Produced by the DRAM annotate tool (optional, leave blank to ignore)"/>
        <param argument="--groupby_column" type="text" value="fasta" label="Column from annotations to group as organism units">
            <expand macro="sanitizer"/>
        </param>
        <!-- Expands the custom_distillate_param macro, which presumably defines the custom_distillate parameter used by the command. -->
        <expand macro="custom_distillate_param"/>
        <param argument="--distillate_gene_names" type="boolean" truevalue="--distillate_gene_names" falsevalue="" checked="false" label="Give names of genes instead of counts in genome metabolism summary?"/>
        <!-- At least one genome must go into each product.html file, so the minimum is 1 rather than 0. -->
        <param argument="--genomes_per_product" type="integer" value="1000" min="1" label="Number of genomes per product.html output"/>
    </inputs>
    <outputs>
        <!-- Each dataset receives the file of the same kind that the command moves out of output_dir. -->
        <!-- Labels use the same wording as the output names listed in the help section. -->
        <data name="output_genome_stats" format="tabular" label="${tool.name} on ${on_string}: genome stats"/>
        <data name="output_metabolism_summary" format="xlsx" label="${tool.name} on ${on_string}: metabolism summary"/>
        <data name="output_product_html" format="html" label="${tool.name} on ${on_string}: product html"/>
        <data name="output_product_tsv" format="tabular" label="${tool.name} on ${on_string}: product tsv"/>
    </outputs>
    <tests>
        <!--
        This test is declared with expect_failure="true" and only checks that
        the text "Traceback" appears on stderr. The DRAM database is not
        available in the test environment, so the outputs are empty. This
        warning is displayed in the log...
        warnings.warn('Database does not exist at path %s' % self.description_loc)
        ...and this exception is raised:
        ValueError: Invalid file path or buffer object type: <class 'NoneType'>
        -->
        <test expect_failure="true">
            <param name="input_file" ftype="tabular" value="annotated1.tabular"/>
            <param name="rrna_path" ftype="tabular" value="input_distill_rrna1.tabular"/>
            <param name="trna_path" ftype="tabular" value="input_distill_trna1.tabular"/>
            <param name="custom_distillate" ftype="tabular" value="distill_custom.tabular"/>
            <assert_stderr>
                <has_text text="Traceback"/>
            </assert_stderr>
        </test>
    </tests>
    <help>
**What it does**

@WHATITDOESHEADER@

This tool accepts a tabular file with all gene annotations from the Pfam, UniProt, dbCAN and MEROPS databases, as produced by the
DRAM annotate tool, and curates them into useful functional categories, creating genome statistics and metabolism summary
files. The genome statistics file provides most of the genome quality information required by the MIMAG standards, including GTDB-Tk
and CheckM information if provided by the user. The summarised metabolism table includes the number of genes with specific
metabolic function identifiers (KO, CAZy ID, etc.) for each genome, with information obtained from multiple databases.

@CUSTOMDISTILLATEFILES@

This tool produces the following outputs.

* **Distillate**

  * **genome stats** - a tabular file with genome statistics for all input genomes including all statistics required by recently defined MIMAG standards
  * **metabolism summary** - an Excel file with metabolism summary of all input genomes which gives gene counts of functional and structural genes across a wide variety of metabolisms

* **Liquor**

  * **product html** - an html file containing an interactive heatmap showing coverage of pathways, the coverage of electron transport chain components and the presence of selected metabolic functions
  * **product tsv** - a tabular file with corresponding genes for the html heatmap

@WHATITDOESFOOTER@
    </help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Fri, 23 Aug 2024 08:18:06 +0000
parents	9a0944d08a99
children