Mercurial > repos > iuc > deeparg_predict

<tool id="deeparg_predict" name="DeepARG predict" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Antibiotic Resistance Genes (ARGs) from metagenomes</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <!--
        Command template (Cheetah). Job failure is detected from the exit code.
        Steps, in order:
          1. Only when the hidden parameter hide_db_build equals 'true' (set by the tool test),
             run "deeparg download_data" into the path of the selected database entry.
          2. Create the working sub-directory deeparg_predict_output.
          3. Run "deeparg predict" with the selected model, input file, database path, data type
             and cutoffs. Results are written with the prefix
             deeparg_predict_output/deeparg_predict, which the from_work_dir attributes in the
             outputs section refer to.
    -->
    <command detect_errors="exit_code"><![CDATA[
##Used only for test
#if str($hide_db_build) == 'true':
    deeparg download_data -o '$deeparg_db.fields.path' &&
#end if
##
    mkdir -p deeparg_predict_output &&
    deeparg predict
        --model '$model'
        -i '$input'
        -o 'deeparg_predict_output/deeparg_predict'
        -d '$deeparg_db.fields.path'
        --type '$type'
        --min-prob $min_prob
        --arg-alignment-identity $arg_alignment_identity
        --arg-alignment-evalue $arg_alignment_evalue
        --arg-alignment-overlap $arg_alignment_overlap
        --arg-num-alignments-per-entry $arg_num_alignments_per_entry
]]></command>
    <inputs>
        <!-- Test-only switch: when the tool test sets it to "true", the command section downloads the
             DeepARG database before predicting, because the database contains large files that cannot
             be deleted or reduced for use as test data. Regular users never see this parameter. -->
        <param name="hide_db_build" type="hidden" value=""/>
        <param name="input" type="data" format="fasta" label="Input file"/>
        <!-- Database entries come from the "deeparg" data table, restricted to the entries whose
             db_version column equals the tool version token. -->
        <param name="deeparg_db" type="select" label="DeepARG database">
            <options from_data_table="deeparg">
                <filter type="static_value" value="@TOOL_VERSION@" column="db_version"/>
                <validator message="No deeparg database is available" type="no_options"/>
            </options>
        </param>
        <param argument="--model" type="select" label="Select model to use">
            <option value="SS" selected="true">SS (short sequences for reads)</option>
            <option value="LS">LS (long sequences for genes)</option>
        </param>
        <param argument="--type" type="select" label="Molecular data type">
            <option value="nucl" selected="true">Nucleotide (default)</option>
            <option value="prot">Protein</option>
        </param>
        <param argument="--min-prob" type="float" min="0" max="1" value="0.8" label="Minimum probability cutoff [Default: 0.8]"/>
        <!-- The identity cutoff is a percentage, so it is bounded to the 0..100 range. -->
        <param argument="--arg-alignment-identity" type="integer" min="0" max="100" value="50" label="Identity cutoff for sequence alignment [Default: 50]"/>
        <param argument="--arg-alignment-evalue" type="float" min="0" value="1e-10" label="Evalue cutoff [Default: 1e-10]"/>
        <param argument="--arg-alignment-overlap" type="float" min="0" max="1" value="0.8" label="Alignment read overlap [Default: 0.8]"/>
        <param argument="--arg-num-alignments-per-entry" type="integer" min="0" value="1000" label="Diamond, minimum number of alignments per entry [Default: 1000]"/>
        <section name="output_files" title="Selection of the output files">
            <!-- At least one output must be selected; otherwise the job would run without producing
                 any dataset. The option values are the keys tested by the filters in the outputs section. -->
            <param name="output_selection" type="select" label="Output files selection" display="checkboxes" multiple="true" optional="false">
                <option value="file_ARG_tsv" selected="true">ARG detected with prob higher or equal to --min-prob in TSV</option>
                <option value="file_potential_ARG_tsv" selected="true">ARG detected with prob below --min-prob in TSV</option>
                <option value="file_all_hits_tsv" selected="false">All hits detected in TSV</option>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Each dataset is collected from the deeparg_predict_output working directory and is only
             created when its key is ticked in output_files/output_selection. The leading truthiness
             check guards against an empty selection. Labels name the min-prob cutoff, which is the
             option actually passed on the command line. -->
        <data name="output_mapping_ARG" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.mapping.ARG" label="${tool.name} on ${on_string} : ARG detected (prob higher or equal to --min-prob)">
            <filter>output_files['output_selection'] and "file_ARG_tsv" in output_files['output_selection']</filter>
        </data>
        <data name="output_mapping_potential_ARG" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.mapping.potential.ARG" label="${tool.name} on ${on_string} : Potential ARG (prob below --min-prob)">
            <filter>output_files['output_selection'] and "file_potential_ARG_tsv" in output_files['output_selection']</filter>
        </data>
        <data name="output_all_hits" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.align.daa.tsv" label="${tool.name} on ${on_string} : all hits detected">
            <filter>output_files['output_selection'] and "file_all_hits_tsv" in output_files['output_selection']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Single end-to-end case: SS model on nucleotide ORFs with all three outputs requested.
             hide_db_build="true" makes the command download the database before predicting. -->
        <test expect_num_outputs="3">
            <param name="input" ftype="fasta" value="ORFs.fa"/>
            <param name="hide_db_build" value="true"/>
            <param name="deeparg_db" value="deeparg_1.0.4-19122024"/>
            <param name="type" value="nucl"/>
            <param name="model" value="SS"/>
            <section name="output_files">
                <param name="output_selection" value="file_ARG_tsv,file_potential_ARG_tsv,file_all_hits_tsv"/>
            </section>
            <!-- Hits at or above the probability cutoff -->
            <output ftype="tabular" name="output_mapping_ARG">
                <assert_contents>
                    <has_text text="RPOB2"/>
                    <has_text text="YP_003283625.1|FEATURES|tet(K)|tetracycline|tet(K)"/>
                </assert_contents>
            </output>
            <!-- Hits below the probability cutoff -->
            <output ftype="tabular" name="output_mapping_potential_ARG">
                <assert_contents>
                    <has_text text="MUXB"/>
                    <has_text text="gi:545254650:ref:WP_021551023.1:|FEATURES|mdtB|multidrug|mdtB"/>
                </assert_contents>
            </output>
            <!-- Raw alignment table -->
            <output ftype="tabular" name="output_all_hits">
                <assert_contents>
                    <has_text text="ADV91011.1|FEATURES|RbpA|rifamycin|RbpA"/>
                    <has_size value="226000" delta="10000"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
DeepARG Predict is a computational tool designed to classify and annotate antibiotic resistance genes (ARGs) from nucleotide or protein sequences.

It takes as input a **FASTA nucleotide or protein file** containing short (SS model) or long (LS model) sequences.

DeepARG output
---------------

DeepARG generates two main files: .ARG, which contains the sequences with a probability greater than or equal to ``--min-prob`` (0.8 by default), and .potential.ARG, which contains the sequences with a probability below ``--min-prob``. The .potential.ARG file can still contain ARG-like sequences; however, it is necessary to inspect its sequences.

The output format for both files consists of the following fields:

* ARG_NAME
* QUERY_START
* QUERY_END
* QUERY_ID
* PREDICTED_ARG_CLASS
* BEST_HIT_FROM_DATABASE
* PREDICTION_PROBABILITY
* ALIGNMENT_BESTHIT_IDENTITY (%)
* ALIGNMENT_BESTHIT_LENGTH
* ALIGNMENT_BESTHIT_BITSCORE
* ALIGNMENT_BESTHIT_EVALUE
* COUNTS

If you want to annotate paired-end short-read sequencing data, use the DeepARG Short Reads tool.

    </help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Fri, 27 Jun 2025 12:41:35 +0000
parents	3953324f6d31
children