diff deeparg_predict.xml @ 0:3953324f6d31 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deeparg commit 0f935507a23e554a7d9e181a278a00440865c3ba
author iuc
date Fri, 14 Feb 2025 11:09:55 +0000 (2 months ago)
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deeparg_predict.xml	Fri Feb 14 11:09:55 2025 +0000
@@ -0,0 +1,130 @@
+<tool id="deeparg_predict" name="DeepARG predict" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Antibiotic Resistance Genes (ARGs) from metagenomes</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+##Used only for test 
+#if str($hide_db_build) == 'true':
+    deeparg download_data -o '$deeparg_db.fields.path' &&
+#end if
+##
+    mkdir -p deeparg_predict_output &&
+    deeparg predict
+        --model '$model'
+        -i '$input'
+        -o 'deeparg_predict_output/deeparg_predict'
+        -d '$deeparg_db.fields.path'
+        --type '$type'
+        --min-prob $min_prob
+        --arg-alignment-identity $arg_alignment_identity
+        --arg-alignment-evalue $arg_alignment_evalue
+        --arg-alignment-overlap $arg_alignment_overlap
+        --arg-num-alignments-per-entry $arg_num_alignments_per_entry 
+]]></command>
+    <inputs>
+        <!-- used only for tests, as the deeparg database contains large files that cannot be deleted or reduced. -->
+        <param name="hide_db_build" type="hidden" value=""/>
+        <!-- -->
+        <param name="input" type="data" format="fasta" label="Input file"/>
+        <param name="deeparg_db" type="select" label="DeepARG database">
+            <options from_data_table="deeparg">
+                <filter type="static_value" value="@TOOL_VERSION@" column="db_version"/>
+                <validator message="No deeparg database is available" type="no_options"/>
+            </options>
+        </param>
+        <param argument="--model" type="select" label="Select model to use">
+            <option value="SS" selected="true">SS (short sequences for reads)</option>
+            <option value="LS">LS (long sequences for genes)</option>
+        </param>
+        <param argument="--type" type="select" label="Molecular data type">
+            <option value="nucl" selected="true">Nucleotid (default)</option>
+            <option value="prot">Protein</option>
+        </param>
+        <param argument="--min-prob" type="float" min="0" max="1" value="0.8" label="Minimum probability cutoff [Default: 0.8]"/>
+        <param argument="--arg-alignment-identity" type="integer" min="0" value="50" label="Identity cutoff for sequence alignment [Default: 50]"/>
+        <param argument="--arg-alignment-evalue" type="float" min="0" value="1e-10" label="Evalue cutoff [Default: 1e-10]"/>
+        <param argument="--arg-alignment-overlap" type="float" min="0" max="1" value="0.8" label="Alignment read overlap [Default: 0.8]"/>
+        <param argument="--arg-num-alignments-per-entry" type="integer" min="0" value="1000" label="Diamond, minimum number of alignments per entry [Default: 1000]"/>
+        <section name="output_files" title="Selection of the output files">
+            <param name="output_selection" type="select" label="Output files selection" display="checkboxes" multiple="true">
+                <option value="file_ARG_tsv" selected="true">ARG detected with prob higher or equal to --prob in TSV</option>
+                <option value="file_potential_ARG_tsv" selected="true">ARG detected with prob below --prob in TSV</option>
+                <option value="file_all_hits_tsv" selected="false">All hits detected in TSV</option>
+            </param>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="output_mapping_ARG" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.mapping.ARG" label="${tool.name} on ${on_string} : ARG detected (prob higher or equal to --prob)">
+            <filter>output_files['output_selection'] and "file_ARG_tsv" in output_files['output_selection']</filter>
+        </data>
+        <data name="output_mapping_potential_ARG" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.mapping.potential.ARG" label="${tool.name} on ${on_string} : Potential ARG (prob below --prob)">
+            <filter>output_files['output_selection'] and "file_potential_ARG_tsv" in output_files['output_selection']</filter>
+        </data>
+        <data name="output_all_hits" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.align.daa.tsv" label="${tool.name} on ${on_string} : all hits detected">
+            <filter>output_files['output_selection'] and "file_all_hits_tsv" in output_files['output_selection']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Test 1 -->
+        <test expect_num_outputs="3">
+            <param name="hide_db_build" value="true"/>
+            <param name="input" value="ORFs.fa" ftype="fasta"/>
+            <param name="deeparg_db" value="deeparg_1.0.4-19122024"/>
+            <param name="model" value="SS"/>
+            <param name="type" value="nucl"/>
+            <section name="output_files">
+                <param name="output_selection" value="file_ARG_tsv,file_potential_ARG_tsv,file_all_hits_tsv"/>
+            </section>
+            <output name="output_mapping_ARG" ftype="tabular">
+                <assert_contents>
+                    <has_text text="YP_003283625.1|FEATURES|tet(K)|tetracycline|tet(K)"/>
+                    <has_text text="RPOB2"/>
+                </assert_contents>
+            </output>
+            <output name="output_mapping_potential_ARG" ftype="tabular">
+                <assert_contents>
+                    <has_text text="gi:545254650:ref:WP_021551023.1:|FEATURES|mdtB|multidrug|mdtB"/>
+                    <has_text text="MUXB"/>
+                </assert_contents>
+            </output>
+            <output name="output_all_hits" ftype="tabular">
+                <assert_contents>
+                    <has_size value="226000" delta="10000"/>
+                    <has_text text="ADV91011.1|FEATURES|RbpA|rifamycin|RbpA"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+DeepARG Predict is a computational tool designed to classify and annotate antibiotic resistance genes (ARGs) from nucleotide or protein sequences
+
+It takes as input a **fasta nucleotide or protein file** containing short (SS model) or long (LS model) sequences
+
+DeepARG output
+---------------
+
+DeepARG generates two main files: .ARG that contains the sequences with a probability sup or = --prob (0.8 default) and .potential.ARG with sequences containing a probability inf to --prob (0.8 default). The .potential.ARG file can still contain ARG-like sequences, howevere, it is necessary inspect its sequences
+
+The output format for both files consists of the following fields:
+
+* ARG_NAME
+* QUERY_START
+* QUERY_END
+* QUERY_ID
+* PREDICTED_ARG_CLASS
+* BEST_HIT_FROM_DATABASE
+* PREDICTION_PROBABILITY
+* ALIGNMENT_BESTHIT_IDENTITY (%)
+* ALIGNMENT_BESTHIT_LENGTH
+* ALIGNMENT_BESTHIT_BITSCORE
+* ALIGNMENT_BESTHIT_EVALUE
+* COUNTS
+
+If you want to annotate paired-end short read sequencing data use the DeepARG Short Reads tool
+
+    </help>
+    <expand macro="citations"/>
+</tool>