diff iphop.xml @ 0:b6dba8fabeb2 draft

planemo upload for repository https://github.com/Helmholtz-UFZ/ufz-galaxy-tools/blob/main/tools/iphop/ commit d99e4d279594b887549d2cea9b1f452c9b1b89c8
author ufz
date Mon, 14 Oct 2024 14:56:45 +0000
parents
children d357350b6da0
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/iphop.xml	Mon Oct 14 14:56:45 2024 +0000
@@ -0,0 +1,116 @@
+<tool id="iphop_predict" name="iPHoP predict" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0" license="MIT">
+    <description>host of input bacteriophage/archaeal virus genomes</description>
+    <macros> <!-- tokens substituted into the tool version string and the package requirement below -->
+        <token name="@TOOL_VERSION@">1.3.3</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <xrefs>
+        <xref type="bio.tools">iphop</xref>
+    </xrefs>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">iphop</requirement>
+    </requirements>
+    <version_command><![CDATA[iphop --version]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Run the prediction, then drop the score-cutoff suffix so the result files have the fixed names collected as outputs.
+        iphop predict
+            --fa_file '$fa_file'
+            --out_dir output/
+            --db_dir '$db_dir.fields.path'
+            --num_threads "\${GALAXY_SLOTS:-1}" --min_score $min_score $no_qc &&
+        mv 'output/Host_prediction_to_genome_m${min_score}.csv' output/Host_prediction_to_genome.csv &&
+        mv 'output/Host_prediction_to_genus_m${min_score}.csv' "output/Host_prediction_to_genus.csv"
+    ]]></command>
+    <inputs>
+        <param type="data" argument="--fa_file" label="FASTA formatted virus sequences" format="fasta"/>
+        <param type="select" argument="--db_dir" label="Reference database">
+            <options from_data_table="iphop"> <!-- choices are read from the "iphop" data table -->
+                <validator message="No reference data available. Contact your Galaxy admin." type="no_options"/>
+            </options>
+        </param>
+        <param type="integer" argument="--min_score" min="75" max="100" value="90" label="Minimum confidence score" help="Minimum confidence score for final output. Default of 90 corresponds to ~10% FDR (roughly). For ~5% overall (roughly) estimated FDR, use a minimum score of 95, etc."/>
+        <param type="boolean" argument="--no_qc" falsevalue="" truevalue="--no_qc" label="Bypass the automated QC" help="Bypass the automated QC that filters out input sequences with > 10% Ns or with characters other than ATCGN. Warning: if set to true, low-quality sequences (with Ns or unusual characters) may lead to unexpected failures at multiple steps. (default = False)"/>
+    </inputs>
+    <outputs> <!-- each dataset is collected from a fixed path inside the job's output/ directory -->
+        <data name="detailed_per_tool" label="${tool.name} on ${on_string}: Detailed per tool" from_work_dir="output/Detailed_output_by_tool.csv" format="csv"/>
+        <data name="host_genome" label="${tool.name} on ${on_string}: Host prediction to genome" from_work_dir="output/Host_prediction_to_genome.csv" format="csv"/>
+        <data name="host_genus" label="${tool.name} on ${on_string}: Host prediction to genus" from_work_dir="output/Host_prediction_to_genus.csv" format="csv"/>
+    </outputs>
+    <tests> <!-- NOTE(review): the only test below is commented out, so the tool has no active tests; presumably it needs a reference database entry ("db2") that is not available as test data. TODO confirm and re-enable. -->
+        <!-- <test>
+            <param name="fa_file" value="test_input_phages.fna" ftype="fasta"/>
+            <param name="db_dir" value="db2"/>
+            <output name="detailed_per_tool">
+                <assert_contents>
+                    <has_n_lines n="190"/>
+                    <has_n_columns n="10" sep="," comment="#"/>
+                </assert_contents>
+            </output>
+            <output name="host_genome">
+                <assert_contents>
+                    <has_line line="Virus,Host genome,Host taxonomy,Main method,Confidence score,Additional methods"/>
+                    <has_n_lines n="119"/>
+                    <has_n_columns n="6" sep=","/>
+                </assert_contents>
+            </output>
+            <output name="host_genus">
+                <assert_contents>
+                    <has_line line="Virus,AAI to closest RaFAH reference,Host genus,Confidence score,List of methods"/>
+                    <has_n_lines n="6"/>
+                    <has_n_columns n="5" sep=","/>
+                </assert_contents>
+            </output>
+        </test> -->
+    </tests>
+    <help><![CDATA[
+
+iPHoP stands for integrated Phage Host Prediction. It is an automated command-line pipeline for predicting host genus of novel bacteriophages and archaeoviruses based on their genome sequences.
+
+Usage
+.....
+
+
+**Input**
+
+Virus sequences in FASTA format.
+
+**Output**
+
+Host prediction to genus
+
+- contains integrated results from host-based and phage-based tools at the host genus level
+- lists for each prediction
+
+  - the virus sequence ID,
+  - the level of amino-acid similarity (AAI) between the query and the genomes in the RaFAH phage database,
+  - the predicted host genus,
+  - the confidence score calculated from all tools, and
+  - the list of scores for individual classifiers obtained for this virus-host pair.
+
+- for the detailed score by classifier, "RaFAH" represents the score derived from RaFAH (https://pubmed.ncbi.nlm.nih.gov/34286299/), iPHoP-RF is the score derived from all host-based tools, CRISPR the score derived only from CRISPR hits, and blast the score derived only from blastn hits
+- all virus-host pairs for which the confidence score is higher than the selected score cutoff (default = 90) are included, so each virus may be associated with multiple predictions
+- when multiple predictions are available for a query virus, standard practice is to use the one with the highest score
+
+Host prediction to genome
+
+- contains integrated results from host-based tools only (i.e., no RaFAH) at the host genome representative level
+- lists for each host-based prediction
+
+  - the virus sequence ID,
+  - the representative host genome ID,
+  - the corresponding host genome taxonomy,
+  - the main method supporting this prediction (i.e., highest score),
+  - the confidence score for this main method, and
+  - the list of additional methods and scores obtained for this virus-host pair.
+
+Detailed by tool
+
+
+- This output file lists the 5 best hits for each method for each input virus
+- When no hits were obtained, the corresponding method is not listed in this output file for the query virus.
+
+    ]]></help>
+    <citations> <!-- publication to cite for this tool, referenced by DOI -->
+        <citation type="doi">10.1371/journal.pbio.3002083</citation>
+    </citations>
+</tool>