view iphop.xml @ 1:d357350b6da0 draft default tip

planemo upload for repository https://github.com/Helmholtz-UFZ/ufz-galaxy-tools/blob/main/tools/iphop/ commit c792f1b2671d363ce34398d8886702d356d01e7f
author ufz
date Thu, 08 May 2025 11:23:48 +0000
parents b6dba8fabeb2
children
line wrap: on
line source

<tool id="iphop_predict" name="iPHoP predict" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0" license="MIT">
    <description>host of input bacteriophage/archaeal virus genomes</description>
    <!-- Version tokens: @TOOL_VERSION@ is reused as the version of the iphop package
         requirement below; @VERSION_SUFFIX@ is appended after "+galaxy" in the tool's
         version attribute. -->
    <macros>
        <token name="@TOOL_VERSION@">1.3.3</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <!-- Cross-reference to the bio.tools registry entry for this software. -->
    <xrefs>
        <xref type="bio.tools">iphop</xref>
    </xrefs>
    <!-- Software dependency providing the iphop executable used in the command. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">iphop</requirement>
    </requirements>
    <!-- Command used to report the version of the installed iphop executable. -->
    <version_command><![CDATA[iphop --version]]></version_command>
    <!--
        Runs "iphop predict" on the selected FASTA file against the chosen reference
        database and writes all results into output/.
        The no_qc boolean parameter expands to its flag when enabled and to an empty
        string otherwise, so it can be placed on the command line unconditionally.
        The two summary files carry the score cutoff in their name
        (..._m[min_score].csv); they are renamed to fixed names so that the
        from_work_dir paths declared in the outputs section can pick them up.
        A failing step (non-zero exit code) marks the job as failed.
    -->
    <command detect_errors="exit_code"><![CDATA[
        iphop predict
            --fa_file '$fa_file'
            --out_dir output/
            --db_dir '$db_dir.fields.path'
            --num_threads "\${GALAXY_SLOTS:-1}"
            --min_score $min_score
            $no_qc &&
        mv 'output/Host_prediction_to_genome_m${min_score}.csv' output/Host_prediction_to_genome.csv &&
        mv 'output/Host_prediction_to_genus_m${min_score}.csv' "output/Host_prediction_to_genus.csv"
    ]]></command>
    <inputs>
        <!-- Viral sequences to analyse (FASTA dataset from the history). -->
        <param argument="--fa_file"
               type="data"
               format="fasta"
               label="FASTA formatted virus sequences"/>

        <!-- Reference database, selected from the entries of the "iphop" data table. -->
        <param argument="--db_dir"
               type="select"
               label="Reference database">
            <options from_data_table="iphop">
                <validator type="no_options"
                           message="No reference data available. Contact your Galaxy admin."/>
            </options>
        </param>

        <!-- Score cutoff; accepted range is 75 to 100, default 90. -->
        <param argument="--min_score"
               type="integer"
               value="90"
               min="75"
               max="100"
               label="Minimum confidence score"
               help="Minimum confidence score for final output. Default of 90 corresponds to ~10% FDR (roughly). For ~5% overall (roughly) estimated FDR, use a minimum score of 95, etc."/>

        <!-- Optional switch: yields the flag text when checked, an empty string otherwise. -->
        <param argument="--no_qc"
               type="boolean"
               truevalue="--no_qc"
               falsevalue=""
               label="Bypass the automated QC"
               help="Bypass the automated QC that filters out input sequences with > 10% Ns or with characters other than ATCGN. Warning: if set to true, low-quality sequences (with Ns or unusual characters) may lead to unexpected failures at multiple steps. (default = False)"/>
    </inputs>
    <outputs>
        <!-- Per-tool detail table, collected from output/ under its original file name. -->
        <data name="detailed_per_tool"
              format="csv"
              from_work_dir="output/Detailed_output_by_tool.csv"
              label="${tool.name} on ${on_string}: Detailed per tool"/>

        <!-- Genome-level predictions; the command renames this file to drop the score suffix. -->
        <data name="host_genome"
              format="csv"
              from_work_dir="output/Host_prediction_to_genome.csv"
              label="${tool.name} on ${on_string}: Host prediction to genome"/>

        <!-- Genus-level predictions; the command renames this file to drop the score suffix. -->
        <data name="host_genus"
              format="csv"
              from_work_dir="output/Host_prediction_to_genus.csv"
              label="${tool.name} on ${on_string}: Host prediction to genus"/>
    </outputs>
    <tests>
        <!-- NOTE(review): the only test case below is commented out, so this tool currently
             has no active tests. Presumably it is disabled because it needs an "iphop" data
             table entry (db2) pointing at a reference database; confirm and re-enable it
             once such test data is available. -->
        <!-- <test>
            <param name="fa_file" value="test_input_phages.fna" ftype="fasta"/>
            <param name="db_dir" value="db2"/>
            <output name="detailed_per_tool">
                <assert_contents>
                    <has_n_lines n="190"/>
                    <has_n_columns n="10" sep="," comment="#"/>
                </assert_contents>
            </output>
            <output name="host_genome">
                <assert_contents>
                    <has_line line="Virus,Host genome,Host taxonomy,Main method,Confidence score,Additional methods"/>
                    <has_n_lines n="119"/>
                    <has_n_columns n="6" sep=","/>
                </assert_contents>
            </output>
            <output name="host_genus">
                <assert_contents>
                    <has_line line="Virus,AAI to closest RaFAH reference,Host genus,Confidence score,List of methods"/>
                    <has_n_lines n="6"/>
                    <has_n_columns n="5" sep=","/>
                </assert_contents>
            </output>
        </test> -->
    </tests>
    <help><![CDATA[

iPHoP stands for integrated Phage Host Prediction. It is an automated command-line pipeline for predicting host genus of novel bacteriophages and archaeoviruses based on their genome sequences.

Usage
.....


**Input**

Virus sequences in FASTA format.

**Output**

Host prediction to genus

- contains integrated results from host-based and phage-based tools at the host genus level
- lists for each prediction

  - the virus sequence ID,
  - the level of amino-acid similarity (AAI) between the query and the genomes in the RaFAH phage database,
  - the predicted host genus,
  - the confidence score calculated from all tools, and
  - the list of scores for individual classifiers obtained for this virus-host pair.

- for the detailed score by classifier, "RaFAH" represents the score derived from RaFAH (https://pubmed.ncbi.nlm.nih.gov/34286299/), iPHoP-RF is the score derived from all host-based tools, CRISPR the score derived only from CRISPR hits, and blast the score derived only from blastn hits
- all virus-host pairs for which the confidence score is higher than the selected score cutoff (default = 90) are included, so each virus may be associated with multiple predictions
- when multiple predictions are available for a query virus, standard practice is to use the one with the highest score

Host prediction to genome

- contains integrated results from host-based tools only (i.e., no RaFAH) at the host genome representative level
- lists for each host-based prediction

  - the virus sequence ID,
  - the representative host genome ID,
  - the corresponding host genome taxonomy,
  - the main method supporting this prediction (i.e., highest score),
  - the confidence score for this main method, and
  - the list of additional methods and scores obtained for this virus-host pair.

Detailed per tool


- This output file lists the 5 best hits for each method for each input virus
- When no hits were obtained, the corresponding method is not listed in this output file for the query virus.

    ]]></help>
    <!-- Publication to cite when using this tool, referenced by DOI. -->
    <citations>
        <citation type="doi">10.1371/journal.pbio.3002083</citation>
    </citations>
</tool>