Mercurial > repos > ufz > iphop_predict
view iphop.xml @ 1:d357350b6da0 draft default tip
planemo upload for repository https://github.com/Helmholtz-UFZ/ufz-galaxy-tools/blob/main/tools/iphop/ commit c792f1b2671d363ce34398d8886702d356d01e7f
| author | ufz |
|---|---|
| date | Thu, 08 May 2025 11:23:48 +0000 |
| parents | b6dba8fabeb2 |
| children |
line wrap: on
line source
<!--
    Galaxy wrapper for "iphop predict": predicts the host of the
    bacteriophage / archaeal virus genomes given in a FASTA dataset, using a
    reference database selected from the "iphop" data table.
    Produces three CSV datasets: the detailed per-tool hits and the
    integrated host predictions at genome and at genus level.
-->
<tool id="iphop_predict" name="iPHoP predict" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0" license="MIT">
    <description>host of input bacteriophage/archaeal virus genomes</description>
    <macros>
        <token name="@TOOL_VERSION@">1.3.3</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <xrefs>
        <xref type="bio.tools">iphop</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">iphop</requirement>
    </requirements>
    <version_command><![CDATA[iphop --version]]></version_command>
    <!--
        The two "mv" steps strip the "_m<min_score>" suffix from the result
        file names so that the outputs below can be collected via fixed
        from_work_dir paths, independent of the chosen score cutoff.
        $no_qc expands to the no_qc command line flag when the checkbox is
        ticked and to an empty string otherwise.
    -->
    <command detect_errors="exit_code"><![CDATA[
        iphop predict
            --fa_file '$fa_file'
            --out_dir output/
            --db_dir '$db_dir.fields.path'
            --num_threads "\${GALAXY_SLOTS:-1}"
            --min_score $min_score
            $no_qc
        &&
        mv 'output/Host_prediction_to_genome_m${min_score}.csv' 'output/Host_prediction_to_genome.csv'
        &&
        mv 'output/Host_prediction_to_genus_m${min_score}.csv' 'output/Host_prediction_to_genus.csv'
    ]]></command>
    <inputs>
        <param argument="--fa_file" type="data" format="fasta" label="FASTA formatted virus sequences"/>
        <param argument="--db_dir" type="select" label="Reference database">
            <options from_data_table="iphop">
                <validator type="no_options" message="No reference data available. Contact your Galaxy admin."/>
            </options>
        </param>
        <param argument="--min_score" type="integer" value="90" min="75" max="100" label="Minimum confidence score" help="Minimum confidence score for final output. Default of 90 corresponds to ~10% FDR (roughly). For ~5% overall (roughly) estimated FDR, use a minimum score of 95, etc."/>
        <param argument="--no_qc" type="boolean" truevalue="--no_qc" falsevalue="" checked="false" label="Bypass the automated QC" help="Bypass the automated QC that filters out input sequences with > 10% Ns or with characters other than ATCGN. Warning: if set to true, low-quality sequences (with Ns or unusual characters) may lead to unexpected failures at multiple steps. (default = False)"/>
    </inputs>
    <outputs>
        <data name="detailed_per_tool" format="csv" from_work_dir="output/Detailed_output_by_tool.csv" label="${tool.name} on ${on_string}: Detailed per tool"/>
        <data name="host_genome" format="csv" from_work_dir="output/Host_prediction_to_genome.csv" label="${tool.name} on ${on_string}: Host prediction to genome"/>
        <data name="host_genus" format="csv" from_work_dir="output/Host_prediction_to_genus.csv" label="${tool.name} on ${on_string}: Host prediction to genus"/>
    </outputs>
    <tests>
        <!-- NOTE(review): this test is disabled in the original file; presumably because it needs an iPHoP reference database ("db2") that is not shipped with the test data. Confirm before re-enabling. -->
        <!--
        <test>
            <param name="fa_file" value="test_input_phages.fna" ftype="fasta"/>
            <param name="db_dir" value="db2"/>
            <output name="detailed_per_tool">
                <assert_contents>
                    <has_n_lines n="190"/>
                    <has_n_columns n="10" sep="," comment="#"/>
                </assert_contents>
            </output>
            <output name="host_genome">
                <assert_contents>
                    <has_line line="Virus,Host genome,Host taxonomy,Main method,Confidence score,Additional methods"/>
                    <has_n_lines n="119"/>
                    <has_n_columns n="6" sep=","/>
                </assert_contents>
            </output>
            <output name="host_genus">
                <assert_contents>
                    <has_line line="Virus,AAI to closest RaFAH reference,Host genus,Confidence score,List of methods"/>
                    <has_n_lines n="6"/>
                    <has_n_columns n="5" sep=","/>
                </assert_contents>
            </output>
        </test>
        -->
    </tests>
    <help><![CDATA[
iPHoP stands for integrated Phage Host Prediction. It is an automated command-line pipeline for predicting host genus of novel bacteriophages and archaeoviruses based on their genome sequences.

Usage
.....

**Input**

Virus sequences in FASTA format.

**Output**

Host prediction to genus

- contains integrated results from host-based and phage-based tools at the host genus level
- lists for each prediction

  - the virus sequence ID,
  - the level of amino-acid similarity (AAI) between the query and the genomes in the RaFAH phage database,
  - the predicted host genus,
  - the confidence score calculated from all tools, and
  - the list of scores for individual classifiers obtained for this virus-host pair.

- for the detailed score by classifier, "RaFAH" represents the score derived from RaFAH (https://pubmed.ncbi.nlm.nih.gov/34286299/), iPHoP-RF is the score derived from all host-based tools, CRISPR the score derived only from CRISPR hits, and blast the score derived only from blastn hits
- all virus-host pairs for which the confidence score is higher than the selected score cutoff (default = 90) are included, so each virus may be associated with multiple predictions
- when multiple predictions are available for a query virus, typical standard practices is to use the one with the highest score

Host prediction to genome

- contains integrated results from host-based tools only (i.e., no RaFAH) at the host genome representative level
- lists for each host-based prediction

  - the virus sequence ID,
  - the representative host genome ID,
  - the corresponding host genome taxonomy,
  - the main method supporting this prediction (i.e., highest score),
  - the confidence score for this main method, and
  - the list of additional methods and scores obtained for this virus-host pair.

Detailed by tool

- This output files lists the 5 best hits for each method for each input virus
- When no hits were obtained, the corresponding method is not listed in this output file for the query virus.
    ]]></help>
    <citations>
        <citation type="doi">10.1371/journal.pbio.3002083</citation>
    </citations>
</tool>
