Mercurial > repos > ufz > iphop_predict
diff iphop.xml @ 0:b6dba8fabeb2 draft
planemo upload for repository https://github.com/Helmholtz-UFZ/ufz-galaxy-tools/blob/main/tools/iphop/ commit d99e4d279594b887549d2cea9b1f452c9b1b89c8
| author | ufz |
|---|---|
| date | Mon, 14 Oct 2024 14:56:45 +0000 |
| parents | |
| children | d357350b6da0 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/iphop.xml Mon Oct 14 14:56:45 2024 +0000 @@ -0,0 +1,116 @@ +<tool id="iphop_predict" name="iPHopP predict" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0" license="MIT"> + <description>host of input bacteriophage/archaeal virus genomes</description> + <macros> + <token name="@TOOL_VERSION@">1.3.3</token> + <token name="@VERSION_SUFFIX@">0</token> + </macros> + <xrefs> + <xref type="bio.tools">iphop</xref> + </xrefs> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">iphop</requirement> + </requirements> + <version_command><![CDATA[iphop --version]]></version_command> + <command detect_errors="exit_code"><![CDATA[ + iphop predict + --fa_file '$fa_file' + --out_dir output/ + --db_dir '$db_dir.fields.path' + --num_threads "\${GALAXY_SLOTS:-1}" + --min_score $min_score && + mv 'output/Host_prediction_to_genome_m${min_score}.csv' output/Host_prediction_to_genome.csv && + mv 'output/Host_prediction_to_genus_m${min_score}.csv' "output/Host_prediction_to_genus.csv" + ]]></command> + <inputs> + <param argument="--fa_file" type="data" format="fasta" label="FASTA formatted virus sequences"/> + <param argument="--db_dir" type="select" label="Reference database"> + <options from_data_table="iphop"> + <validator type="no_options" message="No reference data available. Contact your Galaxy admin."/> + </options> + </param> + <param argument="--min_score" type="integer" value="90" min="75" max="100" label="Minimum confidence score" help="Minimum confidence score for final output. Default of 90 corresponds to ~10% FDR (roughly). For ~5% overall (roughly) estimated FDR, use a minimum score of 95, etc."/> + <param argument="--no_qc" type="boolean" truevalue="--no_qc" falsevalue="" label="Bypass the automated QC" help="Bypass the automated QC that filters out input sequences with > 10% Ns or with characters other than ATCGN. Warning: if set to true, low-quality sequences (with Ns or unusual characters) may lead to unexpected failures at multiple steps. (default = False)"/> + </inputs> + <outputs> + <data name="detailed_per_tool" format="csv" from_work_dir="output/Detailed_output_by_tool.csv" label="${tool.name} on ${on_string}: Detailed per tool"/> + <data name="host_genome" format="csv" from_work_dir="output/Host_prediction_to_genome.csv" label="${tool.name} on ${on_string}: Host prediction to genome"/> + <data name="host_genus" format="csv" from_work_dir="output/Host_prediction_to_genus.csv" label="${tool.name} on ${on_string}: Host prediction to genus"/> + </outputs> + <tests> + <!-- <test> + <param name="fa_file" value="test_input_phages.fna" ftype="fasta"/> + <param name="db_dir" value="db2"/> + <output name="detailed_per_tool"> + <assert_contents> + <has_n_lines n="190"/> + <has_n_columns n="10" sep="," comment="#"/> + </assert_contents> + </output> + <output name="host_genome"> + <assert_contents> + <has_line line="Virus,Host genome,Host taxonomy,Main method,Confidence score,Additional methods"/> + <has_n_lines n="119"/> + <has_n_columns n="6" sep=","/> + </assert_contents> + </output> + <output name="host_genus"> + <assert_contents> + <has_line line="Virus,AAI to closest RaFAH reference,Host genus,Confidence score,List of methods"/> + <has_n_lines n="6"/> + <has_n_columns n="5" sep=","/> + </assert_contents> + </output> + </test> --> + </tests> + <help><![CDATA[ + +iPHoP stands for integrated Phage Host Prediction. It is an automated command-line pipeline for predicting host genus of novel bacteriophages and archaeoviruses based on their genome sequences. + +Usage +..... + + +**Input** + +Virus sequences in FASTA format. + +**Output** + +Host prediction to genus + +- contains integrated results from host-based and phage-based tools at the host genus level +- lists for each prediction + + - the virus sequence ID, + - the level of amino-acid similarity (AAI) between the query and the genomes in the RaFAH phage database, + - the predicted host genus, + - the confidence score calculated from all tools, and + - the list of scores for individual classifiers obtained for this virus-host pair. + +- for the detailed score by classifier, "RaFAH" represents the score derived from RaFAH (https://pubmed.ncbi.nlm.nih.gov/34286299/), iPHoP-RF is the score derived from all host-based tools, CRISPR the score derived only from CRISPR hits, and blast the score derived only from blastn hits +- all virus-host pairs for which the confidence score is higher than the selected score cutoff (default = 90) are included, so each virus may be associated with multiple predictions +- when multiple predictions are available for a query virus, typical standard practices is to use the one with the highest score + +Host prediction to genome + +- contains integrated results from host-based tools only (i.e., no RaFAH) at the host genome representative level +- lists for each host-based prediction + + - the virus sequence ID, + - the representative host genome ID, + - the corresponding host genome taxonomy, + - the main method supporting this prediction (i.e., highest score), + - the confidence score for this main method, and + - the list of additional methods and scores obtained for this virus-host pair. + +Detailed by tool + + +- This output files lists the 5 best hits for each method for each input virus +- When no hits were obtained, the corresponding method is not listed in this output file for the query virus. + + ]]></help> + <citations> + <citation type="doi">10.1371/journal.pbio.3002083</citation> + </citations> +</tool>
