Mercurial > repos > iuc > virhunter
changeset 2:ea2cccb9f73e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/VirHunter commit c3685ed6a70b47012b62b95a2a3db062bd3b7475
author | iuc |
---|---|
date | Thu, 05 Jan 2023 14:27:54 +0000 |
parents | 9b12bc1b1e2c |
children | 302332b914ef |
files | macros.xml predict.py tool-data/virhunter.loc.sample virhunter.xml |
diffstat | 4 files changed, 19 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Wed Nov 30 17:31:52 2022 +0000 +++ b/macros.xml Thu Jan 05 14:27:54 2023 +0000 @@ -1,6 +1,6 @@ <macros> <token name="@TOOL_VERSION@">1.0.0</token> - <token name="@VERSION_SUFFIX@">1</token> + <token name="@VERSION_SUFFIX@">2</token> <xml name="requirements"> <requirements> <requirement type="package" version="1.23.3">numpy</requirement>
--- a/predict.py Wed Nov 30 17:31:52 2022 +0000 +++ b/predict.py Thu Jan 05 14:27:54 2023 +0000 @@ -104,7 +104,7 @@ df.groupby(["id", "length", 'RF_decision'], sort=False).size().unstack(fill_value=0) ) df = df.reset_index() - df = df.reindex(['length', 'id', 'virus', 'plant', 'bacteria'], axis=1) + df = df.reindex(['length', 'id', 'virus', 'plant', 'bacteria'], axis=1).fillna(value=0) conditions = [ (df['virus'] > df['plant']) & (df['virus'] > df['bacteria']), (df['plant'] > df['virus']) & (df['plant'] > df['bacteria']),
--- a/tool-data/virhunter.loc.sample Wed Nov 30 17:31:52 2022 +0000 +++ b/tool-data/virhunter.loc.sample Thu Jan 05 14:27:54 2023 +0000 @@ -1,29 +1,27 @@ #This is a sample file distributed with Galaxy that enables tools -#to use a directory of Samtools indexed sequences data files. You will need -#to create these data files and then create a fasta_indexes.loc file +#to use a directory of virhunter hdf5 model files. You will need +#to create these data files and then create a virhunter.loc file #similar to this one (store it in this directory) that points to -#the directories in which those files are stored. The fasta_indexes.loc +#the directories in which those files are stored. The virhunter.loc #file has this format (white space characters are TAB characters): # -# <unique_build_id> <dbkey> <display_name> <file_base_path> +# <value> <name> <path> # -#So, for example, if you had hg19 Canonical indexed stored in +#So, for example, if you had peach hdf5 model files stored in # -# /depot/data2/galaxy/hg19/sam/, +# /data/databases/path/weights/peach/, # -#then the fasta_indexes.loc entry would look like this: +#then the virhunter.loc entry would look like this: # -#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#peach peach /data/databases/path/weights/peach # -#and your /depot/data2/galaxy/hg19/sam/ directory -#would contain hg19canon.fa and hg19canon.fa.fai files. +#and your /data/databases/path/weights/peach/ directory +#would contain model_5_500.h5, model_7_500.h5, model_10_500.h5 and model_5_1000.h5, model_7_1000.h5, model_10_1000.h5 files. # -#Your fasta_indexes.loc file should include an entry per line for +#Your virhunter.loc file should include an entry per line for #each index set you have stored. The file in the path does actually #exist, but it should never be directly used. Instead, the name serves #as a prefix for the index file. 
For example: # -#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa -#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa -#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa -#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa \ No newline at end of file +#peach peach /data/databases/path/weights/peach +#grapevine grapevine /data/databases/path/weights/grapevine
--- a/virhunter.xml Wed Nov 30 17:31:52 2022 +0000 +++ b/virhunter.xml Thu Jan 05 14:27:54 2023 +0000 @@ -1,6 +1,6 @@ <tool id="virhunter" name="virhunter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05"> <description> - Deep learning method to identify viruses in sequencing datasets.. + Deep learning method to identify viruses in sequencing datasets </description> <macros> <import>macros.xml</import> @@ -24,7 +24,7 @@ ]]></command> <inputs> - <param name="fasta_file" type="data" format="fasta" label="DNA FASTA file(s)"/> + <param name="fasta_file" type="data" format="fasta" label="DNA FASTA file"/> <param name="weights" type="select" label="Select a reference model" help="If your model of interest is not listed, contact the Galaxy team"> <options from_data_table="virhunter_models"> <validator type="no_options" message="No models are available for the selected input dataset" /> @@ -49,7 +49,8 @@ <help> <![CDATA[ - VirHunter is a deep learning method that uses Convolutional Neural Networks (CNNs) and a Random Forest Classifier to identify viruses in sequening datasets. More precisely, VirHunter classifies previously assembled contigs as viral, host and bacterial (contamination). + VirHunter is a tool that uses deep learning to identify viruses in plant virome sequencing datasets. + In particular, VirHunter classifies previously assembled contigs into virus, host and bacteria classes. ]]></help> <expand macro="citations" /> </tool> \ No newline at end of file