Galaxy |

Changeset 2:ea2cccb9f73e (2023-01-05)

Previous changeset 1:9b12bc1b1e2c (2022-11-30) Next changeset 3:302332b914ef (2023-01-14)

Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/VirHunter commit c3685ed6a70b47012b62b95a2a3db062bd3b7475

modified:
macros.xml
predict.py
tool-data/virhunter.loc.sample
virhunter.xml

diff -r 9b12bc1b1e2c -r ea2cccb9f73e macros.xml
--- a/macros.xml Wed Nov 30 17:31:52 2022 +0000
+++ b/macros.xml Thu Jan 05 14:27:54 2023 +0000

@@ -1,6 +1,6 @@
<macros>
     <token name="@TOOL_VERSION@">1.0.0</token>
-    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@VERSION_SUFFIX@">2</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="1.23.3">numpy</requirement>

diff -r 9b12bc1b1e2c -r ea2cccb9f73e predict.py
--- a/predict.py Wed Nov 30 17:31:52 2022 +0000
+++ b/predict.py Thu Jan 05 14:27:54 2023 +0000

[

@@ -104,7 +104,7 @@
         df.groupby(["id", "length", 'RF_decision'], sort=False).size().unstack(fill_value=0)
     )
     df = df.reset_index()
-    df = df.reindex(['length', 'id', 'virus', 'plant', 'bacteria'], axis=1)
+    df = df.reindex(['length', 'id', 'virus', 'plant', 'bacteria'], axis=1).fillna(value=0)
     conditions = [
         (df['virus'] > df['plant']) & (df['virus'] > df['bacteria']),
         (df['plant'] > df['virus']) & (df['plant'] > df['bacteria']),

diff -r 9b12bc1b1e2c -r ea2cccb9f73e tool-data/virhunter.loc.sample
--- a/tool-data/virhunter.loc.sample Wed Nov 30 17:31:52 2022 +0000
+++ b/tool-data/virhunter.loc.sample Thu Jan 05 14:27:54 2023 +0000

@@ -1,29 +1,27 @@
#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of Samtools indexed sequences data files.  You will need
-#to create these data files and then create a fasta_indexes.loc file
+#to use a directory of virhunter hdf5 model files.  You will need
+#to create these data files and then create a virhunter.loc file
#similar to this one (store it in this directory) that points to
-#the directories in which those files are stored. The fasta_indexes.loc
+#the directories in which those files are stored. The virhunter.loc
#file has this format (white space characters are TAB characters):
#
-# <unique_build_id> <dbkey> <display_name> <file_base_path>
+# <value> <name> <path>
#
-#So, for example, if you had hg19 Canonical indexed stored in
+#So, for example, if you had peach hdf5 model files stored in
#
-# /depot/data2/galaxy/hg19/sam/,
+# /tool-data/weights/peach/,
#
-#then the fasta_indexes.loc entry would look like this:
+#then the virhunter.loc entry would look like this:
#
-#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#peach peach /tool-data/weights/peach
#
-#and your /depot/data2/galaxy/hg19/sam/ directory
-#would contain hg19canon.fa and hg19canon.fa.fai files.
+#and your /tool-data/weights/peach/ directory
+#would contain model_5_500.h5, model_7_500.h5, model_10_500.h5, model_5_1000.h5, model_7_1000.h5 and model_10_1000.h5 files.
#
-#Your fasta_indexes.loc file should include an entry per line for
+#Your virhunter.loc file should include an entry per line for
#each index set you have stored.  The file in the path does actually
#exist, but it should never be directly used. Instead, the name serves
#as a prefix for the index file.  For example:
#
-#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa
-#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa
-#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa
-#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
\ No newline at end of file
+#peach peach /data/databases/path/weights/peach
+#grapevine grapevine /data/databases/path/weights/grapevine

diff -r 9b12bc1b1e2c -r ea2cccb9f73e virhunter.xml
--- a/virhunter.xml Wed Nov 30 17:31:52 2022 +0000
+++ b/virhunter.xml Thu Jan 05 14:27:54 2023 +0000

[

@@ -1,6 +1,6 @@
<tool id="virhunter" name="virhunter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
     <description>
-        Deep learning method to identify viruses in sequencing datasets..
+        Deep learning method to identify viruses in sequencing datasets
     </description>
     <macros>
         <import>macros.xml</import>
@@ -24,7 +24,7 @@

     ]]></command>
     <inputs>
-        <param name="fasta_file" type="data" format="fasta" label="DNA FASTA file(s)"/>
+        <param name="fasta_file" type="data" format="fasta" label="DNA FASTA file"/>
         <param name="weights" type="select" label="Select a reference model" help="If your model of interest is not listed, contact the Galaxy team">
             <options from_data_table="virhunter_models">
                 <validator type="no_options" message="No models are available for the selected input dataset" />
@@ -49,7 +49,8 @@

     <help>
     <![CDATA[
-    VirHunter is a deep learning method that uses Convolutional Neural Networks (CNNs) and a Random Forest Classifier to identify viruses in sequening datasets. More precisely, VirHunter classifies previously assembled contigs as viral, host and bacterial (contamination).
+    VirHunter is a tool that uses deep learning to identify viruses in plant virome sequencing datasets.
+    In particular, VirHunter classifies previously assembled contigs into virus, host and bacteria classes.
  ]]></help>
     <expand macro="citations" />
</tool>
\ No newline at end of file