changeset 0:d89783920192 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_kaiju commit 59064c06143fdc7d7b17178e46911ba1009cd32e
author iuc
date Tue, 22 Apr 2025 14:02:32 +0000
parents
children
files data_manager/kaiju_data_manager.xml data_manager_conf.xml test-data/kaiju.loc tool-data/kaiju.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 6 files changed, 228 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/kaiju_data_manager.xml	Tue Apr 22 14:02:32 2025 +0000
@@ -0,0 +1,189 @@
+<tool id="kaiju_data_manager" name="kaiju data manager" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2">
+    <description>builder</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.10.1</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">kaiju</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        #if $type.date == "today"
+            kaiju-makedb -s $type.select -t \${GALAXY_SLOTS:-1} &&
+        #else
+            #set year=str($type.date).split("-")[0]
+            wget https://kaiju-idx.s3.eu-central-1.amazonaws.com/${year}/kaiju_db_${type.select}_${type.date}.tgz --output-document=download.tgz &&
+            tar -xvf download.tgz &&
+            rm download.tgz &&
+        #end if
+        mkdir '${out_file.extra_files_path}' &&
+        mv \$(find . -name "*nodes.dmp") '${out_file.extra_files_path}'/nodes.dmp &&
+        mv \$(find . -name "*names.dmp") '${out_file.extra_files_path}'/names.dmp &&
+        mv \$(find . -name "*.fmi") '${out_file.extra_files_path}'/database.fmi &&
+        cp '$dmjson' '$out_file'
+    ]]></command><!-- Command above: runs kaiju-makedb when the date is "today", otherwise downloads and unpacks the pre-built archive for the chosen database and date; nodes.dmp, names.dmp and the .fmi index are then moved into the extra files directory of out_file under fixed names, and the dmjson config file is copied to out_file. -->
+    <configfiles><!-- dmjson: data manager JSON with one kaiju data table entry (value, name, path, version) for the selected database and date; "today" is replaced by the current UTC date. -->
+        <configfile name="dmjson"><![CDATA[#slurp
+#import datetime
+#if $type.date == "today"
+    #set date = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
+#else
+    #set date = $type.date
+#end if
+#if $type.select == "refseq"
+    #set name = "(refseq) bacterial, archaeal and viral genomes in the NCBI RefSeq database with assembly status Complete"
+#elif $type.select == "refseq_nr"
+    #set name = "(refseq_nr) proteins from bacteria, Archaea, viruses, fungi and microbial eukaryotes from the NCBI RefSeq non-redundant proteins collection"
+#elif $type.select == "refseq_ref"
+    #set name = "(refseq_ref) proteins from bacteria, Archaea from the NCBI RefSeq representative assemblies + viral proteins from NCBI RefSeq"
+#elif $type.select == "progenomes"
+    #set name = "(progenomes) proteins in the set of representative genomes from the proGenomes v3 database and viral proteins from NCBI RefSeq"
+#elif $type.select == "nr"
+    #set name = "(nr) NCBI BLAST non-redundant protein database 'nr', only Archaea, bacteria, and viruses"
+#elif $type.select == "nr_euk"
+    #set name = "(nr_euk) nr and additionally including fungi and microbial eukaryotes"
+#elif $type.select == "fungi"
+    #set name = "(fungi) all fungi genomes from NCBI RefSeq (any assembly status)"
+#elif $type.select == "viruses"
+    #set name = "(viruses) viral genomes from NCBI RefSeq"
+#elif $type.select == "plasmids"
+    #set name = "(plasmids) plasmid genomes from NCBI RefSeq"
+#elif $type.select == "rvdb"
+    #set name = "(rvdb) viral proteins from RVDB-prot"
+#end if
+{
+  "data_tables":{
+    "kaiju":[
+      {
+        "value": "${date}_${type.select}",
+        "name": "${date} $name",
+        "path": "output/",
+        "version": "@TOOL_VERSION@"
+      }
+    ]
+  }
+}
+]]></configfile>
+    </configfiles>
+    <inputs><!-- Each source database offers the date of its pre-built index plus "today"; the command section builds the index locally for "today" and downloads the archive for a dated option. -->
+        <conditional name="type">
+            <param name="select" type="select" label="Source database" help="">
+                <option value="refseq">bacterial, archaeal and viral genomes in the NCBI RefSeq database with assembly status Complete</option>
+                <option value="refseq_nr">proteins from bacteria, Archaea, viruses, fungi and microbial eukaryotes from the NCBI RefSeq non-redundant proteins collection</option>
+                <option value="refseq_ref">proteins from bacteria, Archaea from the NCBI RefSeq representative assemblies + viral proteins from NCBI RefSeq</option>
+                <option value="progenomes">proteins in the set of representative genomes from the proGenomes v3 database and viral proteins from NCBI RefSeq</option>
+                <option value="nr">NCBI BLAST non-redundant protein database "nr", only Archaea, bacteria, and viruses</option>
+                <option value="nr_euk">nr and additionally including fungi and microbial eukaryotes</option>
+                <option value="fungi">All fungi genomes from NCBI RefSeq (any assembly status)</option>
+                <option value="viruses">Viral genomes from NCBI RefSeq</option>
+                <option value="plasmids">Plasmid genomes from NCBI RefSeq</option>
+                <option value="rvdb">Viral proteins from RVDB-prot</option>
+            </param>
+            <when value="refseq">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-14">2024-08-14</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="refseq_nr">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-13">2024-08-13</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="refseq_ref">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-14">2024-08-14</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="progenomes">
+                <param name="date" type="select" label="Date">
+                    <option value="2023-05-25">2023-05-25</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="nr">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-25">2024-08-25</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="nr_euk">
+                <param name="date" type="select" label="Date">
+                    <option value="2023-05-10">2023-05-10</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="fungi">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-16">2024-08-16</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="viruses">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-15">2024-08-15</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="plasmids">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-15">2024-08-15</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="rvdb">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-12-20">2024-12-20</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs><!-- out_file receives a copy of the dmjson config file; the index files are moved into its extra files directory by the command section. -->
+        <data name="out_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+        <test><!-- Pre-built viruses index: the selected date must appear in the data manager JSON. -->
+            <conditional name="type">
+                <param name="select" value="viruses"/>
+                <param name="date" value="2024-08-15"/>
+            </conditional>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="kaiju"/>
+                    <has_text text="(viruses)"/>
+                    <has_text text="2024-08-15"/>
+                    <has_text text="@TOOL_VERSION@"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test><!-- Index built with "today": the date of the pre-built viruses index must not appear in the data manager JSON. -->
+            <conditional name="type">
+                <param name="select" value="viruses"/>
+                <param name="date" value="today"/>
+            </conditional>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="kaiju"/>
+                    <has_text text="(viruses)"/>
+                    <has_text negate="true" text="2024-08-15"/>
+                    <has_text text="@TOOL_VERSION@"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+Download pre-built indices for kaiju. If a date is selected, pre-built indices are downloaded from:
+https://bioinformatics-centre.github.io/kaiju/downloads.html. Otherwise (i.e. if "today" is selected)
+reference data is downloaded and an index is computed (which needs substantial resources).
+Pre-built indices might be preferable in terms of reproducibility across Galaxy instances.
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1038/ncomms11257</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Tue Apr 22 14:02:32 2025 +0000
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<data_managers><!-- Maps the JSON output of the kaiju_data_manager tool onto the kaiju data table; the path column is taken from out_file, moved as a directory to kaiju/${value}/ under GALAXY_DATA_MANAGER_DATA_PATH, and rewritten to the absolute form of that location. -->
+    <data_manager tool_file="data_manager/kaiju_data_manager.xml" id="kaiju_data_manager">
+        <data_table name="kaiju">
+            <output>
+                <column name="value" />
+                <column name="name" />
+                <column name="path" output_ref="out_file" >
+                    <move type="directory" relativize_symlinks="True">
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">kaiju/${value}/</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/kaiju/${value}/</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+                <column name="version" />
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kaiju.loc	Tue Apr 22 14:02:32 2025 +0000
@@ -0,0 +1,3 @@
+2025-04-14_viruses	2025-04-14 (viruses) viral genomes from NCBI RefSeq	/tmp/tmpej7pd7vz/galaxy-dev/tool-data/kaiju/2025-04-14_viruses	1.10.1
+2024-08-15_viruses	2024-08-15 (viruses) viral genomes from NCBI RefSeq	/tmp/tmp6ojvc_47/galaxy-dev/tool-data/kaiju/2024-08-15_viruses	1.10.1
+2025-04-14_viruses	2025-04-14 (viruses) viral genomes from NCBI RefSeq	/tmp/tmp6ojvc_47/galaxy-dev/tool-data/kaiju/2025-04-14_viruses	1.10.1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/kaiju.loc.sample	Tue Apr 22 14:02:32 2025 +0000
@@ -0,0 +1,5 @@
+# value: date + db name
+# name: what is shown to the user in the select
+# path: directory of the reference data; must contain database.fmi, names.dmp and nodes.dmp
+# version: version used for constructing the DB (or just the current version at the time when pre-computed indices were downloaded)
+#value	name	path	version
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Apr 22 14:02:32 2025 +0000
@@ -0,0 +1,6 @@
+<tables><!-- kaiju data table (columns value, name, path, version) read from tool-data/kaiju.loc. -->
+    <table name="kaiju" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path, version</columns>
+        <file path="tool-data/kaiju.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Tue Apr 22 14:02:32 2025 +0000
@@ -0,0 +1,6 @@
+<tables><!-- Test variant of the kaiju data table, read from test-data/kaiju.loc next to this file. -->
+    <table name="kaiju" comment_char="#">
+        <columns>value, name, path, version</columns>
+        <file path="${__HERE__}/test-data/kaiju.loc" />
+    </table>
+</tables>