diff data_manager/kaiju_data_manager.xml @ 0:d89783920192 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_kaiju commit 59064c06143fdc7d7b17178e46911ba1009cd32e
author iuc
date Tue, 22 Apr 2025 14:02:32 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/kaiju_data_manager.xml	Tue Apr 22 14:02:32 2025 +0000
@@ -0,0 +1,189 @@
+<tool id="kaiju_data_manager" name="kaiju data manager" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2">
+    <description>builder</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.10.1</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">kaiju</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        #if $type.date == "today"
+            kaiju-makedb -s $type.select -t \${GALAXY_SLOTS:-1} &&
+        #else
+            #set year=str($type.date).split("-")[0]
+            wget https://kaiju-idx.s3.eu-central-1.amazonaws.com/${year}/kaiju_db_${type.select}_${type.date}.tgz --output-document=download.tgz &&
+            tar -xvf download.tgz &&
+            rm download.tgz &&
+        #end if
+        mkdir '${out_file.extra_files_path}' &&
+        mv \$(find . -name "*nodes.dmp") '${out_file.extra_files_path}'/nodes.dmp &&
+        mv \$(find . -name "*names.dmp") '${out_file.extra_files_path}'/names.dmp &&
+        mv \$(find . -name "*.fmi") '${out_file.extra_files_path}'/database.fmi &&
+        cp '$dmjson' '$out_file'
+    ]]></command>
+    <configfiles>
+        <configfile name="dmjson"><![CDATA[#slurp
+#import datetime
+#if $type.date == "today"
+    #set date = datetime.datetime.utcnow().strftime("%Y-%m-%d")
+#else
+    #set date = $type.date
+#end if
+#if $type.select == "refseq"
+    #set name = "(refseq) bacterial, archaeal and viral genomes in the NCBI RefSeq database with assembly status Complete"
+#elif $type.select == "refseq_nr"
+    #set name = "(refseq_nr) proteins from bacteria, Archaea, viruses, fungi and microbial eukaryotes from the NCBI RefSeq non-redundant proteins collection"
+#elif $type.select == "refseq_ref"
+    #set name = "(refseq_ref) proteins from bacteria, Archaea from the NCBI RefSeq representative assemblies + viral proteins from NCBI RefSeq"
+#elif $type.select == "progenomes"
+    #set name = "(progenomes) proteins in the set of representative genomes from the proGenomes v3 database and viral proteins from NCBI RefSeq"
+#elif $type.select == "nr"
+    #set name = "(nr) NCBI BLAST non-redundant protein database 'nr', only Archaea, bacteria, and viruses"
+#elif $type.select == "nr_euk"
+    #set name = "(nr_euk) nr and additionally including fungi and microbial eukaryotes"
+#elif $type.select == "fungi"
+    #set name = "(fungi) all fungi genomes from NCBI RefSeq (any assembly status)"
+#elif $type.select == "viruses"
+    #set name = "(viruses) viral genomes from NCBI RefSeq"
+#elif $type.select == "plasmids"
+    #set name = "(plasmids) plasmid genomes from NCBI RefSeq"
+#elif $type.select == "rvdb"
+    #set name = "(tvdb) viral proteins from RVDB-prot"
+#end if
+{
+  "data_tables":{
+    "kaiju":[
+      {
+        "value": "${date}_${$type.select}",
+        "name": "${date} $name",
+        "path": "output/",
+        "version": "@TOOL_VERSION@"
+      }
+    ]
+  }
+}
+]]></configfile>
+    </configfiles>
+    <inputs>
+        <conditional name="type">
+            <param name="select" type="select" label="Source database" help="">
+                <option value="refseq">bacterial, archaeal and viral genomes in the NCBI RefSeq database with assembly status Complete</option>
+                <option value="refseq_nr">proteins from bacteria, Archaea, viruses, fungi and microbial eukaryotes from the NCBI RefSeq non-redundant proteins collection</option>
+                <option value="refseq_ref">proteins from bacteria, Archaea from the NCBI RefSeq representative assemblies + viral proteins from NCBI RefSeq</option>
+                <option value="progenomes">proteins in the set of representative genomes from the proGenomes v3 database and viral proteins from NCBI RefSeq</option>
+                <option value="nr">NCBI BLAST non-redundant protein database "nr", only Archaea, bacteria, and viruses</option>
+                <option value="nr_euk">nr and additionally including fungi and microbial eukaryotes</option>
+                <option value="fungi">All fungi genomes from NCBI RefSeq (any assembly status)</option>
+                <option value="viruses">Viral genomes from NCBI RefSeq</option>
+                <option value="plasmids">Plasmid genomes from NCBI RefSeq</option>
+                <option value="rvdb">Viral proteins from RVDB-prot</option>
+            </param>
+            <when value="refseq">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-14">2024-08-14</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="refseq_nr">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-13">2024-08-13</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="refseq_ref">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-14">2024-08-14</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="progenomes">
+                <param name="date" type="select" label="Date">
+                    <option value="2023-05-25">2023-05-25</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="nr">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-25">2024-08-25</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="nr_euk">
+                <param name="date" type="select" label="Date">
+                    <option value="2023-05-10">2023-05-10</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="fungi">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-16">2024-08-16</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="viruses">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-15">2024-08-15</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="plasmids">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-15">2024-08-15</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="rvdb">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-12-20">2024-12-20</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+        <test>
+            <conditional name="type">
+                <param name="select" value="viruses"/>
+                <param name="date" value="2024-08-15"/>
+            </conditional>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="kaiju"/>
+                    <has_text text="(viruses)"/>
+                    <has_text text="2024-08-15"/>
+                    <has_text text="@TOOL_VERSION@"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <conditional name="type">
+                <param name="select" value="viruses"/>
+                <param name="date" value="today"/>
+            </conditional>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="kaiju"/>
+                    <has_text text="(viruses)"/>
+                    <has_text negate="true" text="2024-05-10"/>
+                    <has_text text="@TOOL_VERSION@"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+Download pre-built indices for kaiju. If a date is selected pre-built indices are downloaded from: 
+https://bioinformatics-centre.github.io/kaiju/downloads.html. Otherwise (i.e. if "today" is selected)
+reference data is downloaded and an index is computed (which needs substantial ressources).
+Pre-built indices might be preferrable in terms of reproducibility across Galaxy instances.
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1038/ncomms11257</citation>
+    </citations>
+</tool>