view data_manager/kaiju_data_manager.xml @ 0:d89783920192 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_kaiju commit 59064c06143fdc7d7b17178e46911ba1009cd32e
author iuc
date Tue, 22 Apr 2025 14:02:32 +0000
parents
children
line wrap: on
line source

<tool id="kaiju_data_manager" name="kaiju data manager" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2">
    <description>builder</description>
    <!-- Tokens substituted wherever @NAME@ appears in this file: the tool version attribute,
         the requirement version, the data table "version" field and the test assertions. -->
    <macros>
        <!-- Version used for the kaiju requirement and recorded in each data table entry. -->
        <token name="@TOOL_VERSION@">1.10.1</token>
        <!-- Wrapper revision appended after "+galaxy" in the tool version attribute. -->
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <!-- Software dependency of the job, pinned to @TOOL_VERSION@.
         NOTE(review): the command block also calls wget and tar, which are not declared here;
         presumably they are expected to exist in the execution environment - confirm. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">kaiju</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Step 1: obtain the index files in the job working directory.
        #if $type.date == "today"
            ## Build a fresh index for the selected source; thread count comes from GALAXY_SLOTS (default 1).
            kaiju-makedb -s '$type.select' -t "\${GALAXY_SLOTS:-1}" &&
        #else
            ## Pre-built archives are stored under a directory named after the year of the selected date.
            #set year=str($type.date).split("-")[0]
            wget 'https://kaiju-idx.s3.eu-central-1.amazonaws.com/${year}/kaiju_db_${type.select}_${type.date}.tgz' --output-document=download.tgz &&
            tar -xvf download.tgz &&
            rm download.tgz &&
        #end if
        ## Step 2: collect the three index files under fixed names in the output's extra files directory.
        ## -p keeps the step from failing when the directory already exists.
        mkdir -p '${out_file.extra_files_path}' &&
        mv \$(find . -name "*nodes.dmp") '${out_file.extra_files_path}'/nodes.dmp &&
        mv \$(find . -name "*names.dmp") '${out_file.extra_files_path}'/names.dmp &&
        mv \$(find . -name "*.fmi") '${out_file.extra_files_path}'/database.fmi &&
        ## Step 3: publish the rendered data manager JSON as the tool output.
        cp '$dmjson' '$out_file'
    ]]></command>
    <configfiles>
        <configfile name="dmjson"><![CDATA[#slurp
## Renders the data manager JSON that describes one new row of the "kaiju" data table.
#import datetime
## "today" means the index is built during this job, so the entry is stamped with the current UTC date;
## otherwise the selected pre-built release date is used as is.
#if $type.date == "today"
    #set date = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
#else
    #set date = $type.date
#end if
## Display label: the source key in parentheses followed by its description.
#if $type.select == "refseq"
    #set name = "(refseq) bacterial, archaeal and viral genomes in the NCBI RefSeq database with assembly status Complete"
#elif $type.select == "refseq_nr"
    #set name = "(refseq_nr) proteins from bacteria, Archaea, viruses, fungi and microbial eukaryotes from the NCBI RefSeq non-redundant proteins collection"
#elif $type.select == "refseq_ref"
    #set name = "(refseq_ref) proteins from bacteria, Archaea from the NCBI RefSeq representative assemblies + viral proteins from NCBI RefSeq"
#elif $type.select == "progenomes"
    #set name = "(progenomes) proteins in the set of representative genomes from the proGenomes v3 database and viral proteins from NCBI RefSeq"
#elif $type.select == "nr"
    #set name = "(nr) NCBI BLAST non-redundant protein database 'nr', only Archaea, bacteria, and viruses"
#elif $type.select == "nr_euk"
    #set name = "(nr_euk) nr and additionally including fungi and microbial eukaryotes"
#elif $type.select == "fungi"
    #set name = "(fungi) all fungi genomes from NCBI RefSeq (any assembly status)"
#elif $type.select == "viruses"
    #set name = "(viruses) viral genomes from NCBI RefSeq"
#elif $type.select == "plasmids"
    #set name = "(plasmids) plasmid genomes from NCBI RefSeq"
#elif $type.select == "rvdb"
    #set name = "(rvdb) viral proteins from RVDB-prot"
#end if
## NOTE(review): "path" is a fixed string while the command block stores the files in the output's
## extra files directory; presumably the data manager configuration rewrites it - confirm.
{
  "data_tables":{
    "kaiju":[
      {
        "value": "${date}_${type.select}",
        "name": "${date} $name",
        "path": "output/",
        "version": "@TOOL_VERSION@"
      }
    ]
  }
}
]]></configfile>
    </configfiles>
    <!-- User inputs: a source database and, depending on that source, which release to install.
         Each source offers one dated pre-built release plus "today"; the command block builds the
         index itself when "today" is chosen and downloads an archive for a dated value. -->
    <inputs>
        <conditional name="type">
            <!-- The selected value is passed to kaiju-makedb -s and used in the download file name. -->
            <param name="select" type="select" label="Source database" help="">
                <option value="refseq">bacterial, archaeal and viral genomes in the NCBI RefSeq database with assembly status Complete</option>
                <option value="refseq_nr">proteins from bacteria, Archaea, viruses, fungi and microbial eukaryotes from the NCBI RefSeq non-redundant proteins collection</option>
                <option value="refseq_ref">proteins from bacteria, Archaea from the NCBI RefSeq representative assemblies + viral proteins from NCBI RefSeq</option>
                <option value="progenomes">proteins in the set of representative genomes from the proGenomes v3 database and viral proteins from NCBI RefSeq</option>
                <option value="nr">NCBI BLAST non-redundant protein database "nr", only Archaea, bacteria, and viruses</option>
                <option value="nr_euk">nr and additionally including fungi and microbial eukaryotes</option>
                <option value="fungi">All fungi genomes from NCBI RefSeq (any assembly status)</option>
                <option value="viruses">Viral genomes from NCBI RefSeq</option>
                <option value="plasmids">Plasmid genomes from NCBI RefSeq</option>
                <option value="rvdb">Viral proteins from RVDB-prot</option>
            </param>
            <!-- One branch per source. A dated value is used verbatim in the download URL,
                 and its year component selects the directory the archive is fetched from. -->
            <when value="refseq">
                <param name="date" type="select" label="Date">
                    <option value="2024-08-14">2024-08-14</option>
                    <option value="today">Today</option>
                </param>
            </when>
            <when value="refseq_nr">
                <param name="date" type="select" label="Date">
                    <option value="2024-08-13">2024-08-13</option>
                    <option value="today">Today</option>
                </param>
            </when>
            <when value="refseq_ref">
                <param name="date" type="select" label="Date">
                    <option value="2024-08-14">2024-08-14</option>
                    <option value="today">Today</option>
                </param>
            </when>
            <when value="progenomes">
                <param name="date" type="select" label="Date">
                    <option value="2023-05-25">2023-05-25</option>
                    <option value="today">Today</option>
                </param>
            </when>
            <when value="nr">
                <param name="date" type="select" label="Date">
                    <option value="2024-08-25">2024-08-25</option>
                    <option value="today">Today</option>
                </param>
            </when>
            <when value="nr_euk">
                <param name="date" type="select" label="Date">
                    <option value="2023-05-10">2023-05-10</option>
                    <option value="today">Today</option>
                </param>
            </when>
            <when value="fungi">
                <param name="date" type="select" label="Date">
                    <option value="2024-08-16">2024-08-16</option>
                    <option value="today">Today</option>
                </param>
            </when>
            <when value="viruses">
                <param name="date" type="select" label="Date">
                    <option value="2024-08-15">2024-08-15</option>
                    <option value="today">Today</option>
                </param>
            </when>
            <when value="plasmids">
                <param name="date" type="select" label="Date">
                    <option value="2024-08-15">2024-08-15</option>
                    <option value="today">Today</option>
                </param>
            </when>
            <when value="rvdb">
                <param name="date" type="select" label="Date">
                    <option value="2024-12-20">2024-12-20</option>
                    <option value="today">Today</option>
                </param>
            </when>
        </conditional>
    </inputs>
    <!-- Single output: receives a copy of the rendered "dmjson" config file, while its extra files
         directory receives nodes.dmp, names.dmp and database.fmi from the command block. -->
    <outputs>
        <data name="out_file" format="data_manager_json"/>
    </outputs>
    <tests>
        <!-- Pre-built path: download the dated viruses index and check the generated JSON entry. -->
        <test>
            <conditional name="type">
                <param name="select" value="viruses"/>
                <param name="date" value="2024-08-15"/>
            </conditional>
            <output name="out_file">
                <assert_contents>
                    <has_text text="kaiju"/>
                    <has_text text="(viruses)"/>
                    <has_text text="2024-08-15"/>
                    <has_text text="@TOOL_VERSION@"/>
                </assert_contents>
            </output>
        </test>
        <!-- Build path: "today" must stamp the entry with the current date, so the date of the
             pre-built viruses release must not appear anywhere in the JSON. -->
        <test>
            <conditional name="type">
                <param name="select" value="viruses"/>
                <param name="date" value="today"/>
            </conditional>
            <output name="out_file">
                <assert_contents>
                    <has_text text="kaiju"/>
                    <has_text text="(viruses)"/>
                    <has_text negate="true" text="2024-08-15"/>
                    <has_text text="@TOOL_VERSION@"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

Download pre-built indices for kaiju. If a date is selected, pre-built indices are downloaded from:
https://bioinformatics-centre.github.io/kaiju/downloads.html. Otherwise (i.e. if "today" is selected)
reference data is downloaded and an index is computed (which needs substantial resources).
Pre-built indices might be preferable in terms of reproducibility across Galaxy instances.

    ]]></help>
    <!-- Publication, identified by DOI, to cite when this tool is used. -->
    <citations>
        <citation type="doi">10.1038/ncomms11257</citation>
    </citations>
</tool>