Mercurial > repos > iuc > data_manager_kaiju
changeset 0:d89783920192 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_kaiju commit 59064c06143fdc7d7b17178e46911ba1009cd32e
author | iuc |
---|---|
date | Tue, 22 Apr 2025 14:02:32 +0000 |
parents | |
children | |
files | data_manager/kaiju_data_manager.xml data_manager_conf.xml test-data/kaiju.loc tool-data/kaiju.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 6 files changed, 228 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/kaiju_data_manager.xml Tue Apr 22 14:02:32 2025 +0000
@@ -0,0 +1,198 @@
+<tool id="kaiju_data_manager" name="kaiju data manager" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2">
+    <!-- Data manager that installs a kaiju index, either downloaded pre-built or built locally with kaiju-makedb. -->
+    <description>builder</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.10.1</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">kaiju</requirement>
+    </requirements>
+    <!-- "today" builds the index locally with kaiju-makedb; any other date downloads the
+         matching pre-built archive. Both paths end by moving nodes.dmp, names.dmp and the
+         .fmi index into the extra files directory of the output under fixed names. -->
+    <command detect_errors="exit_code"><![CDATA[
+        #if $type.date == "today"
+            kaiju-makedb -s $type.select -t \${GALAXY_SLOTS:-1} &&
+        #else
+            #set year=str($type.date).split("-")[0]
+            wget https://kaiju-idx.s3.eu-central-1.amazonaws.com/${year}/kaiju_db_${type.select}_${type.date}.tgz --output-document=download.tgz &&
+            tar -xvf download.tgz &&
+            rm download.tgz &&
+        #end if
+        mkdir '${out_file.extra_files_path}' &&
+        mv \$(find . -name "*nodes.dmp") '${out_file.extra_files_path}'/nodes.dmp &&
+        mv \$(find . -name "*names.dmp") '${out_file.extra_files_path}'/names.dmp &&
+        mv \$(find . -name "*.fmi") '${out_file.extra_files_path}'/database.fmi &&
+        cp '$dmjson' '$out_file'
+    ]]></command>
+    <!-- Cheetah-rendered JSON describing the new "kaiju" data table entry; the command
+         copies it to the out_file output. -->
+    <configfiles>
+        <configfile name="dmjson"><![CDATA[#slurp
+#import datetime
+#if $type.date == "today"
+    #set date = datetime.datetime.utcnow().strftime("%Y-%m-%d")
+#else
+    #set date = $type.date
+#end if
+#if $type.select == "refseq"
+    #set name = "(refseq) bacterial, archaeal and viral genomes in the NCBI RefSeq database with assembly status Complete"
+#elif $type.select == "refseq_nr"
+    #set name = "(refseq_nr) proteins from bacteria, Archaea, viruses, fungi and microbial eukaryotes from the NCBI RefSeq non-redundant proteins collection"
+#elif $type.select == "refseq_ref"
+    #set name = "(refseq_ref) proteins from bacteria, Archaea from the NCBI RefSeq representative assemblies + viral proteins from NCBI RefSeq"
+#elif $type.select == "progenomes"
+    #set name = "(progenomes) proteins in the set of representative genomes from the proGenomes v3 database and viral proteins from NCBI RefSeq"
+#elif $type.select == "nr"
+    #set name = "(nr) NCBI BLAST non-redundant protein database 'nr', only Archaea, bacteria, and viruses"
+#elif $type.select == "nr_euk"
+    #set name = "(nr_euk) nr and additionally including fungi and microbial eukaryotes"
+#elif $type.select == "fungi"
+    #set name = "(fungi) all fungi genomes from NCBI RefSeq (any assembly status)"
+#elif $type.select == "viruses"
+    #set name = "(viruses) viral genomes from NCBI RefSeq"
+#elif $type.select == "plasmids"
+    #set name = "(plasmids) plasmid genomes from NCBI RefSeq"
+#elif $type.select == "rvdb"
+    #set name = "(rvdb) viral proteins from RVDB-prot"
+#end if
+{
+  "data_tables":{
+    "kaiju":[
+      {
+        "value": "${date}_${$type.select}",
+        "name": "${date} $name",
+        "path": "output/",
+        "version": "@TOOL_VERSION@"
+      }
+    ]
+  }
+}
+]]></configfile>
+    </configfiles>
+    <inputs>
+        <!-- Every source database has its own pre-built index date plus "today" (local build). -->
+        <conditional name="type">
+            <param name="select" type="select" label="Source database" help="">
+                <option value="refseq">bacterial, archaeal and viral genomes in the NCBI RefSeq database with assembly status Complete</option>
+                <option value="refseq_nr">proteins from bacteria, Archaea, viruses, fungi and microbial eukaryotes from the NCBI RefSeq non-redundant proteins collection</option>
+                <option value="refseq_ref">proteins from bacteria, Archaea from the NCBI RefSeq representative assemblies + viral proteins from NCBI RefSeq</option>
+                <option value="progenomes">proteins in the set of representative genomes from the proGenomes v3 database and viral proteins from NCBI RefSeq</option>
+                <option value="nr">NCBI BLAST non-redundant protein database "nr", only Archaea, bacteria, and viruses</option>
+                <option value="nr_euk">nr and additionally including fungi and microbial eukaryotes</option>
+                <option value="fungi">All fungi genomes from NCBI RefSeq (any assembly status)</option>
+                <option value="viruses">Viral genomes from NCBI RefSeq</option>
+                <option value="plasmids">Plasmid genomes from NCBI RefSeq</option>
+                <option value="rvdb">Viral proteins from RVDB-prot</option>
+            </param>
+            <when value="refseq">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-14">2024-08-14</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="refseq_nr">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-13">2024-08-13</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="refseq_ref">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-14">2024-08-14</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="progenomes">
+                <param name="date" type="select" label="Date">
+                    <option value="2023-05-25">2023-05-25</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="nr">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-25">2024-08-25</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="nr_euk">
+                <param name="date" type="select" label="Date">
+                    <option value="2023-05-10">2023-05-10</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="fungi">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-16">2024-08-16</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="viruses">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-15">2024-08-15</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="plasmids">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-08-15">2024-08-15</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+            <when value="rvdb">
+                <param name="date" type="select" label="Date">
+                    <option value="2024-12-20">2024-12-20</option>
+                    <option value="today">Today</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+        <!-- Pre-built download: the entry carries the date of the selected archive. -->
+        <test>
+            <conditional name="type">
+                <param name="select" value="viruses"/>
+                <param name="date" value="2024-08-15"/>
+            </conditional>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="kaiju"/>
+                    <has_text text="(viruses)"/>
+                    <has_text text="2024-08-15"/>
+                    <has_text text="@TOOL_VERSION@"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Local build: the entry must not carry the date of the pre-built viruses archive. -->
+        <test>
+            <conditional name="type">
+                <param name="select" value="viruses"/>
+                <param name="date" value="today"/>
+            </conditional>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="kaiju"/>
+                    <has_text text="(viruses)"/>
+                    <has_text negate="true" text="2024-08-15"/>
+                    <has_text text="@TOOL_VERSION@"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+Download pre-built indices for kaiju. If a date is selected, pre-built indices are downloaded from:
+https://bioinformatics-centre.github.io/kaiju/downloads.html. Otherwise (i.e. if "today" is selected)
+reference data is downloaded and an index is computed (which needs substantial resources).
+Pre-built indices might be preferable in terms of reproducibility across Galaxy instances.
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1038/ncomms11257</citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Tue Apr 22 14:02:32 2025 +0000
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<data_managers>
+    <!-- Binds the kaiju data manager tool to the "kaiju" data table. -->
+    <data_manager id="kaiju_data_manager" tool_file="data_manager/kaiju_data_manager.xml">
+        <data_table name="kaiju">
+            <output>
+                <column name="value"/>
+                <column name="name"/>
+                <!-- The directory attached to out_file is moved to kaiju/${value}/ below the
+                     data manager data path; the stored path is rewritten to that location and made absolute. -->
+                <column name="path" output_ref="out_file">
+                    <move relativize_symlinks="True" type="directory">
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">kaiju/${value}/</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/kaiju/${value}/</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+                <column name="version"/>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kaiju.loc Tue Apr 22 14:02:32 2025 +0000 @@ -0,0 +1,3 @@ +2025-04-14_viruses 2025-04-14 (viruses) viral genomes from NCBI RefSeq /tmp/tmpej7pd7vz/galaxy-dev/tool-data/kaiju/2025-04-14_viruses 1.10.1 +2024-08-15_viruses 2024-08-15 (viruses) viral genomes from NCBI RefSeq /tmp/tmp6ojvc_47/galaxy-dev/tool-data/kaiju/2024-08-15_viruses 1.10.1 +2025-04-14_viruses 2025-04-14 (viruses) viral genomes from NCBI RefSeq /tmp/tmp6ojvc_47/galaxy-dev/tool-data/kaiju/2025-04-14_viruses 1.10.1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/kaiju.loc.sample Tue Apr 22 14:02:32 2025 +0000 @@ -0,0 +1,5 @@ +# value: unique ID of the entry: date + db name (e.g. 2024-08-15_viruses) +# name: what is shown to the user in the select +# path: directory of the reference data; must contain database.fmi, names.dmp and nodes.dmp +# version: kaiju version used for constructing the DB (or just the current version at the time when pre-computed indices were downloaded) +#value name path version \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Apr 22 14:02:32 2025 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of kaiju databases; duplicate entries are rejected. -->
+    <table name="kaiju" allow_duplicate_entries="False" comment_char="#">
+        <columns>value, name, path, version</columns>
+        <file path="tool-data/kaiju.loc"/>
+    </table>
+</tables>