changeset 16:54871a78828e draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_build_kraken2_database commit a108f20aebc04574a8bd0a90b955064439a50852
author iuc
date Wed, 05 Nov 2025 13:32:18 +0000
parents 201eff2131d6
children
files data_manager/kraken2_build_database.py data_manager/kraken2_build_database.xml
diffstat 2 files changed, 352 insertions(+), 566 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/kraken2_build_database.py	Sat Jan 25 17:41:48 2025 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,506 +0,0 @@
-#!/usr/bin/env python
-
-import argparse
-import datetime
-import errno
-import json
-import os
-import re
-import shutil
-import subprocess
-import sys
-import tarfile
-from enum import Enum
-
-try:
-    # Python3
-    from urllib.request import urlopen
-    from urllib.error import URLError
-except ImportError:
-    from urllib2 import urlopen
-    from urllib2 import URLError
-
-
-DATA_TABLE_NAME = "kraken2_databases"
-
-
-class KrakenDatabaseTypes(Enum):
-    standard_local_build = 'standard_local_build'
-    standard_prebuilt = 'standard_prebuilt'
-    minikraken = 'minikraken'
-    special_prebuilt = 'special_prebuilt'
-    special = 'special'
-    custom = 'custom'
-
-    def __str__(self):
-        return self.value
-
-
-class SpecialDatabaseTypes(Enum):
-    rdp = 'rdp'
-    greengenes = 'greengenes'
-    silva = 'silva'
-
-    def __str__(self):
-        return self.value
-
-
-class Minikraken2Versions(Enum):
-    v1 = 'v1'
-    v2 = 'v2'
-
-    def __str__(self):
-        return self.value
-
-
-class StandardPrebuiltSizes(Enum):
-    viral = "viral"
-    minusb = "minusb"
-    standard = "standard"
-    standard_08gb = "standard_08gb"
-    standard_16gb = "standard_16gb"
-    pluspf = "pluspf"
-    pluspf_08gb = "pluspf_08gb"
-    pluspf_16gb = "pluspf_16gb"
-    pluspfp = "pluspfp"
-    pluspfp_08gb = "pluspfp_08gb"
-    pluspfp_16gb = "pluspfp_16gb"
-    eupathdb48 = "eupathdb48"
-    core_nt = "core_nt"
-    gtdb_genome_reps = "gtdb_genome_reps"
-
-    def __str__(self):
-        return self.value
-
-
-def kraken2_build_standard(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME):
-    now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
-
-    database_value = "_".join([
-        now,
-        "standard",
-        "kmer-len", str(kraken2_args["kmer_len"]),
-        "minimizer-len", str(kraken2_args["minimizer_len"]),
-        "minimizer-spaces", str(kraken2_args["minimizer_spaces"]),
-        "load-factor", str(kraken2_args["load_factor"]),
-    ])
-
-    database_name = " ".join([
-        "Standard (Local Build)",
-        "(Created:",
-        now + ",",
-        "kmer-len=" + str(kraken2_args["kmer_len"]) + ",",
-        "minimizer-len=" + str(kraken2_args["minimizer_len"]) + ",",
-        "minimizer-spaces=" + str(kraken2_args["minimizer_spaces"]) + ")",
-        "load-factor", str(kraken2_args["load_factor"]),
-    ])
-
-    database_path = database_value
-
-    args = [
-        '--threads', str(kraken2_args["threads"]),
-        '--standard',
-        '--kmer-len', str(kraken2_args["kmer_len"]),
-        '--minimizer-len', str(kraken2_args["minimizer_len"]),
-        '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]),
-        '--load-factor', str(kraken2_args["load_factor"]),
-        '--db', database_path
-    ]
-
-    subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
-
-    if kraken2_args["clean"]:
-        args = [
-            '--threads', str(kraken2_args["threads"]),
-            '--clean',
-            '--db', database_path
-        ]
-
-        subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
-
-    data_table_entry = {
-        'data_tables': {
-            data_table_name: [
-                {
-                    "value": database_value,
-                    "name": database_name,
-                    "path": database_path,
-                }
-            ]
-        }
-    }
-
-    return data_table_entry
-
-
-def kraken2_build_standard_prebuilt(prebuilt_db, prebuilt_date, target_directory, data_table_name=DATA_TABLE_NAME):
-
-    now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
-
-    prebuild_name = {
-        'viral': "Viral",
-        'minusb': "MinusB (archaea, viral, plasmid, human, UniVec_Core)",
-        'standard': "Standard-Full (archaea, bacteria, viral, plasmid, human,UniVec_Core)",
-        'standard_08gb': "Standard-8 (Standard with DB capped at 8 GB)",
-        'standard_16gb': "Standard-16 (Standard with DB capped at 16 GB)",
-        'pluspf': "PlusPF (Standard plus protozoa and fungi)",
-        'pluspf_08gb': "PlusPF-8 (PlusPF with DB capped at 8 GB)",
-        'pluspf_16gb': "PlusPF-16 (PlusPF with DB capped at 16 GB)",
-        'pluspfp': "PlusPFP (Standard plus protozoa, fungi and plant)",
-        'pluspfp_08gb': "PlusPFP-8 (PlusPFP with DB capped at 8 GB)",
-        'pluspfp_16gb': "PlusPFP-16 (PlusPFP with DB capped at 16 GB)",
-        'eupathdb48': "EuPathDB-46",
-        'core_nt': "core_nt (Very large collection, inclusive of GenBank, RefSeq, TPA and PDB)",
-        'gtdb_genome_reps': "GTDB v220 (Bacterial and archaeal)",
-    }
-
-    database_value = "_".join([
-        now,
-        "standard_prebuilt",
-        prebuilt_db,
-        prebuilt_date
-    ])
-
-    database_name = " ".join([
-        "Prebuilt Refseq indexes: ",
-        prebuild_name[prebuilt_db],
-        "(Version: ",
-        prebuilt_date,
-        "- Downloaded:",
-        now + ")"
-    ])
-
-    database_path = database_value
-
-    # we may need to let the user choose the date when new DBs are posted.
-    date_url_str = prebuilt_date.replace('-', '')
-    # download the pre-built database
-    try:
-        download_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_%s_%s.tar.gz' % (prebuilt_db, date_url_str)
-        src = urlopen(download_url)
-    except URLError as e:
-        print('url: ' + download_url, file=sys.stderr)
-        print(e, file=sys.stderr)
-        exit(1)
-
-    with open('tmp_data.tar.gz', 'wb') as dst:
-        shutil.copyfileobj(src, dst)
-    # unpack the downloaded archive to the target directory
-    with tarfile.open('tmp_data.tar.gz', 'r:gz') as fh:
-        for member in fh.getmembers():
-            if member.isreg():
-                member.name = os.path.basename(member.name)
-                fh.extract(member, os.path.join(target_directory, database_path))
-
-    data_table_entry = {
-        'data_tables': {
-            data_table_name: [
-                {
-                    "value": database_value,
-                    "name": database_name,
-                    "path": database_path,
-                }
-            ]
-        }
-    }
-
-    return data_table_entry
-
-
-def kraken2_build_minikraken(minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME):
-
-    now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
-
-    database_value = "_".join([
-        now,
-        "minikraken2",
-        minikraken2_version,
-        "8GB",
-    ])
-
-    database_name = " ".join([
-        "Minikraken2",
-        minikraken2_version,
-        "(Created:",
-        now + ")"
-    ])
-
-    database_path = database_value
-
-    # download the minikraken2 data
-    try:
-        download_url = 'https://genome-idx.s3.amazonaws.com/kraken/minikraken2_%s_8GB_201904.tgz' % minikraken2_version
-        src = urlopen(download_url)
-    except URLError as e:
-        print('url: ' + download_url, file=sys.stderr)
-        print(e, file=sys.stderr)
-        exit(1)
-
-    with open('tmp_data.tar.gz', 'wb') as dst:
-        shutil.copyfileobj(src, dst)
-    # unpack the downloaded archive to the target directory
-    with tarfile.open('tmp_data.tar.gz', 'r:gz') as fh:
-        for member in fh.getmembers():
-            if member.isreg():
-                member.name = os.path.basename(member.name)
-                fh.extract(member, os.path.join(target_directory, database_path))
-
-    data_table_entry = {
-        'data_tables': {
-            data_table_name: [
-                {
-                    "value": database_value,
-                    "name": database_name,
-                    "path": database_path,
-                }
-            ]
-        }
-    }
-
-    return data_table_entry
-
-
-def kraken2_build_special(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME):
-
-    now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
-
-    special_database_names = {
-        "rdp": "RDP",
-        "greengenes": "Greengenes",
-        "silva": "Silva",
-    }
-
-    database_value = "_".join([
-        now,
-        kraken2_args["special_database_type"],
-        "kmer-len", str(kraken2_args["kmer_len"]),
-        "minimizer-len", str(kraken2_args["minimizer_len"]),
-        "minimizer-spaces", str(kraken2_args["minimizer_spaces"]),
-        "load-factor", str(kraken2_args["load_factor"]),
-    ])
-
-    database_name = " ".join([
-        special_database_names[kraken2_args["special_database_type"]],
-        "(Created:",
-        now + ",",
-        "kmer-len=" + str(kraken2_args["kmer_len"]) + ",",
-        "minimizer-len=" + str(kraken2_args["minimizer_len"]) + ",",
-        "minimizer-spaces=" + str(kraken2_args["minimizer_spaces"]) + ")",
-        "load-factor=" + str(kraken2_args["load_factor"]) + ")",
-    ])
-
-    database_path = database_value
-
-    args = [
-        '--threads', str(kraken2_args["threads"]),
-        '--special', kraken2_args["special_database_type"],
-        '--kmer-len', str(kraken2_args["kmer_len"]),
-        '--minimizer-len', str(kraken2_args["minimizer_len"]),
-        '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]),
-        '--load-factor', str(kraken2_args["load_factor"]),
-        '--db', database_path
-    ]
-
-    subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
-
-    if kraken2_args["clean"]:
-        args = [
-            '--threads', str(kraken2_args["threads"]),
-            '--clean',
-            '--db', database_path
-        ]
-
-        subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
-
-    data_table_entry = {
-        'data_tables': {
-            data_table_name: [
-                {
-                    "value": database_value,
-                    "name": database_name,
-                    "path": database_path,
-                }
-            ]
-        }
-    }
-
-    return data_table_entry
-
-
-def kraken2_build_custom(kraken2_args, custom_database_name, custom_source_info, target_directory, data_table_name=DATA_TABLE_NAME):
-    now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
-
-    database_value = "_".join([
-        now,
-        re.sub(r'[^\w_.-]+', '_', custom_database_name).strip('_'),
-        "kmer-len", str(kraken2_args["kmer_len"]),
-        "minimizer-len", str(kraken2_args["minimizer_len"]),
-        "minimizer-spaces", str(kraken2_args["minimizer_spaces"]),
-        "load-factor", str(kraken2_args["load_factor"]),
-    ])
-
-    database_name = " ".join([
-        custom_database_name,
-        "(" + custom_source_info + ",",
-        "kmer-len=" + str(kraken2_args["kmer_len"]) + ",",
-        "minimizer-len=" + str(kraken2_args["minimizer_len"]) + ",",
-        "minimizer-spaces=" + str(kraken2_args["minimizer_spaces"]) + ",",
-        "load-factor=" + str(kraken2_args["load_factor"]) + ")",
-    ])
-
-    database_path = database_value
-
-    args = [
-        '--threads', str(kraken2_args["threads"]),
-        '--download-taxonomy',
-        '--db', database_path,
-    ]
-
-    if kraken2_args['skip_maps']:
-        args.append('--skip-maps')
-
-    subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
-
-    args = [
-        '--threads', str(kraken2_args["threads"]),
-        '--add-to-library', kraken2_args["custom_fasta"],
-        '--db', database_path,
-    ]
-
-    subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
-
-    args = [
-        '--threads', str(kraken2_args["threads"]),
-        '--build',
-        '--kmer-len', str(kraken2_args["kmer_len"]),
-        '--minimizer-len', str(kraken2_args["minimizer_len"]),
-        '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]),
-        '--load-factor', str(kraken2_args["load_factor"]),
-        '--db', database_path,
-    ]
-
-    subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
-
-    if kraken2_args["clean"]:
-        args = [
-            '--threads', str(kraken2_args["threads"]),
-            '--clean',
-            '--db', database_path,
-        ]
-
-        subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
-
-    data_table_entry = {
-        'data_tables': {
-            data_table_name: [
-                {
-                    "value": database_value,
-                    "name": database_name,
-                    "path": database_path,
-                }
-            ]
-        }
-    }
-
-    return data_table_entry
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('data_manager_json')
-    parser.add_argument('--kmer-len', dest='kmer_len', type=int, default=35, help='kmer length')
-    parser.add_argument('--minimizer-len', dest='minimizer_len', type=int, default=31, help='minimizer length')
-    parser.add_argument('--minimizer-spaces', dest='minimizer_spaces', default=6, help='minimizer spaces')
-    parser.add_argument('--load-factor', dest='load_factor', type=float, default=0.7, help='load factor')
-    parser.add_argument('--threads', dest='threads', default=1, help='threads')
-    parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build')
-    parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)')
-    parser.add_argument('--prebuilt-db', dest='prebuilt_db', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Prebuilt database to download. Only applies to --database-type standard_prebuilt or special_prebuilt.')
-    parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.')
-    parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)')
-    parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)')
-    parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)')
-    parser.add_argument('--custom-source-info', dest='custom_source_info', help='Description of how this build has been sourced (only applies to --database-type custom)')
-    parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='')
-    parser.add_argument('--clean', dest='clean', action='store_true', help='Clean up extra files')
-    args = parser.parse_args()
-
-    with open(args.data_manager_json) as fh:
-        data_manager_input = json.load(fh)
-
-    target_directory = data_manager_input['output_data'][0]['extra_files_path']
-
-    try:
-        os.mkdir(target_directory)
-    except OSError as exc:
-        if exc.errno == errno.EEXIST and os.path.isdir(target_directory):
-            pass
-        else:
-            raise
-
-    data_manager_output = {}
-
-    if str(args.database_type) == 'standard_local_build':
-        kraken2_args = {
-            "kmer_len": args.kmer_len,
-            "minimizer_len": args.minimizer_len,
-            "minimizer_spaces": args.minimizer_spaces,
-            "load_factor": args.load_factor,
-            "threads": args.threads,
-            "clean": args.clean,
-        }
-        data_manager_output = kraken2_build_standard(
-            kraken2_args,
-            target_directory,
-        )
-    elif str(args.database_type) in ('standard_prebuilt', 'special_prebuilt'):
-        data_manager_output = kraken2_build_standard_prebuilt(
-            str(args.prebuilt_db),
-            str(args.prebuilt_date),
-            target_directory
-        )
-    elif str(args.database_type) == 'minikraken':
-        data_manager_output = kraken2_build_minikraken(
-            str(args.minikraken2_version),
-            target_directory
-        )
-    elif str(args.database_type) == 'special':
-        kraken2_args = {
-            "special_database_type": str(args.special_database_type),
-            "kmer_len": args.kmer_len,
-            "minimizer_len": args.minimizer_len,
-            "minimizer_spaces": args.minimizer_spaces,
-            "load_factor": args.load_factor,
-            "threads": args.threads,
-            "clean": args.clean,
-        }
-        data_manager_output = kraken2_build_special(
-            kraken2_args,
-            target_directory,
-        )
-    elif str(args.database_type) == 'custom':
-        kraken2_args = {
-            "custom_fasta": args.custom_fasta,
-            "skip_maps": args.skip_maps,
-            "kmer_len": args.kmer_len,
-            "minimizer_len": args.minimizer_len,
-            "minimizer_spaces": args.minimizer_spaces,
-            "load_factor": args.load_factor,
-            "threads": args.threads,
-            "clean": args.clean,
-        }
-        data_manager_output = kraken2_build_custom(
-            kraken2_args,
-            args.custom_database_name,
-            args.custom_source_info,
-            target_directory,
-        )
-    else:
-        sys.exit("Invalid database type")
-
-    with open(args.data_manager_json, 'w') as fh:
-        json.dump(data_manager_output, fh, sort_keys=True)
-
-
-if __name__ == "__main__":
-    main()
--- a/data_manager/kraken2_build_database.xml	Sat Jan 25 17:41:48 2025 +0000
+++ b/data_manager/kraken2_build_database.xml	Wed Nov 05 13:32:18 2025 +0000
@@ -1,15 +1,15 @@
 <tool id="kraken2_build_database" name="Kraken2" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <description>database builder</description>
     <macros>
-        <token name="@TOOL_VERSION@">2.1.3</token>
-        <token name="@VERSION_SUFFIX@">6</token>
-        <token name="@PROFILE@">22.01</token>
+        <token name="@TOOL_VERSION@">2.1.6</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+        <token name="@PROFILE@">24.0</token>
         <xml name="common_params">
             <param name="kmer_len" type="integer" value="35" label="K-mer length in BP" />
             <param name="minimizer_len" type="integer" value="31" label="Minimizer length" />
             <param name="minimizer_spaces" type="integer" value="7" label="Minimizer spaces" />
             <param name="load_factor" type="float" value="0.7" min="0" max="1" label="Load factor" help="Proportion of the hash table to be populated" />
-            <param name="clean" type="boolean" truevalue="--clean" falsevalue="" checked="true" label="Clean up extra files" />
+            <param name="clean" type="boolean" truevalue="--clean" falsevalue="" checked="false" label="Clean up extra files. Note: If the extra files are removed this DB cannot be used to build a bracken DB!" />
         </xml>
         <xml name="viral">
             <option value="viral">Viral (viral; ~0.5 GB)</option>
@@ -44,57 +44,151 @@
         <xml name="pluspfp_16gb">
             <option value="pluspfp_16gb">PlusPFP-16 (PlusPFP with DB capped at 16 GB; ~15 GB)</option>
         </xml>
-        <xml name="core_nt">
-            <option value="core_nt">core_nt (Very large collection, inclusive of GenBank, RefSeq, TPA and PDB; ~182 GB)</option>
-        </xml>
     </macros>
     <xrefs>
         <xref type="bio.tools">kraken2</xref>
     </xrefs>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">kraken2</requirement>
-        <requirement type="package" version="3.13">python</requirement>
     </requirements>
     <version_command>kraken2 -version | head -n 1 | awk '{print $NF}'</version_command>
     <command detect_errors="exit_code"><![CDATA[
-python '$__tool_directory__/kraken2_build_database.py'
-    '$out_file'
-    --database-type '$database_type.database_type'
-#if $database_type.database_type == "standard_local_build"
-    --threads \${GALAXY_SLOTS:-1}
-    --kmer-len $database_type.kmer_len
-    --minimizer-len $database_type.minimizer_len
-    --minimizer-spaces $database_type.minimizer_spaces
-    --load-factor $database_type.load_factor
-    $database_type.clean
-#else if $database_type.database_type == "standard_prebuilt"
-    --prebuilt-db '$database_type.prebuild.prebuilt_db'
-    --prebuilt-date '$database_type.prebuild.prebuilt_date'
+#import datetime
+#import re
+
+## now: UTC timestamp that prefixes every database id built below
+## commands: shell commands collected by the branches below; the #for loop at the end of
+## this template emits them one after another, each followed by &&
+#set now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
+#set commands = []
+## -p: do not abort the whole command chain when the extra files directory already exists
+mkdir -p '$out_file.extra_files_path' &&
+
+#if  $database_type.database_type == "standard_prebuilt" or $database_type.database_type == "special_prebuilt" or $database_type.database_type == "amplicon_prebuilt"
+    ## Display labels for the standard RefSeq collections, looked up by the selected prebuilt_db value alone.
+    #set prebuilt_name = {
+        'viral': "Viral",
+        'minusb': "MinusB (archaea, viral, plasmid, human, UniVec_Core)",
+        'standard': "Standard-Full (archaea, bacteria, viral, plasmid, human,UniVec_Core)",
+        'standard_08gb': "Standard-8 (Standard with DB capped at 8 GB)",
+        'standard_16gb': "Standard-16 (Standard with DB capped at 16 GB)",
+        'pluspf': "PlusPF (Standard plus protozoa and fungi)",
+        'pluspf_08gb': "PlusPF-8 (PlusPF with DB capped at 8 GB)",
+        'pluspf_16gb': "PlusPF-16 (PlusPF with DB capped at 16 GB)",
+        'pluspfp': "PlusPFP (Standard plus protozoa, fungi and plant)",
+        'pluspfp_08gb': "PlusPFP-8 (PlusPFP with DB capped at 8 GB)",
+        'pluspfp_16gb': "PlusPFP-16 (PlusPFP with DB capped at 16 GB)",
+    }
+    ## Display labels for the special collections, looked up by prebuilt_db + "_" + build date without dashes.
+    ## NOTE(review): the eupathdb48 keys carry "EuPathDB-46" labels and the 20230407 key says "April 18" - confirm upstream naming.
+    #set special_name = {
+        "core_nt_20250609": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (July, 2025)",
+        "core_nt_20241228": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (December, 2024)",
+        "core_nt_20240904": "Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (September, 2024)",
+        "gtdb_genome_reps_20250609": "GTDB v226 (Bacterial and archaeal)",
+        "gtdb_genome_reps_20241109": "GTDB v220 (Bacterial and archaeal)",
+        "eupathdb48_20230407": "EuPathDB-46 (April 18, 2023)",
+        "eupathdb48_20201113": "EuPathDB-46 (November 13, 2020)"
+    }
+    ## Display labels for the 16S amplicon collections, looked up the same way as special_name.
+    #set amplicon_name = {
+        "16S_Greengenes13.5_20200326": "Greengenes 13.5",
+        "16S_RDP11.5_20200326": "RDP 11.5",
+        "16S_Silva132_20200326": "Silva 132",
+        "16S_Silva138_20200326": "Silva 138"
+    }
+
+    ## The download file names and the special/amplicon label keys use the build date without dashes.
+    #set date_url_str = str($database_type.prebuilt.prebuilt_date).replace('-', '')
+    ## Resolve the display label: standard collections first (keyed by db only),
+    ## then the special and amplicon tables (keyed by db + "_" + date).
+    #set display_name = prebuilt_name.get(str($database_type.prebuilt.prebuilt_db))
+    #if not display_name
+        #set display_name = special_name.get(str($database_type.prebuilt.prebuilt_db)+"_"+date_url_str)
+    #end if
+    #if not display_name
+        #set display_name = amplicon_name.get(str($database_type.prebuilt.prebuilt_db)+"_"+date_url_str)
+    #end if
+    #if not display_name
+        ## No label is known for this db/date pair: fall back to the raw db id, because a None
+        ## entry would make the join below raise a TypeError while the command is rendered.
+        #set display_name = str($database_type.prebuilt.prebuilt_db)
+    #end if
+
+    #set database_value = "_".join([now, "standard_prebuilt", str($database_type.prebuilt.prebuilt_db), str($database_type.prebuilt.prebuilt_date)])
+    #set database_name = " ".join(["Prebuilt Refseq indexes: ", display_name, "(Version: ", str($database_type.prebuilt.prebuilt_date), "- Downloaded:", now + ")"])
+
+    ## the 16S dbs have a different link and file name
+    ## and are stored in a subfolder
+    #if $database_type.database_type == "amplicon_prebuilt"
+        #silent commands.append("wget https://genome-idx.s3.amazonaws.com/kraken/" + str($database_type.prebuilt.prebuilt_db) + "_" + date_url_str + ".tgz")
+        #silent commands.append("mkdir -p '" + $out_file.extra_files_path + "/" + database_value + "'/tmp_extract")
+        #silent commands.append("tar -xzf " + str($database_type.prebuilt.prebuilt_db) + "_" + date_url_str + ".tgz -C '" + $out_file.extra_files_path + "/" + database_value + "'/tmp_extract")
+        #silent commands.append("topdir=$(find '" + $out_file.extra_files_path + "/" + database_value + "/tmp_extract' -mindepth 1 -maxdepth 1 -type d | head -n 1)")
+        ## Every entry of commands becomes its own link of the final && chain, so an if/then/fi
+        ## spread over several entries would render as "if ... && then && ... && fi", which is a
+        ## shell syntax error. A plain test keeps the chain valid and stops it before the
+        ## rm below when the archive did not contain the expected subfolder.
+        #silent commands.append("test -n \"$topdir\"")
+        #silent commands.append("mv \"$topdir\"/* '" + $out_file.extra_files_path + "/" + database_value + "/'")
+        #silent commands.append("rm -rf '" + $out_file.extra_files_path + "/" + database_value + "/tmp_extract'")
+    #else
+        ## standard and special archives are unpacked directly into the database folder
+        #silent commands.append("wget https://genome-idx.s3.amazonaws.com/kraken/k2_" + str($database_type.prebuilt.prebuilt_db) + "_" + date_url_str + ".tar.gz")
+        #silent commands.append("mkdir -p '" + $out_file.extra_files_path + "/" + database_value + "'")
+        #silent commands.append("tar -xzf k2_" + str($database_type.prebuilt.prebuilt_db) + "_" + date_url_str + ".tar.gz -C '" + $out_file.extra_files_path + "/" + database_value + "'")
+    #end if
+
 #else if $database_type.database_type == "minikraken"
-    --minikraken2-version '$database_type.minikraken2_version'
-#else if $database_type.database_type == "special_prebuilt"
-    --prebuilt-db '$database_type.special_prebuild.prebuilt_db'
-    --prebuilt-date '$database_type.special_prebuild.prebuilt_date'
-#else if $database_type.database_type == "special"
-    --threads \${GALAXY_SLOTS:-1}
-    --special-database-type '$database_type.special_database_type'
-    --kmer-len $database_type.kmer_len
-    --minimizer-len $database_type.minimizer_len
-    --minimizer-spaces $database_type.minimizer_spaces
-    --load-factor $database_type.load_factor
-    $database_type.clean
-#else if $database_type.database_type == "custom"
-    --threads \${GALAXY_SLOTS:-1}
-    --custom-fasta '$database_type.custom_fasta'
-    --custom-database-name '$database_type.custom_database_name'
-    --custom-source-info '$database_type.custom_source_info'
-    $database_type.skip_maps
-    --kmer-len $database_type.kmer_len
-    --minimizer-len $database_type.minimizer_len
-    --minimizer-spaces $database_type.minimizer_spaces
-    --load-factor $database_type.load_factor
-    $database_type.clean
+    ## MiniKraken: id and label are derived from the timestamp and the selected version.
+    #set database_value = "_".join([now, "minikraken2", str($database_type.minikraken2_version), "8GB"])
+    #set database_name = " ".join(["Minikraken2", str($database_type.minikraken2_version), "(Created:", now + ")"])
+
+    ## Download the archive, create the database folder and unpack into it.
+    ## NOTE(review): the removed Python helper flattened archive members to their basename, while
+    ## plain tar -xzf keeps any top-level folder - confirm the layout of the minikraken archives.
+    #silent commands.append("wget 'https://genome-idx.s3.amazonaws.com/kraken/minikraken2_" + str($database_type.minikraken2_version) + "_8GB_201904.tgz'")
+    #silent commands.append("mkdir -p '" + $out_file.extra_files_path + "'/'" + database_value + "'")
+    #silent commands.append("tar -xzf 'minikraken2_" + str($database_type.minikraken2_version) + "_8GB_201904.tgz' -C '" + $out_file.extra_files_path + "'/'" + database_value + "'")
+#else
+    ## database_value is the id and folder name of the new database, database_name its display label.
+    ## Every label opens its parameter list with "(" and closes it with ")".
+    #if $database_type.database_type == "standard_local_build"
+        #set database_value = "_".join([now, "standard", "kmer-len", str($database_type.kmer_len), "minimizer-len", str($database_type.minimizer_len), "minimizer-spaces", str($database_type.minimizer_spaces), "load-factor", str($database_type.load_factor)])
+        #set database_name = " ".join(["Standard (Local Build)", "(Created:", now + ",", "kmer-len=" + str($database_type.kmer_len) + ",", "minimizer-len=" + str($database_type.minimizer_len) + ",", "minimizer-spaces=" + str($database_type.minimizer_spaces) + ",", "load-factor=" + str($database_type.load_factor) + ")"])
+    #else if $database_type.database_type == "special"
+        #set special_database_names = {"rdp": "RDP", "greengenes": "Greengenes", "silva": "Silva"}
+        #set database_value = "_".join([now, str($database_type.special_database_type), "kmer-len", str($database_type.kmer_len), "minimizer-len", str($database_type.minimizer_len), "minimizer-spaces", str($database_type.minimizer_spaces), "load-factor", str($database_type.load_factor)])
+        #set database_name = " ".join([special_database_names[str($database_type.special_database_type)], "(Created:", now + ",", "kmer-len=" + str($database_type.kmer_len) + ",", "minimizer-len=" + str($database_type.minimizer_len) + ",", "minimizer-spaces=" + str($database_type.minimizer_spaces) + ",", "load-factor=" + str($database_type.load_factor) + ")"])
+    #else if $database_type.database_type == "custom"
+        #set custom_database_name = re.sub(r'[^\w_.-]+', '_', str($database_type.custom_database_name)).strip('_')
+        #set database_name = " ".join([custom_database_name, "(" + str($database_type.custom_source_info) + ",", "kmer-len=" + str($database_type.kmer_len) + ",", "minimizer-len=" + str($database_type.minimizer_len) + ",", "minimizer-spaces=" + str($database_type.minimizer_spaces) + ",", "load-factor=" + str($database_type.load_factor) + ")"])
+        #set database_value = "_".join([now, custom_database_name, "kmer-len", str($database_type.kmer_len), "minimizer-len", str($database_type.minimizer_len), "minimizer-spaces", str($database_type.minimizer_spaces), "load-factor", str($database_type.load_factor)])
+    #else
+        >&2 echo "invalid database_type: $database_type.database_type"
+    #end if
+
+    ## Custom databases need two preparatory kraken2-build calls before the build itself:
+    ## one that downloads the taxonomy and one that adds the user supplied FASTA to the library.
+    #if $database_type.database_type == "custom"
+        #silent command = ["kraken2-build", "--threads", '"${GALAXY_SLOTS:-1}"',
+            "--download-taxonomy",
+            "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'",
+            str($database_type.skip_maps)]
+        #silent commands.append(" ".join(command))
+        #silent command = ["kraken2-build", "--threads", '"${GALAXY_SLOTS:-1}"',
+            "--add-to-library", "'" + str($database_type.custom_fasta) + "'",
+            "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"]
+        #silent commands.append(" ".join(command))
+    #end if
+
+    ## Build call shared by all three local build types; only the mode flag differs.
+    #silent command = ["kraken2-build", "--threads", '"${GALAXY_SLOTS:-1}"']
+    #if $database_type.database_type == "standard_local_build"
+        #silent command.append("--standard")
+    #else if $database_type.database_type == "special"
+        #silent command.extend(["--special", str($database_type.special_database_type)])
+    #else if $database_type.database_type == "custom"
+        #silent command.append("--build")
+    #end if
+    #silent command.extend([
+        "--kmer-len", str($database_type.kmer_len),
+        "--minimizer-len", str($database_type.minimizer_len),
+        "--minimizer-spaces", str($database_type.minimizer_spaces),
+        "--load-factor", str($database_type.load_factor),
+        "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"])
+    #silent commands.append(" ".join(command))
+
+    ## Optional clean-up call, queued after the build when the clean parameter is set.
+    #if $database_type.clean
+        #silent command = ["kraken2-build", "--threads", '"${GALAXY_SLOTS:-1}"',
+            "--clean",
+            "--db", "'" + $out_file.extra_files_path + "'/'" + database_value + "'"]
+        #silent commands.append(" ".join(command))
+    #end if
 #end if
+
+## Emit every collected command followed by &&, so that together with the mkdir at the top
+## and the final echo below the whole job is one && chain.
+#for command in commands
+    ## In test mode the tool executes `echo COMMAND` instead of `COMMAND`
+    #if $run_test_command == "false"
+        echo
+    #end if
+    #echo command
+    &&
+#end for
+
+## Last link of the chain: write the data table entry (value, name, path) as JSON to the output file.
+## The path is the same relative folder name that is used as the value.
+echo '{"data_tables": {"kraken2_databases": [{"value": "$database_value", "name": "$database_name", "path": "$database_value"}]}}' > '$out_file'
 ]]>
     </command>
     <inputs>
@@ -104,6 +198,7 @@
                 <option value="standard_prebuilt">Pre-Built Refseq indexes</option>
                 <option value="minikraken">MiniKraken</option>
                 <option value="special_prebuilt">Special Pre-Built indexes</option>
+                <option value="amplicon_prebuilt">16S Pre-Built indexes</option>
                 <option value="special">Special</option>
                 <option value="custom">Custom</option>
             </param>
@@ -111,8 +206,9 @@
                 <expand macro="common_params" />
             </when>
             <when value="standard_prebuilt">
-                <conditional name="prebuild">
+                <conditional name="prebuilt">
                     <param name="prebuilt_date" type="select" label="Select index build date">
+                        <option value="2025-07-14">July 14, 2025</option>
                         <option value="2024-12-28">December 28, 2024</option>
                         <option value="2024-09-04">September 4, 2024</option>
                         <option value="2024-06-05">June 5, 2024</option>
@@ -124,6 +220,21 @@
                         <option value="2020-12-02">December 2, 2020</option>
                         <option value="2020-09-19">September 19, 2020</option>
                     </param>
+                    <when value="2025-07-14"> <!-- indexes selectable for the "July 14, 2025" build date -->
+                        <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download">
+                            <expand macro="viral"/>
+                            <expand macro="minusb"/>
+                            <expand macro="standard"/>
+                            <expand macro="standard_08gb"/>
+                            <expand macro="standard_16gb"/>
+                            <expand macro="pluspf"/>
+                            <expand macro="pluspf_08gb"/>
+                            <expand macro="pluspf_16gb"/>
+                            <expand macro="pluspfp"/>
+                            <expand macro="pluspfp_08gb"/>
+                            <expand macro="pluspfp_16gb"/>
+                        </param>
+                    </when>
                     <when value="2024-12-28">
                         <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download">
                             <expand macro="viral"/>
@@ -137,7 +248,6 @@
                             <expand macro="pluspfp"/>
                             <expand macro="pluspfp_08gb"/>
                             <expand macro="pluspfp_16gb"/>
-                            <expand macro="core_nt"/>
                         </param>
                     </when>
                     <when value="2024-09-04">
@@ -152,7 +262,6 @@
                             <expand macro="pluspf_16gb"/>
                             <expand macro="pluspfp"/>
                             <expand macro="pluspfp_08gb"/>
-                            <expand macro="core_nt"/>
                         </param>
                     </when>
                     <when value="2024-06-05">
@@ -263,12 +372,32 @@
                 </conditional>
             </when>
             <when value="special_prebuilt">
-                <conditional name="special_prebuild">
-                    <param name="special_prebuilt_db" type="select" multiple="false" label="Select pre-built database to download">
-                        <option value="gtdb_genome_reps_20241109">GTDB v220 (Bacterial and archaeal; ~497 GB) (December 13, 2024)</option>
+                <conditional name="prebuilt">
+                    <param name="xyz" type="select" multiple="false" label="Select pre-built database to download">
+                        <option value="core_nt_20250609">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (July, 2025)</option>
+                        <option value="core_nt_20241228">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (December, 2024)</option>
+                        <option value="core_nt_20240904">Very large collection, inclusive of GenBank, RefSeq, TPA and PDB (September, 2024)</option>
+                        <option value="gtdb_genome_reps_20250609">GTDB v226 (Bacterial and archaeal; ~497 GB) (July, 2025)</option>
+                        <option value="gtdb_genome_reps_20241109">GTDB v220 (Bacterial and archaeal; ~644 GB) (December 13, 2024)</option>
                         <option value="eupathdb48_20230407">EuPathDB-46 (April 18, 2023)</option>
                         <option value="eupathdb48_20201113">EuPathDB-46 (November 13, 2020)</option>
                     </param>
+                    <when value="core_nt_20250609"> <!-- hidden params carry db name and build date; dates use YYYY-MM-DD like the GTDB entry below -->
+                        <param name="prebuilt_db" type="hidden" value="core_nt"/>
+                        <param name="prebuilt_date" type="hidden" value="2025-06-09"/>
+                    </when>
+                    <when value="core_nt_20241228">
+                        <param name="prebuilt_db" type="hidden" value="core_nt"/>
+                        <param name="prebuilt_date" type="hidden" value="2024-12-28"/>
+                    </when>
+                    <when value="core_nt_20240904">
+                        <param name="prebuilt_db" type="hidden" value="core_nt"/>
+                        <param name="prebuilt_date" type="hidden" value="2024-09-04"/>
+                    </when>
+                    <when value="gtdb_genome_reps_20250609">
+                        <param name="prebuilt_db" type="hidden" value="gtdb_genome_reps"/>
+                        <param name="prebuilt_date" type="hidden" value="2025-06-09"/>
+                    </when>
                     <when value="gtdb_genome_reps_20241109">
                         <param name="prebuilt_db" type="hidden" value="gtdb_genome_reps"/>
                         <param name="prebuilt_date" type="hidden" value="2024-11-09"/>
@@ -283,6 +412,32 @@
                     </when>
                 </conditional>
             </when>
+            <when value="amplicon_prebuilt">
+                <conditional name="prebuilt"> <!-- every choice below maps to hidden prebuilt_db / prebuilt_date values -->
+                    <param name="xyz" type="select" multiple="false" label="Select pre-built database to download">
+                        <option value="16S_Greengenes13.5_20200326">Greengenes 13.5</option>
+                        <option value="16S_RDP11.5_20200326">RDP 11.5</option>
+                        <option value="16S_Silva132_20200326">Silva 132</option>
+                        <option value="16S_Silva138_20200326">Silva 138</option>
+                    </param>
+                    <when value="16S_Greengenes13.5_20200326">
+                        <param name="prebuilt_db" type="hidden" value="16S_Greengenes13.5"/>
+                        <param name="prebuilt_date" type="hidden" value="20200326"/>
+                    </when>
+                    <when value="16S_RDP11.5_20200326">
+                        <param name="prebuilt_db" type="hidden" value="16S_RDP11.5"/>
+                        <param name="prebuilt_date" type="hidden" value="20200326"/>
+                    </when>
+                    <when value="16S_Silva132_20200326">
+                        <param name="prebuilt_db" type="hidden" value="16S_Silva132"/>
+                        <param name="prebuilt_date" type="hidden" value="20200326"/>
+                    </when>
+                    <when value="16S_Silva138_20200326">
+                        <param name="prebuilt_db" type="hidden" value="16S_Silva138"/>
+                        <param name="prebuilt_date" type="hidden" value="20200326"/>
+                    </when>
+                </conditional>
+            </when>
             <when value="minikraken">
                 <param name="minikraken2_version" type="select" multiple="false" label="Select MiniKraken2 database version to download">
                     <option value="v2">Version 2</option>
@@ -293,7 +448,7 @@
                 <param name="special_database_type" type="select" multiple="false" label="Select database to build">
                     <option value="greengenes">Greengenes</option>
                     <option value="silva">Silva</option>
-                    <option value="rdp">RDP</option>
+                    <!-- <option value="rdp">RDP</option> https://github.com/DerrickWood/kraken2/issues/736 -->
                 </param>
                 <expand macro="common_params" />
             </when>
@@ -305,39 +460,43 @@
                 <expand macro="common_params" />
             </when>
         </conditional>
+        <param name="run_test_command" type="hidden"/>
     </inputs>
     <outputs>
         <data name="out_file" format="data_manager_json" />
     </outputs>
     <tests>
+        <!-- standard_local_build -->
+
         <test expect_num_outputs="1">
             <conditional name="database_type">
-                <param name="database_type" value="custom" />
-                <param name="custom_fasta" value="adapter.fa" />
-                <param name="custom_database_name" value="custom_database" />
-                <param name="custom_source_info" value="from adapter.fa test data" />
-                <param name="skip_maps" value="true" />
+                <param name="database_type" value="standard_local_build" />
                 <param name="kmer_len" value="35" />
                 <param name="minimizer_spaces" value="6"/>
                 <param name="load_factor" value="0.7" />
                 <param name="clean" value="true"/>
             </conditional>
+            <param name="run_test_command" value="false"/>
             <output name="out_file">
                 <assert_contents>
                     <has_text text="kraken2_databases"/>
                     <has_text text="path"/>
-                    <has_text text="custom_database (from adapter.fa test data, kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7)"/>
+                    <has_text text="Standard (Local Build)"/>
+                    <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/>
                 </assert_contents>
             </output>
         </test>
+
+        <!-- standard_prebuilt -->
         <test>
             <conditional name="database_type">
                 <param name="database_type" value="standard_prebuilt" />
-                <conditional name="prebuild">
+                <conditional name="prebuilt">
                     <param name="prebuilt_date" value="2022-06-07"/>
                     <param name="prebuilt_db" value="viral"/>
                 </conditional>
             </conditional>
+            <param name="run_test_command" value="true"/>
             <output name="out_file">
                 <assert_contents>
                     <has_text text="kraken2_databases"/>
@@ -353,11 +512,12 @@
         <test>
             <conditional name="database_type">
                 <param name="database_type" value="standard_prebuilt" />
-                <conditional name="prebuild">
+                <conditional name="prebuilt">
                     <param name="prebuilt_date" value="2024-01-12"/>
                     <param name="prebuilt_db" value="viral"/>
                 </conditional>
             </conditional>
+            <param name="run_test_command" value="true"/>
             <output name="out_file">
                 <assert_contents>
                     <has_text text="kraken2_databases"/>
@@ -373,11 +533,12 @@
         <test>
             <conditional name="database_type">
                 <param name="database_type" value="standard_prebuilt" />
-                <conditional name="prebuild">
+                <conditional name="prebuilt">
                     <param name="prebuilt_date" value="2024-06-05"/>
                     <param name="prebuilt_db" value="viral"/>
                 </conditional>
             </conditional>
+            <param name="run_test_command" value="true"/>
             <output name="out_file">
                 <assert_contents>
                     <has_text text="kraken2_databases"/>
@@ -389,6 +550,137 @@
                 </assert_contents>
             </output>
         </test>
+
+        <!-- minikraken -->
+
+        <test>
+            <conditional name="database_type">
+                <param name="database_type" value="minikraken" />
+                <param name="minikraken2_version" value="v1"/>
+            </conditional>
+            <param name="run_test_command" value="false"/> <!-- "false": generated commands are echoed instead of executed -->
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="kraken2_databases"/>
+                    <has_text text="path"/>
+                    <has_text text="Minikraken2"/>
+                    <has_text text="v1"/>
+                    <has_text text="Created"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- special_prebuilt -->
+
+        <test>
+            <conditional name="database_type">
+                <param name="database_type" value="special_prebuilt" />
+                <conditional name="prebuilt">
+                    <param name="xyz" value="eupathdb48_20201113"/>
+                    <param name="prebuilt_date" value="2020-11-13"/>
+                    <param name="prebuilt_db" value="eupathdb48"/>
+                </conditional>
+            </conditional>
+            <param name="run_test_command" value="false"/> <!-- "false": generated commands are echoed instead of executed -->
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="kraken2_databases"/>
+                    <has_text text="path"/>
+                    <has_text text="Prebuilt Refseq indexes:  EuPathDB-46"/>
+                    <has_text text="standard_prebuilt_eupathdb48_2020-11-13"/>
+                    <has_text text="Prebuilt Refseq indexes"/>
+                    <has_text text="Downloaded"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- amplicon_prebuilt -->
+
+        <test>
+            <conditional name="database_type">
+                <param name="database_type" value="amplicon_prebuilt" />
+                <conditional name="prebuilt">
+                    <param name="xyz" value="16S_Greengenes13.5_20200326"/>
+                    <param name="prebuilt_date" value="20200326"/>
+                    <param name="prebuilt_db" value="16S_Greengenes13.5"/>
+                </conditional>
+            </conditional>
+            <param name="run_test_command" value="false"/> <!-- "false": generated commands are echoed instead of executed -->
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="kraken2_databases"/>
+                    <has_text text="path"/>
+                    <has_text text="16S_Greengenes13.5"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- special -->
+
+        <test expect_num_outputs="1">
+            <conditional name="database_type">
+                <param name="database_type" value="special" />
+                <param name="special_database_type" value="greengenes" />
+                <param name="kmer_len" value="35" />
+                <param name="minimizer_spaces" value="6"/>
+                <param name="load_factor" value="0.7" />
+                <param name="clean" value="true"/>
+            </conditional>
+            <param name="run_test_command" value="true"/> <!-- not "false", so the generated commands are not prefixed with echo -->
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="kraken2_databases"/>
+                    <has_text text="path"/>
+                    <has_text text="Greengenes"/>
+                    <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="database_type">
+                <param name="database_type" value="special" />
+                <param name="special_database_type" value="silva" />
+                <param name="kmer_len" value="35" />
+                <param name="minimizer_spaces" value="6"/>
+                <param name="load_factor" value="0.7" />
+                <param name="clean" value="true"/>
+            </conditional>
+            <param name="run_test_command" value="true"/> <!-- not "false", so the generated commands are not prefixed with echo -->
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="kraken2_databases"/>
+                    <has_text text="path"/>
+                    <has_text text="Silva"/>
+                    <has_text text="kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- custom -->
+
+        <test expect_num_outputs="1">
+            <conditional name="database_type">
+                <param name="database_type" value="custom" />
+                <param name="custom_fasta" value="adapter.fa" />
+                <param name="custom_database_name" value="custom_database" />
+                <param name="custom_source_info" value="from adapter.fa test data" />
+                <param name="skip_maps" value="true" />
+                <param name="kmer_len" value="35" />
+                <param name="minimizer_spaces" value="6"/>
+                <param name="load_factor" value="0.7" />
+                <param name="clean" value="true"/>
+            </conditional>
+            <param name="run_test_command" value="true"/> <!-- not "false", so the generated commands are not prefixed with echo -->
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="kraken2_databases"/>
+                    <has_text text="path"/>
+                    <has_text text="custom_database (from adapter.fa test data, kmer-len=35, minimizer-len=31, minimizer-spaces=6, load-factor=0.7)"/>
+                </assert_contents>
+            </output>
+        </test>
+
+
     </tests>
     <help><![CDATA[
 Build Kraken2 databases or download `prebuilt Kraken2 RefSeq indexes <https://benlangmead.github.io/aws-indexes/k2>`__