# HG changeset patch # User iuc # Date 1762349529 0 # Node ID 174a754bd3b61af798c621d798c3e70c727dfd58 # Parent 84cc0dc92b0c13368a44679f09dc0ce25d534e23 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_build_bracken_database commit a108f20aebc04574a8bd0a90b955064439a50852 diff -r 84cc0dc92b0c -r 174a754bd3b6 data_manager/bracken_build_database.py --- a/data_manager/bracken_build_database.py Wed Mar 06 14:09:08 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,101 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function - -import argparse -import errno -import json -import os -import subprocess -import uuid - - -DATA_TABLE_NAME = "bracken_databases" - - -def bracken_build_database(target_directory, bracken_build_args, database_name, prebuilt=False, data_table_name=DATA_TABLE_NAME): - - database_value = str(uuid.uuid4()) - - database_name = database_name - - database_path = os.path.join(bracken_build_args['kraken_database'], 'database' + str(bracken_build_args['read_len']) + 'mers.kmer_distrib') - - if not prebuilt: - bracken_build_args_list = [ - '-t', bracken_build_args['threads'], - '-k', bracken_build_args['kmer_len'], - '-l', bracken_build_args['read_len'], - '-d', bracken_build_args['kraken_database'], - ] - - subprocess.check_call(['bracken-build'] + bracken_build_args_list) - - data_table_entry = { - "data_tables": { - data_table_name: [ - { - "value": database_value, - "name": database_name, - "path": database_path, - } - ] - } - } - - return data_table_entry - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('data_manager_json') - parser.add_argument('--threads', dest='threads', default=1, help='threads') - parser.add_argument('--kmer-len', dest='kmer_len', help='K-mer length') - parser.add_argument('--read-len', dest='read_len', help='Read length') - parser.add_argument('--kraken-db', dest='kraken_database', help='Kraken Database') - parser.add_argument('--database-name', dest='database_name', help='Database Name') - parser.add_argument('--prebuilt', action='store_true', dest='prebuilt', help='Use pre-built DB') - args = parser.parse_args() - - with open(args.data_manager_json) as fh: - data_manager_input = json.load(fh) - - target_directory = data_manager_input['output_data'][0]['extra_files_path'] - - if args.prebuilt: - bracken_build_args = { - 'threads': args.threads, - 'read_len': args.read_len, - 'kraken_database': args.kraken_database, - } - else: - bracken_build_args = { - 'threads': args.threads, - 'kmer_len': args.kmer_len, - 'read_len': args.read_len, - 'kraken_database': args.kraken_database, - } - - try: - os.mkdir(target_directory) - except OSError as exc: - if exc.errno == errno.EEXIST and os.path.isdir(target_directory): - pass - else: - raise - - data_manager_output = {} - - data_manager_output = bracken_build_database( - target_directory, - bracken_build_args, - args.database_name, - args.prebuilt, - ) - - with open(args.data_manager_json, 'w') as fh: - json.dump(data_manager_output, fh, sort_keys=True) - - -if __name__ == "__main__": - main() diff -r 84cc0dc92b0c -r 174a754bd3b6 data_manager/bracken_build_database.xml --- a/data_manager/bracken_build_database.xml Wed Mar 06 14:09:08 2024 +0000 +++ b/data_manager/bracken_build_database.xml Wed Nov 05 13:32:09 2025 +0000 @@ -1,37 +1,77 @@ - bracken database builder - 2.8 + 3.1 0 - 22.01 + 24.0 bracken &2 echo "Have you chosen the correct k-mer length? file $db_dir/database${check_prebuilt.read_len}mers.kmer_distrib does not exist."; + exit 1; + fi + ## delete any other (links to) prebuilt bracken databases (except the one of the chosen read_len) + && find '$db_dir' -maxdepth 1 -name "*mers.kraken" -a \! -name "*${check_prebuilt.read_len}mers.kraken" -delete + && find '$db_dir' -maxdepth 1 -name "*mers.kmer_distrib" -a \! -name "*${check_prebuilt.read_len}mers.kmer_distrib" -delete + #end if + && cp '$dmjson' '$out_file' ]]> + + + @@ -58,8 +98,8 @@ - - + + @@ -67,10 +107,29 @@ - + + + + + - + + + + + + + + + + + + + + + + @@ -85,15 +144,16 @@ ====================================== The prebuilt option does use existing bracken DBs, that are shipped with kraken2 DBs. This is the case for DBs downloaded from https://benlangmead.github.io/aws-indexes/k2. -All prebuilt databases contain a Kraken 2 database along with Bracken databases built for 50, 75, 100, 150, 200, 250 and 300 read lengths (refering to the lenght of the sequenced reads to be analysis using that database (one read in case of paired reads)). +All prebuilt databases contain a Kraken 2 database along with Bracken databases built for 50, 75, 100, 150, 200, 250 and 300 read lengths (referring to the length of the sequenced reads to be analysis using that database (one read in case of paired reads)). In this case the data manager points to the same DB as the kracken2 DB. +Note the Kraken 2 / Bracken 16s DBs only contain the Bracken databases built for 100mers, 150mers, and 200mers. **The prebuilt option must not be used for custom kraken2 DBs ! For this the bracken DB needs to be build.** ====================================== Building new DBs ====================================== -Use the same K-mer length as the kraken2 DB and choose read lengths that are close to the read length of analysis you want to perform (one read in case of paired reads). +Use the same K-mer length as the kraken2 DB and choose read lengths that are close to the read length of the analysis you want to perform (one read in case of paired reads). 10.7717/peerj-cs.104 diff -r 84cc0dc92b0c -r 174a754bd3b6 data_manager_conf.xml --- a/data_manager_conf.xml Wed Mar 06 14:09:08 2024 +0000 +++ b/data_manager_conf.xml Wed Nov 05 13:32:09 2025 +0000 @@ -4,8 +4,25 @@ - + + + + + #import os + #set base_dir = os.path.dirname($path) + ${base_dir} + + bracken_databases/${value} + + + + #import os + ${GALAXY_DATA_MANAGER_DATA_PATH}/bracken_databases/${value}/${os.path.basename($path)} + + abspath + + diff -r 84cc0dc92b0c -r 174a754bd3b6 test-data/kraken2_databases.loc --- a/test-data/kraken2_databases.loc Wed Mar 06 14:09:08 2024 +0000 +++ b/test-data/kraken2_databases.loc Wed Nov 05 13:32:09 2025 +0000 @@ -3,4 +3,4 @@ # - name (Galaxy shows this in the UI) # - path (folder name containing the Kraken DB) # -test_entry "Test Database" ${__HERE__}/test_db +test_entry Test Database ${__HERE__}/test_db diff -r 84cc0dc92b0c -r 174a754bd3b6 test-data/test_db/database.kraken --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/database.kraken Wed Nov 05 13:32:09 2025 +0000 @@ -0,0 +1,2 @@ +C NC_011750.1 585057 910 585057:11 A:40 585057:117 A:66 585057:32 A:41 585057:569 +C NC_003198.1 585057 910 0:169 A:66 585057:24 0:9 A:41 0:10 585057:65 0:134 585057:2 0:356 diff -r 84cc0dc92b0c -r 174a754bd3b6 test-data/test_db/database100mers.kmer_distrib --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/database100mers.kmer_distrib Wed Nov 05 13:32:09 2025 +0000 @@ -0,0 +1,3 @@ +mapped_taxid genome_taxids:kmers_mapped:total_genome_kmers +0 585057:1:811 932157936:530:811 +585057 585057:810:811 932157936:281:811 diff -r 84cc0dc92b0c -r 174a754bd3b6 test-data/test_db/database100mers.kraken Binary file test-data/test_db/database100mers.kraken has changed