comparison data_manager/kraken2_build_database.py @ 12:90b4d4f0a3a4 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit 9835da32741d05d129a1a44835f66e32713770ad
author iuc
date Fri, 18 Oct 2024 17:08:15 +0000
parents 9002633b4737
children e9ee4d074d5d
comparison
equal deleted inserted replaced
11:1e34d2e3d285 12:90b4d4f0a3a4
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2
3 from __future__ import print_function
4 2
5 import argparse 3 import argparse
6 import datetime 4 import datetime
7 import errno 5 import errno
8 import json 6 import json
9 import os 7 import os
8 import re
10 import shutil 9 import shutil
11 import subprocess 10 import subprocess
12 import sys 11 import sys
13 import tarfile 12 import tarfile
14 from enum import Enum 13 from enum import Enum
319 } 318 }
320 319
321 return data_table_entry 320 return data_table_entry
322 321
323 322
324 def kraken2_build_custom(kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME): 323 def kraken2_build_custom(kraken2_args, custom_database_name, custom_source_info, target_directory, data_table_name=DATA_TABLE_NAME):
324 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
325
326 database_value = "_".join([
327 now,
328 re.sub(r'[^\w_.-]+', '_', custom_database_name).strip('_'),
329 "kmer-len", str(kraken2_args["kmer_len"]),
330 "minimizer-len", str(kraken2_args["minimizer_len"]),
331 "minimizer-spaces", str(kraken2_args["minimizer_spaces"]),
332 "load-factor", str(kraken2_args["load_factor"]),
333 ])
334
335 database_name = " ".join([
336 custom_database_name,
337 "(" + custom_source_info + ",",
338 "kmer-len=" + str(kraken2_args["kmer_len"]) + ",",
339 "minimizer-len=" + str(kraken2_args["minimizer_len"]) + ",",
340 "minimizer-spaces=" + str(kraken2_args["minimizer_spaces"]) + ",",
341 "load-factor=" + str(kraken2_args["load_factor"]) + ")",
342 ])
343
344 database_path = database_value
325 345
326 args = [ 346 args = [
327 '--threads', str(kraken2_args["threads"]), 347 '--threads', str(kraken2_args["threads"]),
328 '--download-taxonomy', 348 '--download-taxonomy',
329 '--db', custom_database_name, 349 '--db', database_path,
330 ] 350 ]
331 351
332 if kraken2_args['skip_maps']: 352 if kraken2_args['skip_maps']:
333 args.append('--skip-maps') 353 args.append('--skip-maps')
334 354
335 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) 355 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
336 356
337 args = [ 357 args = [
338 '--threads', str(kraken2_args["threads"]), 358 '--threads', str(kraken2_args["threads"]),
339 '--add-to-library', kraken2_args["custom_fasta"], 359 '--add-to-library', kraken2_args["custom_fasta"],
340 '--db', custom_database_name 360 '--db', database_path,
341 ] 361 ]
342 362
343 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) 363 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
344 364
345 args = [ 365 args = [
347 '--build', 367 '--build',
348 '--kmer-len', str(kraken2_args["kmer_len"]), 368 '--kmer-len', str(kraken2_args["kmer_len"]),
349 '--minimizer-len', str(kraken2_args["minimizer_len"]), 369 '--minimizer-len', str(kraken2_args["minimizer_len"]),
350 '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]), 370 '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]),
351 '--load-factor', str(kraken2_args["load_factor"]), 371 '--load-factor', str(kraken2_args["load_factor"]),
352 '--db', custom_database_name 372 '--db', database_path,
353 ] 373 ]
354 374
355 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) 375 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
356 376
357 if kraken2_args["clean"]: 377 if kraken2_args["clean"]:
358 args = [ 378 args = [
359 '--threads', str(kraken2_args["threads"]), 379 '--threads', str(kraken2_args["threads"]),
360 '--clean', 380 '--clean',
361 '--db', custom_database_name 381 '--db', database_path,
362 ] 382 ]
363 383
364 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) 384 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory)
365 385
366 data_table_entry = { 386 data_table_entry = {
367 'data_tables': { 387 'data_tables': {
368 data_table_name: [ 388 data_table_name: [
369 { 389 {
370 "value": custom_database_name, 390 "value": database_value,
371 "name": custom_database_name, 391 "name": database_name,
372 "path": custom_database_name 392 "path": database_path,
373 } 393 }
374 ] 394 ]
375 } 395 }
376 } 396 }
377 397
391 parser.add_argument('--prebuilt-db', dest='prebuilt_db', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Prebuilt database to download. Only applies to --database-type standard_prebuilt.') 411 parser.add_argument('--prebuilt-db', dest='prebuilt_db', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Prebuilt database to download. Only applies to --database-type standard_prebuilt.')
392 parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.') 412 parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.')
393 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') 413 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)')
394 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') 414 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)')
395 parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)') 415 parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)')
416 parser.add_argument('--custom-source-info', dest='custom_source_info', help='Description of how this build has been sourced (only applies to --database-type custom)')
396 parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='') 417 parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='')
397 parser.add_argument('--clean', dest='clean', action='store_true', help='Clean up extra files') 418 parser.add_argument('--clean', dest='clean', action='store_true', help='Clean up extra files')
398 args = parser.parse_args() 419 args = parser.parse_args()
399 420
400 with open(args.data_manager_json) as fh: 421 with open(args.data_manager_json) as fh:
462 "clean": args.clean, 483 "clean": args.clean,
463 } 484 }
464 data_manager_output = kraken2_build_custom( 485 data_manager_output = kraken2_build_custom(
465 kraken2_args, 486 kraken2_args,
466 args.custom_database_name, 487 args.custom_database_name,
488 args.custom_source_info,
467 target_directory, 489 target_directory,
468 ) 490 )
469 else: 491 else:
470 sys.exit("Invalid database type") 492 sys.exit("Invalid database type")
471 493