Mercurial > repos > iuc > data_manager_build_kraken2_database
comparison data_manager/kraken2_build_database.py @ 12:90b4d4f0a3a4 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit 9835da32741d05d129a1a44835f66e32713770ad
author | iuc |
---|---|
date | Fri, 18 Oct 2024 17:08:15 +0000 |
parents | 9002633b4737 |
children | e9ee4d074d5d |
comparison
equal
deleted
inserted
replaced
11:1e34d2e3d285 | 12:90b4d4f0a3a4 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | |
3 from __future__ import print_function | |
4 | 2 |
5 import argparse | 3 import argparse |
6 import datetime | 4 import datetime |
7 import errno | 5 import errno |
8 import json | 6 import json |
9 import os | 7 import os |
8 import re | |
10 import shutil | 9 import shutil |
11 import subprocess | 10 import subprocess |
12 import sys | 11 import sys |
13 import tarfile | 12 import tarfile |
14 from enum import Enum | 13 from enum import Enum |
319 } | 318 } |
320 | 319 |
321 return data_table_entry | 320 return data_table_entry |
322 | 321 |
323 | 322 |
324 def kraken2_build_custom(kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME): | 323 def kraken2_build_custom(kraken2_args, custom_database_name, custom_source_info, target_directory, data_table_name=DATA_TABLE_NAME): |
324 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") | |
325 | |
326 database_value = "_".join([ | |
327 now, | |
328 re.sub(r'[^\w_.-]+', '_', custom_database_name).strip('_'), | |
329 "kmer-len", str(kraken2_args["kmer_len"]), | |
330 "minimizer-len", str(kraken2_args["minimizer_len"]), | |
331 "minimizer-spaces", str(kraken2_args["minimizer_spaces"]), | |
332 "load-factor", str(kraken2_args["load_factor"]), | |
333 ]) | |
334 | |
335 database_name = " ".join([ | |
336 custom_database_name, | |
337 "(" + custom_source_info + ",", | |
338 "kmer-len=" + str(kraken2_args["kmer_len"]) + ",", | |
339 "minimizer-len=" + str(kraken2_args["minimizer_len"]) + ",", | |
340 "minimizer-spaces=" + str(kraken2_args["minimizer_spaces"]) + ",", | |
341 "load-factor=" + str(kraken2_args["load_factor"]) + ")", | |
342 ]) | |
343 | |
344 database_path = database_value | |
325 | 345 |
326 args = [ | 346 args = [ |
327 '--threads', str(kraken2_args["threads"]), | 347 '--threads', str(kraken2_args["threads"]), |
328 '--download-taxonomy', | 348 '--download-taxonomy', |
329 '--db', custom_database_name, | 349 '--db', database_path, |
330 ] | 350 ] |
331 | 351 |
332 if kraken2_args['skip_maps']: | 352 if kraken2_args['skip_maps']: |
333 args.append('--skip-maps') | 353 args.append('--skip-maps') |
334 | 354 |
335 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) | 355 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
336 | 356 |
337 args = [ | 357 args = [ |
338 '--threads', str(kraken2_args["threads"]), | 358 '--threads', str(kraken2_args["threads"]), |
339 '--add-to-library', kraken2_args["custom_fasta"], | 359 '--add-to-library', kraken2_args["custom_fasta"], |
340 '--db', custom_database_name | 360 '--db', database_path, |
341 ] | 361 ] |
342 | 362 |
343 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) | 363 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
344 | 364 |
345 args = [ | 365 args = [ |
347 '--build', | 367 '--build', |
348 '--kmer-len', str(kraken2_args["kmer_len"]), | 368 '--kmer-len', str(kraken2_args["kmer_len"]), |
349 '--minimizer-len', str(kraken2_args["minimizer_len"]), | 369 '--minimizer-len', str(kraken2_args["minimizer_len"]), |
350 '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]), | 370 '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]), |
351 '--load-factor', str(kraken2_args["load_factor"]), | 371 '--load-factor', str(kraken2_args["load_factor"]), |
352 '--db', custom_database_name | 372 '--db', database_path, |
353 ] | 373 ] |
354 | 374 |
355 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) | 375 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
356 | 376 |
357 if kraken2_args["clean"]: | 377 if kraken2_args["clean"]: |
358 args = [ | 378 args = [ |
359 '--threads', str(kraken2_args["threads"]), | 379 '--threads', str(kraken2_args["threads"]), |
360 '--clean', | 380 '--clean', |
361 '--db', custom_database_name | 381 '--db', database_path, |
362 ] | 382 ] |
363 | 383 |
364 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) | 384 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
365 | 385 |
366 data_table_entry = { | 386 data_table_entry = { |
367 'data_tables': { | 387 'data_tables': { |
368 data_table_name: [ | 388 data_table_name: [ |
369 { | 389 { |
370 "value": custom_database_name, | 390 "value": database_value, |
371 "name": custom_database_name, | 391 "name": database_name, |
372 "path": custom_database_name | 392 "path": database_path, |
373 } | 393 } |
374 ] | 394 ] |
375 } | 395 } |
376 } | 396 } |
377 | 397 |
391 parser.add_argument('--prebuilt-db', dest='prebuilt_db', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Prebuilt database to download. Only applies to --database-type standard_prebuilt.') | 411 parser.add_argument('--prebuilt-db', dest='prebuilt_db', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Prebuilt database to download. Only applies to --database-type standard_prebuilt.') |
392 parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.') | 412 parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.') |
393 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') | 413 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') |
394 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') | 414 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') |
395 parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)') | 415 parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)') |
416 parser.add_argument('--custom-source-info', dest='custom_source_info', help='Description of how this build has been sourced (only applies to --database-type custom)') | |
396 parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='') | 417 parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='') |
397 parser.add_argument('--clean', dest='clean', action='store_true', help='Clean up extra files') | 418 parser.add_argument('--clean', dest='clean', action='store_true', help='Clean up extra files') |
398 args = parser.parse_args() | 419 args = parser.parse_args() |
399 | 420 |
400 with open(args.data_manager_json) as fh: | 421 with open(args.data_manager_json) as fh: |
462 "clean": args.clean, | 483 "clean": args.clean, |
463 } | 484 } |
464 data_manager_output = kraken2_build_custom( | 485 data_manager_output = kraken2_build_custom( |
465 kraken2_args, | 486 kraken2_args, |
466 args.custom_database_name, | 487 args.custom_database_name, |
488 args.custom_source_info, | |
467 target_directory, | 489 target_directory, |
468 ) | 490 ) |
469 else: | 491 else: |
470 sys.exit("Invalid database type") | 492 sys.exit("Invalid database type") |
471 | 493 |