Mercurial > repos > iuc > data_manager_build_kraken2_database
comparison data_manager/kraken2_build_database.py @ 12:90b4d4f0a3a4 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit 9835da32741d05d129a1a44835f66e32713770ad
| author | iuc |
|---|---|
| date | Fri, 18 Oct 2024 17:08:15 +0000 |
| parents | 9002633b4737 |
| children | e9ee4d074d5d |
comparison
equal
deleted
inserted
replaced
| 11:1e34d2e3d285 | 12:90b4d4f0a3a4 |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 | |
| 3 from __future__ import print_function | |
| 4 | 2 |
| 5 import argparse | 3 import argparse |
| 6 import datetime | 4 import datetime |
| 7 import errno | 5 import errno |
| 8 import json | 6 import json |
| 9 import os | 7 import os |
| | 8 import re |
| 10 import shutil | 9 import shutil |
| 11 import subprocess | 10 import subprocess |
| 12 import sys | 11 import sys |
| 13 import tarfile | 12 import tarfile |
| 14 from enum import Enum | 13 from enum import Enum |
| 319 } | 318 } |
| 320 | 319 |
| 321 return data_table_entry | 320 return data_table_entry |
| 322 | 321 |
| 323 | 322 |
| 324 def kraken2_build_custom(kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME): | 323 def kraken2_build_custom(kraken2_args, custom_database_name, custom_source_info, target_directory, data_table_name=DATA_TABLE_NAME): |
| | 324 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") |
| | 325 |
| | 326 database_value = "_".join([ |
| | 327 now, |
| | 328 re.sub(r'[^\w_.-]+', '_', custom_database_name).strip('_'), |
| | 329 "kmer-len", str(kraken2_args["kmer_len"]), |
| | 330 "minimizer-len", str(kraken2_args["minimizer_len"]), |
| | 331 "minimizer-spaces", str(kraken2_args["minimizer_spaces"]), |
| | 332 "load-factor", str(kraken2_args["load_factor"]), |
| | 333 ]) |
| | 334 |
| | 335 database_name = " ".join([ |
| | 336 custom_database_name, |
| | 337 "(" + custom_source_info + ",", |
| | 338 "kmer-len=" + str(kraken2_args["kmer_len"]) + ",", |
| | 339 "minimizer-len=" + str(kraken2_args["minimizer_len"]) + ",", |
| | 340 "minimizer-spaces=" + str(kraken2_args["minimizer_spaces"]) + ",", |
| | 341 "load-factor=" + str(kraken2_args["load_factor"]) + ")", |
| | 342 ]) |
| | 343 |
| | 344 database_path = database_value |
| 325 | 345 |
| 326 args = [ | 346 args = [ |
| 327 '--threads', str(kraken2_args["threads"]), | 347 '--threads', str(kraken2_args["threads"]), |
| 328 '--download-taxonomy', | 348 '--download-taxonomy', |
| 329 '--db', custom_database_name, | 349 '--db', database_path, |
| 330 ] | 350 ] |
| 331 | 351 |
| 332 if kraken2_args['skip_maps']: | 352 if kraken2_args['skip_maps']: |
| 333 args.append('--skip-maps') | 353 args.append('--skip-maps') |
| 334 | 354 |
| 335 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) | 355 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
| 336 | 356 |
| 337 args = [ | 357 args = [ |
| 338 '--threads', str(kraken2_args["threads"]), | 358 '--threads', str(kraken2_args["threads"]), |
| 339 '--add-to-library', kraken2_args["custom_fasta"], | 359 '--add-to-library', kraken2_args["custom_fasta"], |
| 340 '--db', custom_database_name | 360 '--db', database_path, |
| 341 ] | 361 ] |
| 342 | 362 |
| 343 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) | 363 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
| 344 | 364 |
| 345 args = [ | 365 args = [ |
| 347 '--build', | 367 '--build', |
| 348 '--kmer-len', str(kraken2_args["kmer_len"]), | 368 '--kmer-len', str(kraken2_args["kmer_len"]), |
| 349 '--minimizer-len', str(kraken2_args["minimizer_len"]), | 369 '--minimizer-len', str(kraken2_args["minimizer_len"]), |
| 350 '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]), | 370 '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]), |
| 351 '--load-factor', str(kraken2_args["load_factor"]), | 371 '--load-factor', str(kraken2_args["load_factor"]), |
| 352 '--db', custom_database_name | 372 '--db', database_path, |
| 353 ] | 373 ] |
| 354 | 374 |
| 355 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) | 375 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
| 356 | 376 |
| 357 if kraken2_args["clean"]: | 377 if kraken2_args["clean"]: |
| 358 args = [ | 378 args = [ |
| 359 '--threads', str(kraken2_args["threads"]), | 379 '--threads', str(kraken2_args["threads"]), |
| 360 '--clean', | 380 '--clean', |
| 361 '--db', custom_database_name | 381 '--db', database_path, |
| 362 ] | 382 ] |
| 363 | 383 |
| 364 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) | 384 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
| 365 | 385 |
| 366 data_table_entry = { | 386 data_table_entry = { |
| 367 'data_tables': { | 387 'data_tables': { |
| 368 data_table_name: [ | 388 data_table_name: [ |
| 369 { | 389 { |
| 370 "value": custom_database_name, | 390 "value": database_value, |
| 371 "name": custom_database_name, | 391 "name": database_name, |
| 372 "path": custom_database_name | 392 "path": database_path, |
| 373 } | 393 } |
| 374 ] | 394 ] |
| 375 } | 395 } |
| 376 } | 396 } |
| 377 | 397 |
| 391 parser.add_argument('--prebuilt-db', dest='prebuilt_db', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Prebuilt database to download. Only applies to --database-type standard_prebuilt.') | 411 parser.add_argument('--prebuilt-db', dest='prebuilt_db', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Prebuilt database to download. Only applies to --database-type standard_prebuilt.') |
| 392 parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.') | 412 parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.') |
| 393 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') | 413 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') |
| 394 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') | 414 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') |
| 395 parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)') | 415 parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)') |
| | 416 parser.add_argument('--custom-source-info', dest='custom_source_info', help='Description of how this build has been sourced (only applies to --database-type custom)') |
| 396 parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='') | 417 parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='') |
| 397 parser.add_argument('--clean', dest='clean', action='store_true', help='Clean up extra files') | 418 parser.add_argument('--clean', dest='clean', action='store_true', help='Clean up extra files') |
| 398 args = parser.parse_args() | 419 args = parser.parse_args() |
| 399 | 420 |
| 400 with open(args.data_manager_json) as fh: | 421 with open(args.data_manager_json) as fh: |
| 462 "clean": args.clean, | 483 "clean": args.clean, |
| 463 } | 484 } |
| 464 data_manager_output = kraken2_build_custom( | 485 data_manager_output = kraken2_build_custom( |
| 465 kraken2_args, | 486 kraken2_args, |
| 466 args.custom_database_name, | 487 args.custom_database_name, |
| | 488 args.custom_source_info, |
| 467 target_directory, | 489 target_directory, |
| 468 ) | 490 ) |
| 469 else: | 491 else: |
| 470 sys.exit("Invalid database type") | 492 sys.exit("Invalid database type") |
| 471 | 493 |
