view data_manager/bracken_build_database.py @ 5:5c8a23ccb14a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bracken_database/ commit db62b99fe2c0e77e7ee63da5fb315f4b6d95170b
author iuc
date Mon, 22 May 2023 19:24:35 +0000
parents 3c7d2c84cb09
children
line wrap: on
line source

#!/usr/bin/env python

from __future__ import print_function

import argparse
import errno
import json
import os
import subprocess
import uuid


DATA_TABLE_NAME = "bracken_databases"


def bracken_build_database(target_directory, bracken_build_args, database_name, prebuilt=False, data_table_name=DATA_TABLE_NAME):
    """Optionally run ``bracken-build`` and return a data table entry for the result.

    Args:
        target_directory: Accepted for interface compatibility; this function
            does not read or write it.
        bracken_build_args: Mapping with the keys ``kraken_database`` and
            ``read_len`` and, when ``prebuilt`` is false, also ``threads`` and
            ``kmer_len``. Values may be strings or numbers.
        database_name: Display name stored under ``"name"`` in the entry.
        prebuilt: When true, ``bracken-build`` is not run and only the entry
            is produced.
        data_table_name: Key under ``"data_tables"`` that receives the entry.

    Returns:
        ``{"data_tables": {data_table_name: [entry]}}`` where ``entry`` has a
        freshly generated UUID as ``"value"``, ``database_name`` as ``"name"``
        and ``<kraken_database>/database<read_len>mers.kmer_distrib`` as
        ``"path"``.

    Raises:
        KeyError: A required key is absent from ``bracken_build_args``.
        ValueError: ``prebuilt`` is false and a ``bracken-build`` option is None.
        subprocess.CalledProcessError: ``bracken-build`` exited non-zero.
        OSError: ``bracken-build`` could not be started.
    """
    database_value = str(uuid.uuid4())

    # NOTE(review): presumably bracken-build writes this file inside the
    # Kraken database directory -- confirm against the Bracken documentation.
    database_path = os.path.join(
        bracken_build_args['kraken_database'],
        'database' + str(bracken_build_args['read_len']) + 'mers.kmer_distrib',
    )

    if not prebuilt:
        command = ['bracken-build']
        for flag, key in (
            ('-t', 'threads'),
            ('-k', 'kmer_len'),
            ('-l', 'read_len'),
            ('-d', 'kraken_database'),
        ):
            value = bracken_build_args[key]
            if value is None:
                raise ValueError("bracken-build option '%s' is required but was not provided" % key)
            # subprocess only accepts str/bytes/path-like argv items; numeric
            # values (e.g. an integer thread count) must be stringified first.
            command.extend([flag, str(value)])

        subprocess.check_call(command)

    data_table_entry = {
        "data_tables": {
            data_table_name: [
                {
                    "value": database_value,
                    "name": database_name,
                    "path": database_path,
                }
            ]
        }
    }

    return data_table_entry


def main():
    """Command-line entry point.

    Parses the command line, reads the JSON file given as the positional
    argument to find ``output_data[0].extra_files_path``, makes sure that
    directory exists, calls ``bracken_build_database`` and then overwrites the
    same JSON file with the returned data table entry.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('data_manager_json')
    option_specs = (
        ('--threads', {'dest': 'threads', 'default': 1, 'help': 'threads'}),
        ('--kmer-len', {'dest': 'kmer_len', 'help': 'K-mer length'}),
        ('--read-len', {'dest': 'read_len', 'help': 'Read length'}),
        ('--kraken-db', {'dest': 'kraken_database', 'help': 'Kraken Database'}),
        ('--database-name', {'dest': 'database_name', 'help': 'Database Name'}),
        ('--prebuilt', {'action': 'store_true', 'dest': 'prebuilt', 'help': 'Use pre-built DB'}),
    )
    for flag, settings in option_specs:
        parser.add_argument(flag, **settings)
    options = parser.parse_args()

    json_path = options.data_manager_json
    with open(json_path) as handle:
        params = json.load(handle)

    target_directory = params['output_data'][0]['extra_files_path']

    # The k-mer length is only forwarded when a database is actually built.
    bracken_build_args = {
        'threads': options.threads,
        'read_len': options.read_len,
        'kraken_database': options.kraken_database,
    }
    if not options.prebuilt:
        bracken_build_args['kmer_len'] = options.kmer_len

    try:
        os.mkdir(target_directory)
    except OSError as exc:
        # An already existing directory is fine; anything else is re-raised.
        if exc.errno != errno.EEXIST or not os.path.isdir(target_directory):
            raise

    result = bracken_build_database(
        target_directory,
        bracken_build_args,
        options.database_name,
        options.prebuilt,
    )

    # The input JSON file doubles as the output file.
    with open(json_path, 'w') as handle:
        json.dump(result, handle, sort_keys=True)


# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()