comparison data_manager/kraken2_build_special.py @ 7:ae6180bdb1e9 draft

planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/data_managers/data_manager_build_kraken2_database/ commit 70494d589aa1990618ebc7a895e91477d41c3203-dirty
author dfornika
date Mon, 04 Mar 2019 19:20:33 -0500
parents
children f53064ca5398
comparison
equal deleted inserted replaced
6:ce101123d015 7:ae6180bdb1e9
1 #!/usr/bin/env python
2
3 from __future__ import print_function
4
5 import argparse
6 import datetime
7 import errno
8 import json
9 import os
10 import string
11 import subprocess
12 import sys
13
14
# Default data table that newly built databases are registered under.
DATA_TABLE_NAME = "kraken2_databases"
16
def run(args, cwd):
    """Execute a command in *cwd* and abort the script if it fails.

    The command is started without a shell and waited on until it
    terminates. A zero exit status returns normally; any non-zero status
    prints an error to stderr and exits this process with the same status.

    Args:
        args: Command and arguments as a list (passed to ``subprocess.Popen``).
        cwd: Working directory the command is run in.

    Raises:
        SystemExit: If the command finishes with a non-zero exit status.
    """
    exit_status = subprocess.Popen(args=args, shell=False, cwd=cwd).wait()
    if not exit_status:
        return
    print("Error building database.", file=sys.stderr)
    sys.exit(exit_status)
23
def kraken2_build_standard(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME):
    """Build a Kraken2 "special" database and record it in the data table.

    ``kraken2-build`` is invoked twice inside *target_directory*: first with
    ``--special`` to build the database, then with ``--clean`` (per the
    Kraken2 manual, this removes intermediate build files). The database
    directory name is derived from today's date, the database type and the
    k-mer/minimizer settings. Either invocation failing terminates the script
    via ``run``.

    Args:
        data_manager_dict: Data manager JSON dict; updated in place with the
            new data table entry.
        kraken2_args: Dict with the keys ``special``, ``kmer_len``,
            ``minimizer_len``, ``minimizer_spaces`` and ``threads``.
        target_directory: Directory in which ``kraken2-build`` is run; the
            database directory is created relative to it.
        data_table_name: Name of the data table the entry is appended to.
    """
    today = datetime.date.today().isoformat()
    database_name = "_".join([
        today,
        kraken2_args["special"],
        "kmer-len=" + str(kraken2_args["kmer_len"]),
        "minimizer-len=" + str(kraken2_args["minimizer_len"]),
        "minimizer-spaces=" + str(kraken2_args["minimizer_spaces"]),
    ])

    build_args = [
        '--threads', str(kraken2_args["threads"]),
        # The comma after the database type was missing, which made the
        # whole list literal a syntax error.
        '--special', kraken2_args["special"],
        '--kmer-len', str(kraken2_args["kmer_len"]),
        '--minimizer-len', str(kraken2_args["minimizer_len"]),
        '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]),
        '--db', database_name,
    ]
    run(['kraken2-build'] + build_args, target_directory)

    clean_args = [
        '--threads', str(kraken2_args["threads"]),
        '--clean',
        '--db', database_name,
    ]
    run(['kraken2-build'] + clean_args, target_directory)

    # "path" is the database directory name, i.e. relative to target_directory.
    data_table_entry = {
        "value": database_name,
        "name": database_name,
        "path": database_name,
    }

    # Forward the caller's table name instead of silently using the default.
    _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name)
60
61
def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name=DATA_TABLE_NAME):
    """Append *data_table_entry* to the named table of *data_manager_dict*.

    The ``'data_tables'`` mapping and the per-table list are created on
    demand when they are not present yet.

    Args:
        data_manager_dict: Data manager dict; modified in place.
        data_table_entry: Entry to append to the table's list.
        data_table_name: Key of the table inside ``'data_tables'``.

    Returns:
        The same *data_manager_dict* object that was passed in.
    """
    tables = data_manager_dict.setdefault('data_tables', {})
    rows = tables.setdefault(data_table_name, [])
    rows.append(data_table_entry)
    return data_manager_dict
67
68
def main():
    """Parse the command line, build the database and update the JSON file.

    Reads the data manager JSON file named on the command line, makes sure
    the ``extra_files_path`` directory of its first output dataset exists,
    builds the requested Kraken2 database there, and finally overwrites the
    JSON file with the updated data manager dict.

    Raises:
        OSError: If the target directory cannot be created for a reason
            other than it already existing as a directory.
        SystemExit: On invalid command line arguments, or (via the build
            step) when ``kraken2-build`` fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('data_manager_json')
    # Required: the database type is embedded in the database name, so a
    # missing value would otherwise fail later with an opaque TypeError.
    parser.add_argument('-b', '--db-type', dest='db_type', required=True, help='database type (one of: silva, rdp, greengenes)')
    parser.add_argument('-k', '--kmer-len', dest='kmer_len', type=int, default=35, help='kmer length')
    parser.add_argument('-m', '--minimizer-len', dest='minimizer_len', type=int, default=31, help='minimizer length')
    # type=int validates these early, consistent with the other numeric options.
    parser.add_argument('-s', '--minimizer-spaces', dest='minimizer_spaces', type=int, default=6, help='minimizer spaces')
    parser.add_argument('-t', '--threads', dest='threads', type=int, default=1, help='threads')
    args = parser.parse_args()

    kraken2_args = {
        "special": args.db_type,
        "kmer_len": args.kmer_len,
        "minimizer_len": args.minimizer_len,
        "minimizer_spaces": args.minimizer_spaces,
        "threads": args.threads,
    }

    with open(args.data_manager_json) as json_in:
        data_manager_input = json.loads(json_in.read())

    target_directory = data_manager_input['output_data'][0]['extra_files_path']

    try:
        os.mkdir(target_directory)
    except OSError as exc:
        # An already existing directory is fine; anything else is an error.
        if not (exc.errno == errno.EEXIST and os.path.isdir(target_directory)):
            raise

    kraken2_build_standard(
        data_manager_input,
        kraken2_args,
        target_directory,
    )

    # Serialize before opening so a serialization error cannot truncate the
    # file. Text mode is needed because json.dumps returns str, which cannot
    # be written to a binary-mode handle on Python 3.
    serialized = json.dumps(data_manager_input)
    with open(args.data_manager_json, 'w') as json_out:
        json_out.write(serialized)
106
107
# Run the data manager only when executed as a script, not when imported.
if __name__ == "__main__":
    main()