comparison data_manager/kraken2_build_minikraken.py @ 11:1dc93ae264e6 draft

planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/data_managers/data_manager_build_kraken2_database/ commit 70494d589aa1990618ebc7a895e91477d41c3203-dirty
author dfornika
date Tue, 05 Mar 2019 14:33:23 -0500
parents
children 039a65ff445d
comparison
equal deleted inserted replaced
10:d0b9df6c21dd 11:1dc93ae264e6
1 #!/usr/bin/env python
2
3 from __future__ import print_function
4
5 import argparse
6 import datetime
7 import errno
8 import json
9 import os
10 import string
11 import subprocess
12 import sys
13
14
# Default key under ``data_tables`` in the data manager output to which new
# database entries are appended.
DATA_TABLE_NAME = "kraken2_databases"
16
def run(args, cwd):
    """Run an external command and terminate the script if it fails.

    Args:
        args: Command and arguments as a list; executed without a shell.
        cwd: Directory in which the command is executed.

    Returns:
        None when the command exits with status 0.

    Raises:
        SystemExit: With the command's return code when it is non-zero,
            after printing an error message to stderr.
    """
    proc = subprocess.Popen(args=args, shell=False, cwd=cwd)
    return_code = proc.wait()
    if return_code:
        print("Error building database.", file=sys.stderr)
        sys.exit(return_code)
23
def kraken2_build_standard(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME):
    """Build a special Kraken2 database and register it in the output dict.

    Runs ``kraken2-build --special ...`` and then ``kraken2-build --clean``
    with ``target_directory`` as the working directory, and finally appends a
    ``value``/``name``/``path`` entry describing the database to
    ``data_manager_dict``.

    Args:
        data_manager_dict: Data manager output dictionary; mutated in place.
        kraken2_args: Dictionary with the keys ``special`` (``"rdp"``,
            ``"greengenes"`` or ``"silva"``), ``kmer_len``, ``minimizer_len``,
            ``minimizer_spaces`` and ``threads``.
        target_directory: Working directory for ``kraken2-build``; the
            database path passed to ``--db`` is relative to it.
        data_table_name: Name of the data table the entry is appended to.

    Raises:
        KeyError: If ``kraken2_args["special"]`` is not a known database type.
        SystemExit: Propagated from ``run`` when ``kraken2-build`` fails.
    """
    now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")

    # Human-readable labels for the database types accepted by --special.
    special_database_names = {
        "rdp": "RDP",
        "greengenes": "Greengenes",
        "silva": "Silva",
    }

    special = kraken2_args["special"]
    kmer_len = str(kraken2_args["kmer_len"])
    minimizer_len = str(kraken2_args["minimizer_len"])
    minimizer_spaces = str(kraken2_args["minimizer_spaces"])
    threads = str(kraken2_args["threads"])

    # Unique identifier; also used as the database directory name.
    database_value = "_".join([
        now,
        special,
        "kmer-len", kmer_len,
        "minimizer-len", minimizer_len,
        "minimizer-spaces", minimizer_spaces,
    ])

    # Each element is a separate list item so that " ".join puts a space
    # after every comma, including the one following the timestamp.
    database_name = " ".join([
        special_database_names[special],
        "(Created:",
        now + ",",
        "kmer-len=" + kmer_len + ",",
        "minimizer-len=" + minimizer_len + ",",
        "minimizer-spaces=" + minimizer_spaces + ")",
    ])

    database_path = database_value

    build_args = [
        '--threads', threads,
        '--special', special,
        '--kmer-len', kmer_len,
        '--minimizer-len', minimizer_len,
        '--minimizer-spaces', minimizer_spaces,
        '--db', database_path,
    ]
    run(['kraken2-build'] + build_args, target_directory)

    # Second pass: kraken2-build --clean on the freshly built database.
    clean_args = [
        '--threads', threads,
        '--clean',
        '--db', database_path,
    ]
    run(['kraken2-build'] + clean_args, target_directory)

    data_table_entry = {
        "value": database_value,
        "name": database_name,
        "path": database_path,
    }

    # Forward the caller's table name instead of silently using the default.
    _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name)
79
80
def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name=DATA_TABLE_NAME):
    """Append an entry to a named data table inside the output dictionary.

    Creates ``data_manager_dict['data_tables']`` and the list for
    ``data_table_name`` when they do not exist yet.

    Args:
        data_manager_dict: Data manager output dictionary; mutated in place.
        data_table_entry: Entry to append to the table's list.
        data_table_name: Name of the table to append to.

    Returns:
        The same ``data_manager_dict`` object that was passed in.
    """
    data_tables = data_manager_dict.setdefault('data_tables', {})
    data_tables.setdefault(data_table_name, []).append(data_table_entry)
    return data_manager_dict
86
87
def main():
    """Parse the command line, build the database and write the JSON result.

    Reads the JSON file named by the positional argument to find the target
    directory (``output_data[0].extra_files_path``), creates that directory
    if needed, builds the requested database there, and then overwrites the
    same JSON file with the resulting data table entries.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('data_manager_json')
    # Validate the database type up front; a missing or unknown value would
    # otherwise only fail later with a TypeError/KeyError traceback.
    parser.add_argument('-b', '--db-type', dest='db_type', required=True, choices=('silva', 'rdp', 'greengenes'), help='database type (one of: silva, rdp, greengenes)')
    parser.add_argument('-k', '--kmer-len', dest='kmer_len', type=int, default=35, help='kmer length')
    parser.add_argument('-m', '--minimizer-len', dest='minimizer_len', type=int, default=31, help='minimizer length')
    parser.add_argument('-s', '--minimizer-spaces', dest='minimizer_spaces', type=int, default=6, help='minimizer spaces')
    parser.add_argument('-t', '--threads', dest='threads', type=int, default=1, help='threads')
    args = parser.parse_args()

    kraken2_args = {
        "special": args.db_type,
        "kmer_len": args.kmer_len,
        "minimizer_len": args.minimizer_len,
        "minimizer_spaces": args.minimizer_spaces,
        "threads": args.threads,
    }

    with open(args.data_manager_json) as json_in:
        data_manager_input = json.loads(json_in.read())

    target_directory = data_manager_input['output_data'][0]['extra_files_path']

    try:
        os.mkdir(target_directory)
    except OSError as exc:
        # An already existing directory is fine; anything else is an error.
        if not (exc.errno == errno.EEXIST and os.path.isdir(target_directory)):
            raise

    data_manager_output = {}

    kraken2_build_standard(
        data_manager_output,
        kraken2_args,
        target_directory,
    )

    # Text mode: json.dumps returns str, which cannot be written to a file
    # opened with 'wb' on Python 3.
    with open(args.data_manager_json, 'w') as json_out:
        json_out.write(json.dumps(data_manager_output))
127
128
# Run the data manager only when executed as a script, not on import.
if __name__ == "__main__":
    main()