Mercurial > repos > dfornika > data_manager_build_kraken2_database
comparison data_manager/kraken2_build_minikraken.py @ 11:1dc93ae264e6 draft
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/data_managers/data_manager_build_kraken2_database/ commit 70494d589aa1990618ebc7a895e91477d41c3203-dirty
author | dfornika |
---|---|
date | Tue, 05 Mar 2019 14:33:23 -0500 |
parents | |
children | 039a65ff445d |
comparison
equal
deleted
inserted
replaced
10:d0b9df6c21dd | 11:1dc93ae264e6 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 from __future__ import print_function | |
4 | |
5 import argparse | |
6 import datetime | |
7 import errno | |
8 import json | |
9 import os | |
10 import string | |
11 import subprocess | |
12 import sys | |
13 | |
14 | |
15 DATA_TABLE_NAME = "kraken2_databases" | |
16 | |
def run(args, cwd):
    """Run a command in ``cwd`` and terminate the script if it fails.

    ``args`` is the argument list passed to the child process (no shell is
    involved). When the child exits with a non-zero status, an error
    message is printed to stderr and the interpreter exits with that same
    status; otherwise the function returns ``None``.
    """
    process = subprocess.Popen(args=args, shell=False, cwd=cwd)
    exit_status = process.wait()

    # Guard clause: nothing more to do when the command succeeded.
    if not exit_status:
        return

    print("Error building database.", file=sys.stderr)
    sys.exit(exit_status)
23 | |
def kraken2_build_standard(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME):
    """Build a "special" Kraken2 database and record it as a data table entry.

    Invokes ``kraken2-build --special ...`` and then ``kraken2-build --clean``
    with ``target_directory`` as the working directory, and finally appends
    an entry (``value``, ``name``, ``path``) describing the database to
    ``data_manager_dict``.

    Args:
        data_manager_dict: dict that receives the new entry (mutated in place).
        kraken2_args: dict with the keys ``special``, ``kmer_len``,
            ``minimizer_len``, ``minimizer_spaces`` and ``threads``.
        target_directory: working directory for both ``kraken2-build`` calls;
            the relative ``--db`` path is resolved against it.
        data_table_name: name of the data table the entry is appended to.

    Raises:
        KeyError: if ``kraken2_args["special"]`` is not ``rdp``,
            ``greengenes`` or ``silva``.
    """
    now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")

    special_database_names = {
        "rdp": "RDP",
        "greengenes": "Greengenes",
        "silva": "Silva",
    }

    special = kraken2_args["special"]
    kmer_len = str(kraken2_args["kmer_len"])
    minimizer_len = str(kraken2_args["minimizer_len"])
    minimizer_spaces = str(kraken2_args["minimizer_spaces"])
    threads = str(kraken2_args["threads"])

    # Unique identifier for the database; also used as its directory name.
    database_value = "_".join([
        now,
        special,
        "kmer-len", kmer_len,
        "minimizer-len", minimizer_len,
        "minimizer-spaces", minimizer_spaces,
    ])

    # Human-readable label. Every element is separated by an explicit comma
    # so that no two adjacent string literals are implicitly concatenated
    # (the previous code lost the space after the timestamp that way).
    database_name = " ".join([
        special_database_names[special],
        "(Created:",
        now + ",",
        "kmer-len=" + kmer_len + ",",
        "minimizer-len=" + minimizer_len + ",",
        "minimizer-spaces=" + minimizer_spaces + ")",
    ])

    database_path = database_value

    build_args = [
        '--threads', threads,
        '--special', special,
        '--kmer-len', kmer_len,
        '--minimizer-len', minimizer_len,
        '--minimizer-spaces', minimizer_spaces,
        '--db', database_path,
    ]
    run(['kraken2-build'] + build_args, target_directory)

    clean_args = [
        '--threads', threads,
        '--clean',
        '--db', database_path,
    ]
    run(['kraken2-build'] + clean_args, target_directory)

    data_table_entry = {
        "value": database_value,
        "name": database_name,
        "path": database_path,
    }

    # Forward the caller-supplied table name instead of silently falling
    # back to the helper's default.
    _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name)
79 | |
80 | |
def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name=DATA_TABLE_NAME):
    """Append ``data_table_entry`` to the named table of ``data_manager_dict``.

    The ``'data_tables'`` mapping and the list stored under
    ``data_table_name`` are created when they are absent. The same
    ``data_manager_dict`` object is returned after being updated in place.
    """
    tables = data_manager_dict.setdefault('data_tables', {})
    entries = tables.setdefault(data_table_name, [])
    entries.append(data_table_entry)
    return data_manager_dict
86 | |
87 | |
def main():
    """Command-line entry point: build a special Kraken2 database.

    Reads the JSON file named by the positional ``data_manager_json``
    argument, takes ``output_data[0]['extra_files_path']`` from it as the
    build directory (created if it does not exist), runs the build, and
    then overwrites that same JSON file with the resulting data table
    entries.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('data_manager_json')
    # The database type is mandatory and limited to the supported values so
    # that bad input is rejected by argparse instead of failing mid-build.
    parser.add_argument(
        '-b', '--db-type', dest='db_type', required=True,
        choices=['silva', 'rdp', 'greengenes'],
        help='database type (one of: silva, rdp, greengenes)',
    )
    parser.add_argument('-k', '--kmer-len', dest='kmer_len', type=int, default=35, help='kmer length')
    parser.add_argument('-m', '--minimizer-len', dest='minimizer_len', type=int, default=31, help='minimizer length')
    # Numeric options are validated as integers, consistent with the two above.
    parser.add_argument('-s', '--minimizer-spaces', dest='minimizer_spaces', type=int, default=6, help='minimizer spaces')
    parser.add_argument('-t', '--threads', dest='threads', type=int, default=1, help='threads')
    args = parser.parse_args()

    kraken2_args = {
        "special": args.db_type,
        "kmer_len": args.kmer_len,
        "minimizer_len": args.minimizer_len,
        "minimizer_spaces": args.minimizer_spaces,
        "threads": args.threads,
    }

    # Close the input file promptly; it is reopened for writing below.
    with open(args.data_manager_json) as json_in:
        data_manager_input = json.loads(json_in.read())

    target_directory = data_manager_input['output_data'][0]['extra_files_path']

    try:
        os.mkdir(target_directory)
    except OSError as exc:
        # An already existing directory is fine; anything else is an error.
        if not (exc.errno == errno.EEXIST and os.path.isdir(target_directory)):
            raise

    data_manager_output = {}

    kraken2_build_standard(
        data_manager_output,
        kraken2_args,
        target_directory,
    )

    # json.dumps returns text, so the file must be opened in text mode;
    # writing it to a 'wb' handle raises TypeError on Python 3.
    with open(args.data_manager_json, 'w') as json_out:
        json_out.write(json.dumps(data_manager_output))
127 | |
128 | |
# Run the data manager only when this file is executed as a script.
if __name__ == "__main__":
    main()