Mercurial > repos > dfornika > data_manager_build_kraken2_database
comparison data_manager/kraken2_build_standard.py @ 5:1f6fe9dc5490 draft
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/data_managers/data_manager_build_kraken2_database/ commit 47bcc8467200adee842a5574b67bc6ef46c0be71-dirty
author | dfornika |
---|---|
date | Mon, 04 Mar 2019 18:59:44 -0500 |
parents | 32d04371ed19 |
children | ae6180bdb1e9 |
comparison
equal
deleted
inserted
replaced
4:988c436ad798 | 5:1f6fe9dc5490 |
---|---|
9 import os | 9 import os |
10 import string | 10 import string |
11 import subprocess | 11 import subprocess |
12 import sys | 12 import sys |
13 | 13 |
14 from pprint import pprint | |
15 | 14 |
16 DATA_TABLE_NAME = "kraken2_databases" | 15 DATA_TABLE_NAME = "kraken2_databases" |
17 | 16 |
18 def kraken2_build_standard(data_manager_dict, kraken2_args, params, target_directory, data_table_name=DATA_TABLE_NAME): | 17 def run(args, cwd): |
| | 18 proc = subprocess.Popen(args=args, shell=False, cwd=cwd) |
| | 19 return_code = proc.wait() |
| | 20 if return_code: |
| | 21 print("Error building database.", file=sys.stderr) |
| | 22 sys.exit( return_code ) |
| | 23 |
| | 24 def kraken2_build_standard(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): |
19 today = datetime.date.today().isoformat() | 25 today = datetime.date.today().isoformat() |
20 database_name = "_".join([today, "standard"]) | 26 database_name = "_".join([ |
| | 27 today, |
| | 28 "standard", |
| | 29 "kmer-len=" + str(kraken2_args["kmer_len"]), |
| | 30 "minimizer-len=" + str(kraken2_args["minimizer_len"]), |
| | 31 "minimizer-spaces=" + str(kraken2_args["minimizer_spaces"]), |
| | 32 ]) |
21 args = [ | 33 args = [ |
22 'kraken2-build', | |
23 '--threads', str(kraken2_args["threads"]), | 34 '--threads', str(kraken2_args["threads"]), |
24 '--standard', | 35 '--standard', |
25 '--kmer-len', str(kraken2_args["kmer_len"]), | 36 '--kmer-len', str(kraken2_args["kmer_len"]), |
26 '--minimizer-len', str(kraken2_args["minimizer_len"]), | 37 '--minimizer-len', str(kraken2_args["minimizer_len"]), |
27 '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]), | 38 '--minimizer-spaces', str(kraken2_args["minimizer_spaces"]), |
28 '--db', database_name | 39 '--db', database_name |
29 ] | 40 ] |
30 proc = subprocess.Popen(args=args, shell=False, cwd=target_directory) | 41 |
31 return_code = proc.wait() | 42 run(['kraken2-build'] + args, target_directory) |
32 if return_code: | 43 |
33 print("Error building database.", file=sys.stderr) | |
34 sys.exit( return_code ) | |
35 args = [ | 44 args = [ |
36 'kraken2-build', | 45 'kraken2-build', |
37 '--threads', str(kraken2_args["threads"]), | 46 '--threads', str(kraken2_args["threads"]), |
38 '--clean', | 47 '--clean', |
39 '--db', database_name | 48 '--db', database_name |
40 ] | 49 ] |
41 proc = subprocess.Popen(args=args, shell=False, cwd=target_directory) | 50 |
42 return_code = proc.wait() | 51 run(['kraken2-build'] + args, target_directory) |
43 if return_code: | 52 |
44 print("Error building database.", file=sys.stderr) | |
45 sys.exit( return_code ) | |
46 data_table_entry = { | 53 data_table_entry = { |
47 "value": database_name, | 54 "value": database_name, |
48 "name": database_name, | 55 "name": database_name, |
49 "path": database_name | 56 "path": database_name |
50 } | 57 } |
51 | 58 |
52 _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) | 59 _add_data_table_entry(data_manager_dict, data_table_entry) |
53 | 60 |
54 | 61 |
55 def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry): | 62 def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name=DATA_TABLE_NAME): |
56 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) | 63 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) |
57 data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] ) | 64 data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get( data_table_name, [] ) |
58 data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry ) | 65 data_manager_dict['data_tables'][data_table_name].append( data_table_entry ) |
59 return data_manager_dict | 66 return data_manager_dict |
60 | 67 |
61 | 68 |
62 def main(): | 69 def main(): |
63 parser = argparse.ArgumentParser() | 70 parser = argparse.ArgumentParser() |
64 parser.add_argument('params') | 71 parser.add_argument('data_manager_json') |
65 parser.add_argument( '-k', '--kmer-len', dest='kmer_len', type=int, default=35, help='kmer length' ) | 72 parser.add_argument( '-k', '--kmer-len', dest='kmer_len', type=int, default=35, help='kmer length' ) |
66 parser.add_argument( '-m', '--minimizer-len', dest='minimizer_len', type=int, default=31, help='minimizer length' ) | 73 parser.add_argument( '-m', '--minimizer-len', dest='minimizer_len', type=int, default=31, help='minimizer length' ) |
67 parser.add_argument( '-s', '--minimizer-spaces', dest='minimizer_spaces', default=6, help='minimizer spaces' ) | 74 parser.add_argument( '-s', '--minimizer-spaces', dest='minimizer_spaces', default=6, help='minimizer spaces' ) |
68 parser.add_argument( '-t', '--threads', dest='threads', default=1, help='threads' ) | 75 parser.add_argument( '-t', '--threads', dest='threads', default=1, help='threads' ) |
69 args = parser.parse_args() | 76 args = parser.parse_args() |
73 "minimizer_len": args.minimizer_len, | 80 "minimizer_len": args.minimizer_len, |
74 "minimizer_spaces": args.minimizer_spaces, | 81 "minimizer_spaces": args.minimizer_spaces, |
75 "threads": args.threads, | 82 "threads": args.threads, |
76 } | 83 } |
77 | 84 |
78 params = json.loads(open(args.params).read()) | 85 data_manager_input = json.loads(open(args.data_manager_json).read()) |
79 pprint(params) | 86 |
80 target_directory = params['output_data'][0]['extra_files_path'] | 87 target_directory = data_manager_input['output_data'][0]['extra_files_path'] |
81 | 88 |
82 try: | 89 try: |
83 os.mkdir( target_directory ) | 90 os.mkdir( target_directory ) |
84 except OSError as exc: | 91 except OSError as exc: |
85 if exc.errno == errno.EEXIST and os.path.isdir( target_directory ): | 92 if exc.errno == errno.EEXIST and os.path.isdir( target_directory ): |
86 pass | 93 pass |
87 else: | 94 else: |
88 raise | 95 raise |
89 | 96 |
90 data_manager_dict = {} | |
91 | |
92 # build the index | |
93 kraken2_build_standard( | 97 kraken2_build_standard( |
94 data_manager_dict, | 98 data_manager_input, |
95 kraken2_args, | 99 kraken2_args, |
96 params, | 100 target_directory, |
97 target_directory | |
98 ) | 101 ) |
99 | 102 |
100 # save info to json file | 103 open(args.data_manager_json, 'wb').write(json.dumps(data_manager_input)) |
101 open(args.params, 'wb').write(json.dumps(data_manager_dict)) | |
102 | 104 |
103 | 105 |
104 if __name__ == "__main__": | 106 if __name__ == "__main__": |
105 main() | 107 main() |