Mercurial > repos > dfornika > data_manager_build_kraken2_database
comparison data_manager/kraken2_build_custom.py @ 2:cf0cda80b659 draft
planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/data_managers/data_manager_build_kraken2_database/ commit f05f93ee024df8b38efa1c92db9785d52c726f85-dirty
author | dfornika |
---|---|
date | Thu, 28 Feb 2019 18:18:01 -0500 |
parents | |
children | b03007bc0f05 |
comparison
equal
deleted
inserted
replaced
1:32d04371ed19 | 2:cf0cda80b659 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 from __future__ import print_function | |
4 | |
5 import argparse | |
6 import datetime | |
7 import errno | |
8 import json | |
9 import os | |
10 import string | |
11 import subprocess | |
12 import sys | |
13 | |
14 from pprint import pprint | |
15 | |
# Default key under 'data_tables' to which kraken2_build() appends the entry
# describing a newly created database.
DATA_TABLE_NAME = "kraken2_databases"
17 | |
def run(args, cwd):
    """Execute a command and abort the script if it fails.

    The command given by ``args`` is started without a shell, with ``cwd``
    as its working directory, and waited on until it terminates.  When the
    exit status is non-zero an error message is written to stderr and the
    interpreter exits with that same status; otherwise ``None`` is returned.
    """
    process = subprocess.Popen(args=args, shell=False, cwd=cwd)
    exit_status = process.wait()
    if not exit_status:
        return
    print("Error building database.", file=sys.stderr)
    sys.exit(exit_status)
24 | |
25 | |
def kraken2_build(data_manager_dict, kraken2_args, params, target_directory, data_table_name=DATA_TABLE_NAME):
    """Build a custom Kraken2 database and record it in ``data_manager_dict``.

    ``kraken2-build`` is invoked four times with ``target_directory`` as the
    working directory: download the taxonomy, add the FASTA file to the
    library, build the database, and finally clean up.  The database is
    named ``<today's ISO date>_custom``.  Any failing step terminates the
    script through ``run``.

    Args:
        data_manager_dict: dict that receives the new data table entry.
        kraken2_args: dict with the required keys ``"threads"`` and
            ``"fasta"``; the optional keys ``"kmer_len"``,
            ``"minimizer_len"`` and ``"minimizer_spaces"`` are forwarded to
            the build step when present and not ``None``.
        params: accepted for interface compatibility; not used here.
        target_directory: working directory for every ``kraken2-build`` call.
        data_table_name: name of the data table to append the entry to.
    """
    today = datetime.date.today().isoformat()
    database_name = "_".join([today, "custom"])
    threads = str(kraken2_args["threads"])

    def command(task_args):
        # Every invocation shares the executable, thread count and database.
        return ['kraken2-build', '--threads', threads] + task_args + ['--db', database_name]

    # Only pass the tuning options that the caller actually supplied, so
    # kraken2-build falls back to its own defaults for the rest.
    build_options = []
    for option, key in (
        ('--kmer-len', 'kmer_len'),
        ('--minimizer-len', 'minimizer_len'),
        ('--minimizer-spaces', 'minimizer_spaces'),
    ):
        value = kraken2_args.get(key)
        if value is not None:
            build_options.extend([option, str(value)])

    steps = [
        ['--download-taxonomy'],
        ['--add-to-library', kraken2_args["fasta"]],
        # The build step was previously missing: without it the library is
        # cleaned up but no database files are ever produced.
        ['--build'] + build_options,
        ['--clean'],
    ]
    for task_args in steps:
        run(command(task_args), target_directory)

    data_table_entry = {
        "value": database_name,
        "name": database_name,
        "path": database_name,
    }

    _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
64 | |
65 | |
def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
    """Append ``data_table_entry`` to the named table and return the dict.

    The ``'data_tables'`` mapping and the list for ``data_table_name`` are
    created on first use; existing entries are kept.  ``data_manager_dict``
    is modified in place and also returned.
    """
    tables = data_manager_dict.setdefault('data_tables', {})
    tables.setdefault(data_table_name, []).append(data_table_entry)
    return data_manager_dict
71 | |
72 | |
def main():
    """Parse the command line, build the database and write the result JSON.

    The positional ``params`` argument is the path of a JSON file.  It is
    read to obtain ``params['output_data'][0]['extra_files_path']``, which
    is used as the working directory for the build (created if missing).
    After the build, the same file is overwritten with the JSON-encoded
    data manager dictionary describing the new database.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('params')
    parser.add_argument('-k', '--kmer-len', dest='kmer_len', type=int, default=35, help='kmer length')
    parser.add_argument('-m', '--minimizer-len', dest='minimizer_len', type=int, default=31, help='minimizer length')
    parser.add_argument('-s', '--minimizer-spaces', dest='minimizer_spaces', type=int, default=6, help='minimizer spaces')
    # args.fasta is read below, so the option has to exist; it is required
    # because the add-to-library step cannot run without a FASTA file.
    parser.add_argument('-f', '--fasta', dest='fasta', required=True, help='fasta file to add to the library')
    parser.add_argument('-t', '--threads', dest='threads', type=int, default=1, help='threads')
    args = parser.parse_args()

    kraken2_args = {
        "kmer_len": args.kmer_len,
        "minimizer_len": args.minimizer_len,
        "minimizer_spaces": args.minimizer_spaces,
        "fasta": args.fasta,
        "threads": args.threads,
    }

    with open(args.params) as params_file:
        params = json.load(params_file)
    pprint(params)
    target_directory = params['output_data'][0]['extra_files_path']

    try:
        os.mkdir(target_directory)
    except OSError as exc:
        # An already existing directory is fine; anything else is an error.
        if not (exc.errno == errno.EEXIST and os.path.isdir(target_directory)):
            raise

    data_manager_dict = {}

    # build the index
    kraken2_build(
        data_manager_dict,
        kraken2_args,
        params,
        target_directory
    )

    # save info to json file
    # Text mode: json.dumps returns str, which cannot be written to a
    # binary handle on Python 3.
    with open(args.params, 'w') as params_file:
        params_file.write(json.dumps(data_manager_dict))


if __name__ == "__main__":
    main()