Mercurial > repos > iuc > data_manager_humann2_database_downloader
comparison data_manager/data_manager_humann2_download.py @ 0:048593e41359 draft
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/data_managers/data_manager_humann2_database_downloader commit 3e179ac4ab2051414320b3811540dfc9b0966061
author | iuc |
---|---|
date | Sun, 12 Mar 2017 14:33:34 -0400 |
parents | |
children | 6112a571f012 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:048593e41359 |
---|---|
1 #!/usr/bin/env python | |
2 # | |
3 # Data manager for reference data for the 'humann2' Galaxy tools | |
4 import datetime | |
5 import json | |
6 import optparse | |
7 import os | |
8 import subprocess | |
9 import sys | |
10 | |
11 | |
# Human-readable labels for the database builds this data manager knows
# about, keyed by build identifier. The label is stored in the 'name'
# column of the data table entry.
HUMANN2_REFERENCE_DATA = {
    "full": "Full",
    "DEMO": "Demo",
    "uniref50_diamond": "Full UniRef50",
    "uniref50_ec_filtered_diamond": "EC-filtered UniRef50",
    "uniref50_GO_filtered_rapsearch2": "GO filtered UniRef50 for rapsearch2",
    "uniref90_diamond": "Full UniRef90",
    "uniref90_ec_filtered_diamond": "EC-filtered UniRef90",
    "DEMO_diamond": "Demo",
}
22 | |
23 | |
24 # Utility functions for interacting with Galaxy JSON | |
def read_input_json(jsonfile):
    """Read the JSON supplied from the data manager tool

    Returns a tuple (param_dict,extra_files_path)

    'param_dict' is an arbitrary dictionary of parameters
    input into the tool; 'extra_files_path' is the path
    to a directory where output files must be put for the
    receiving data manager to pick them up.

    NB the directory pointed to by 'extra_files_path'
    doesn't exist initially, it is the job of the script
    to create it if necessary.

    Arguments:
      jsonfile: path to the JSON file to read

    Raises KeyError (or IndexError) if the JSON lacks the
    'param_dict' or 'output_data[0].extra_files_path' items.

    """
    # Use a context manager so the file handle is closed promptly
    # rather than being left for the garbage collector.
    with open(jsonfile) as handle:
        params = json.load(handle)
    return (params['param_dict'],
            params['output_data'][0]['extra_files_path'])
43 | |
44 | |
45 # Utility functions for creating data table dictionaries | |
46 # | |
47 # Example usage: | |
48 # >>> d = create_data_tables_dict() | |
49 # >>> add_data_table(d,'my_data') | |
50 # >>> add_data_table_entry(d,'my_data',dict(dbkey='hg19',value='human')) | |
51 # >>> add_data_table_entry(d,'my_data',dict(dbkey='mm9',value='mouse')) | |
52 # >>> print(str(json.dumps(d))) | |
def create_data_tables_dict():
    """Build an empty container for data table information

    The returned dictionary has a single 'data_tables' key that
    maps to an (initially empty) dictionary of tables. Populate
    it with 'add_data_table' and 'add_data_table_entry', then
    serialise it to JSON to hand it back to the data manager.

    """
    return {'data_tables': {}}
65 | |
66 | |
def add_data_table(d, table):
    """Register an empty data table in the data tables dictionary

    Stores an empty list under the name 'table'; any entries
    already held under that name are discarded.

    """
    tables = d['data_tables']
    tables[table] = []
74 | |
75 | |
def add_data_table_entry(d, table, entry):
    """Append one row to a named data table

    'entry' should be a dictionary whose keys are the column
    names of the data table 'table'.

    Raises an Exception if no data table with that name has
    been added to 'd'.

    """
    try:
        rows = d['data_tables'][table]
    except KeyError:
        raise Exception("add_data_table_entry: no table '%s'" % table)
    rows.append(entry)
91 | |
92 | |
def download_humann2_db(data_tables, table_name, database, build, target_dir):
    """Download a HUMAnN2 database and record it in a data table

    Runs 'humann2_databases --download' for the requested
    database and build, moves the downloaded directory to
    '<target_dir>/<database>/<build>' and appends an entry
    describing it to the data table 'table_name'.

    The 'data_tables' dictionary should have been created using
    the 'create_data_tables_dict' and 'add_data_table' functions.

    Arguments:
      data_tables: a dictionary containing the data table info
      table_name: name of the table
      database: database to download (chocophlan or uniref)
      build: build of the database to download
      target_dir: directory under which the database is stored

    Raises KeyError if 'build' has no entry in
    HUMANN2_REFERENCE_DATA, and subprocess.CalledProcessError
    if the download command exits with a non-zero status.

    """
    # Look up the display name first so that an unknown build fails
    # before anything is created or downloaded.
    name = HUMANN2_REFERENCE_DATA[build]
    value = "%s-%s-%s" % (database, build, datetime.date.today().isoformat())
    db_target_dir = os.path.join(target_dir, database)
    build_target_dir = os.path.join(db_target_dir, build)
    os.makedirs(build_target_dir)
    # Pass the arguments as a list without a shell: values containing
    # spaces or shell metacharacters are then handed over verbatim
    # instead of being re-parsed by the shell.
    subprocess.check_call(
        ["humann2_databases", "--download", database, build, db_target_dir])
    print(os.listdir(db_target_dir))
    # NOTE(review): this assumes the downloader puts its output in
    # '<db_target_dir>/<database>' - confirm against humann2_databases.
    os.rename(os.path.join(db_target_dir, database), build_target_dir)
    print(os.listdir(db_target_dir))
    add_data_table_entry(
        data_tables,
        table_name,
        dict(
            dbkey=build,
            value=value,
            name=name,
            path=build_target_dir))
130 | |
131 | |
# Script entry point: parse the command line, download the requested
# database and overwrite the input JSON file with the data table
# entries.
if __name__ == "__main__":
    print("Starting...")

    # Read command line
    parser = optparse.OptionParser(description='Download HUMAnN2 database')
    parser.add_option('--database', help="Database name")
    parser.add_option('--build', help="Build of the database")
    options, args = parser.parse_args()
    print("args : %s" % args)

    # Check for JSON file
    if len(args) != 1:
        sys.stderr.write("Need to supply JSON file name")
        sys.exit(1)

    jsonfile = args[0]

    # Read the input JSON
    params, target_dir = read_input_json(jsonfile)

    # Make the target directory
    print("Making %s" % target_dir)
    os.mkdir(target_dir)

    # Set up data tables dictionary
    data_tables = create_data_tables_dict()

    # chocophlan goes to the nucleotide table; every other database is
    # recorded in the protein table
    if options.database == "chocophlan":
        table_name = 'humann2_nucleotide_database'
    else:
        table_name = 'humann2_protein_database'
    add_data_table(data_tables, table_name)

    # Fetch data from specified data sources
    download_humann2_db(
        data_tables,
        table_name,
        options.database,
        options.build,
        target_dir)

    # Write output JSON
    print("Outputting JSON")
    output_json = json.dumps(data_tables)
    print(str(output_json))
    # json.dumps returns text, so open in text mode: writing it to a
    # file opened with 'wb' raises TypeError on Python 3. The context
    # manager makes sure the output is flushed and closed.
    with open(jsonfile, 'w') as handle:
        handle.write(output_json)
    print("Done.")