comparison data_manager/data_manager_humann2_download.py @ 0:048593e41359 draft

planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/data_managers/data_manager_humann2_database_downloader commit 3e179ac4ab2051414320b3811540dfc9b0966061
author iuc
date Sun, 12 Mar 2017 14:33:34 -0400
parents
children 6112a571f012
comparison
equal deleted inserted replaced
-1:000000000000 0:048593e41359
1 #!/usr/bin/env python
2 #
3 # Data manager for reference data for the 'humann2' Galaxy tools
4 import datetime
5 import json
6 import optparse
7 import os
8 import subprocess
9 import sys
10
11
# Maps a HUMAnN2 database build identifier (as passed via --build) to
# the human-readable name stored in the 'name' column of the data table.
HUMANN2_REFERENCE_DATA = {
    "full": "Full",
    "DEMO": "Demo",
    "uniref50_diamond": "Full UniRef50",
    "uniref50_ec_filtered_diamond": "EC-filtered UniRef50",
    "uniref50_GO_filtered_rapsearch2": "GO filtered UniRef50 for rapsearch2",
    "uniref90_diamond": "Full UniRef90",
    "uniref90_ec_filtered_diamond": "EC-filtered UniRef90",
    "DEMO_diamond": "Demo"
}
22
23
24 # Utility functions for interacting with Galaxy JSON
def read_input_json(jsonfile):
    """Read the JSON supplied from the data manager tool

    Returns a tuple (param_dict,extra_files_path)

    'param_dict' is an arbitrary dictionary of parameters
    input into the tool; 'extra_files_path' is the path
    to a directory where output files must be put for the
    receiving data manager to pick them up.

    NB the directory pointed to by 'extra_files_path'
    doesn't exist initially, it is the job of the script
    to create it if necessary.

    Raises KeyError/IndexError if the JSON lacks the
    'param_dict' or 'output_data' entries read below.

    """
    # Use a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(jsonfile) as handle:
        params = json.load(handle)
    return (params['param_dict'],
            params['output_data'][0]['extra_files_path'])
43
44
45 # Utility functions for creating data table dictionaries
46 #
47 # Example usage:
48 # >>> d = create_data_tables_dict()
49 # >>> add_data_table(d,'my_data')
50 # >>> add_data_table_entry(d,'my_data',dict(dbkey='hg19',value='human'))
51 # >>> add_data_table_entry(d,'my_data',dict(dbkey='mm9',value='mouse'))
52 # >>> print(json.dumps(d))
def create_data_tables_dict():
    """Build an empty container for data table information

    The returned dictionary has a single 'data_tables' key holding
    an empty dictionary. Populate it with 'add_data_table' and
    'add_data_table_entry', then serialise it to JSON to hand the
    result back to the data manager.

    """
    return {'data_tables': {}}
65
66
def add_data_table(d, table):
    """Register a data table in the data tables dictionary

    Stores an empty list under the name 'table'; any entries
    previously stored under that name are discarded.

    """
    tables = d['data_tables']
    tables[table] = []
74
75
def add_data_table_entry(d, table, entry):
    """Append a row to a named data table

    'entry' should be a dictionary whose keys are the column
    names of the data table 'table'.

    Raises an exception if the named data table hasn't been
    added to 'd' beforehand.

    """
    try:
        rows = d['data_tables'][table]
    except KeyError:
        raise Exception("add_data_table_entry: no table '%s'" % table)
    rows.append(entry)
91
92
def download_humann2_db(data_tables, table_name, database, build, target_dir):
    """Download a HUMAnN2 database and register it in a data table

    Runs 'humann2_databases --download' for the requested database
    and build, renames the resulting directory to
    '<target_dir>/<database>/<build>' and appends a matching entry
    to the data table 'table_name'.

    The 'data_tables' dictionary should have been created using
    the 'create_data_tables_dict' and 'add_data_table' functions.

    Arguments:
      data_tables: a dictionary containing the data table info
      table_name: name of the table
      database: database to download (chocophlan or uniref)
      build: build of the database to download; must be one of
        the keys of HUMANN2_REFERENCE_DATA
      target_dir: directory under which the database is stored

    Raises KeyError if 'build' is not a known build, and
    subprocess.CalledProcessError if the download command fails.

    """
    # Look up the display name first so that an unknown build fails
    # before any directory is created or any download is started.
    name = HUMANN2_REFERENCE_DATA[build]
    value = "%s-%s-%s" % (database, build, datetime.date.today().isoformat())
    db_target_dir = os.path.join(target_dir, database)
    build_target_dir = os.path.join(db_target_dir, build)
    os.makedirs(build_target_dir)
    # Pass the command as an argument list (no shell) so that paths
    # containing spaces or shell metacharacters are handled safely.
    cmd = ["humann2_databases", "--download", database, build, db_target_dir]
    subprocess.check_call(cmd)
    print(os.listdir(db_target_dir))
    # NOTE(review): assumes humann2_databases writes its output to
    # '<db_target_dir>/<database>'; that directory is moved onto the
    # (empty) build-specific directory created above.
    os.rename(os.path.join(db_target_dir, database), build_target_dir)
    print(os.listdir(db_target_dir))
    add_data_table_entry(
        data_tables,
        table_name,
        dict(
            dbkey=build,
            value=value,
            name=name,
            path=build_target_dir))
130
131
# Script entry point: parse the command line, download the requested
# HUMAnN2 database and write the resulting data table JSON back to the
# file supplied by Galaxy.
if __name__ == "__main__":
    print("Starting...")

    # Read command line
    parser = optparse.OptionParser(description='Download HUMAnN2 database')
    parser.add_option('--database', help="Database name")
    parser.add_option('--build', help="Build of the database")
    options, args = parser.parse_args()
    print("args : %s" % args)

    # Check for JSON file
    if len(args) != 1:
        sys.stderr.write("Need to supply JSON file name\n")
        sys.exit(1)

    # Both options are required to build the download command; fail
    # with a usage message rather than part-way through the download.
    if not options.database or not options.build:
        parser.error("--database and --build must both be supplied")

    jsonfile = args[0]

    # Read the input JSON
    params, target_dir = read_input_json(jsonfile)

    # Make the target directory
    print("Making %s" % target_dir)
    os.mkdir(target_dir)

    # Set up data tables dictionary
    data_tables = create_data_tables_dict()

    # chocophlan goes to the nucleotide table, anything else to the
    # protein table
    if options.database == "chocophlan":
        table_name = 'humann2_nucleotide_database'
    else:
        table_name = 'humann2_protein_database'
    add_data_table(data_tables, table_name)

    # Fetch data from specified data sources
    download_humann2_db(
        data_tables,
        table_name,
        options.database,
        options.build,
        target_dir)

    # Write output JSON
    print("Outputting JSON")
    output_json = json.dumps(data_tables)
    print(output_json)
    # json.dumps returns text, so open in text mode (binary mode raises
    # TypeError on Python 3); the context manager closes the file.
    with open(jsonfile, 'w') as handle:
        handle.write(output_json)
    print("Done.")