comparison data_manager/data_manager_metaphlan_download.py @ 6:d8dc037e3c13 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_metaphlan_database_downloader commit 3c1a0c4a94f78437c6df74b5348826e33e734a05
author iuc
date Mon, 29 Jul 2024 07:13:31 +0000
parents a88f077f1994
children
comparison
equal deleted inserted replaced
5:a88f077f1994 6:d8dc037e3c13
1 #!/usr/bin/env python
2 #
3 # Data manager for reference data for the MetaPhlAn Galaxy tools
4 import argparse
5 import json
6 import subprocess
7 from datetime import date
8 from pathlib import Path
9
10
11 # Utility functions for interacting with Galaxy JSON
def read_input_json(json_fp):
    """Read the JSON supplied from the data manager tool

    Arguments:
      json_fp: path to the JSON file to read

    Returns a tuple (param_dict,extra_files_path)

    'param_dict' is an arbitrary dictionary of parameters
    input into the tool; 'extra_files_path' is the path
    (as a pathlib.Path) to a directory where output files
    must be put for the receiving data manager to pick them
    up. It is taken from the first item of 'output_data'.

    NB the directory pointed to by 'extra_files_path'
    doesn't exist initially, it is the job of the script
    to create it if necessary.

    """
    # Decode explicitly as UTF-8 rather than relying on the
    # locale's default encoding, which may not be UTF-8.
    with open(json_fp, encoding="utf-8") as fh:
        params = json.load(fh)
    extra_files_path = params['output_data'][0]['extra_files_path']
    return (params['param_dict'], Path(extra_files_path))
31
32
33 # Utility functions for creating data table dictionaries
34 #
35 # Example usage:
36 # >>> d = create_data_tables_dict()
37 # >>> add_data_table(d,'my_data')
38 # >>> add_data_table_entry(d,'my_data',dict(dbkey='hg19',value='human'))
39 # >>> add_data_table_entry(d,'my_data',dict(dbkey='mm9',value='mouse'))
40 # >>> print(json.dumps(d))
def create_data_tables_dict():
    """Build an empty container for data table information

    The returned dictionary has a single key, 'data_tables',
    mapped to an empty dictionary. It is meant to be filled
    in using 'add_data_table' and 'add_data_table_entry', and
    can then be serialised to JSON to be sent back to the
    data manager.

    A new dictionary is created on every call.

    """
    return dict(data_tables={})
54
55
def add_data_table(d, table):
    """Register an empty data table in the data tables dictionary

    Stores an empty list under the name 'table' inside
    d['data_tables'], ready to receive entries. Anything
    already stored under that name is replaced.

    """
    tables = d['data_tables']
    tables[table] = []
63
64
def add_data_table_entry(d, table, entry):
    """Add an entry to a data table

    Appends an entry to the data table 'table'. 'entry'
    should be a dictionary where the keys are the names of
    columns in the data table.

    Arguments:
      d: the data tables dictionary
      table: name of the data table to append to
      entry: the entry to append

    Raises an exception if the named data table doesn't
    exist; the underlying KeyError is attached as its cause.

    """
    # Only the lookup can signal a missing table, so keep the
    # 'try' limited to it and append outside.
    try:
        rows = d['data_tables'][table]
    except KeyError as err:
        raise Exception("add_data_table_entry: no table '%s'" % table) from err
    rows.append(entry)
80
81
def download_metaphlan_db(data_tables, index, table_name, target_dp):
    """Download MetaPhlAn database

    Runs 'metaphlan --install' for the database version 'index',
    pointing it at a subdirectory of 'target_dp' named after the
    index, then appends an entry describing that database to the
    data table 'table_name'.

    The 'data_tables' dictionary should have been created using
    the 'create_data_tables_dict' and 'add_data_table' functions.

    Arguments:
      data_tables: a dictionary containing the data table info
      index: version of the database to install
      table_name: name of the data table to add the entry to
      target_dp: pathlib.Path of the directory under which the
        database directory is placed

    Raises subprocess.CalledProcessError if the metaphlan
    command exits with a non-zero status.

    """
    db_dp = target_dp / Path(index)
    # Pass the command as an argument list without a shell, so
    # that spaces or shell metacharacters in the index or in the
    # target path are handed to metaphlan verbatim.
    cmd = [
        "metaphlan",
        "--install",
        "--index", str(index),
        "--bowtie2db", str(db_dp),
    ]
    subprocess.check_call(cmd)
    add_data_table_entry(
        data_tables,
        table_name,
        dict(
            dbkey=index,
            # Index name suffixed with today's date as DDMMYYYY
            value='%s-%s' % (index, date.today().strftime("%d%m%Y")),
            name="MetaPhlAn clade-specific marker genes (%s)" % index,
            path=str(db_dp),
            # Indexes with 'SGB' in their name are tagged 'SGB',
            # every other index is tagged 'legacy'
            db_version='SGB' if 'SGB' in index else 'legacy'))
111
112
if __name__ == "__main__":
    print("Starting...")

    # Read command line
    # Both options are used unconditionally below, so let argparse
    # reject a missing one with a usage message instead of failing
    # later with a TypeError.
    parser = argparse.ArgumentParser(description='Download and build MetaPhlan database')
    parser.add_argument('--index', required=True, help="Version of the database")
    parser.add_argument('--json', required=True, help="Path to JSON file")
    args = parser.parse_args()
    print("args : %s" % args)

    # Read the input JSON
    json_fp = Path(args.json)
    params, target_dp = read_input_json(json_fp)

    # Make the target directory
    print("Making %s" % target_dp)
    target_dp.mkdir(parents=True, exist_ok=True)

    # Set up data tables dictionary
    data_tables = create_data_tables_dict()
    add_data_table(data_tables, "metaphlan_database_versioned")

    # Fetch data from specified data sources
    print("Download and build database")
    download_metaphlan_db(
        data_tables,
        args.index,
        "metaphlan_database_versioned",
        target_dp)

    # Write output JSON
    # The data tables are written back to the same file the
    # input parameters were read from, replacing its contents.
    print("Outputting JSON")
    with open(json_fp, 'w') as fh:
        json.dump(data_tables, fh, sort_keys=True)
    print("Done.")