Mercurial > repos > iuc > data_manager_gtdbtk_database_installer
comparison data_manager/gtdbtk_database_installer.py @ 7:3b1d503c6260 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_gtdbtk_database_installer commit 671e8c706fa211b6ec1c476d3d1a36d098822fe5
author | iuc |
---|---|
date | Thu, 03 Oct 2024 12:42:22 +0000 |
parents | df84aaed4769 |
children | 750d902de22c |
comparison
equal
deleted
inserted
replaced
6:df84aaed4769 | 7:3b1d503c6260 |
---|---|
5 import json | 5 import json |
6 import os | 6 import os |
7 import shutil | 7 import shutil |
8 import sys | 8 import sys |
9 import tarfile | 9 import tarfile |
10 from datetime import datetime | 10 from datetime import date |
11 from urllib.parse import urlparse | 11 from urllib.parse import urlparse |
12 from urllib.request import HTTPError, Request, urlopen | 12 from urllib.request import HTTPError, Request, urlopen |
13 | 13 |
14 # rather provide the urls based on the release, less error potential for the admins ! | 14 # rather provide the urls based on the release, less error potential for the admins ! |
15 urls = { | 15 urls = { |
116 shutil.move(item_path, target_directory) | 116 shutil.move(item_path, target_directory) |
117 os.rmdir(subdir_path) | 117 os.rmdir(subdir_path) |
118 return target_directory | 118 return target_directory |
119 | 119 |
120 | 120 |
121 def download(database_name, release, meta, test, out_file): | 121 def create_data_manager_entry(database_name, release, file_path): |
122 time = date.today().strftime("%Y-%m-%d") | |
123 data_manager_entry = {} | |
124 data_manager_entry["value"] = ( | |
125 f"{database_name.replace(' ', '_').lower()}_release_{release}_downloaded_{time}" | |
126 ) | |
127 data_manager_entry["name"] = f"{database_name} - release {release} ({time})" | |
128 data_manager_entry["path"] = file_path | |
129 data_manager_entry["version"] = release | |
130 return data_manager_entry | |
131 | |
132 | |
133 def download(release, meta, test, out_file): | |
122 | 134 |
123 with open(out_file) as fh: | 135 with open(out_file) as fh: |
124 params = json.load(fh) | 136 params = json.load(fh) |
125 | 137 |
126 target_directory = params["output_data"][0]["extra_files_path"] | 138 target_directory = params["output_data"][0]["extra_files_path"] |
135 # make use of the test to check if all urls exists | 147 # make use of the test to check if all urls exists |
136 for _version, items in urls.items(): | 148 for _version, items in urls.items(): |
137 for url in items.values(): | 149 for url in items.values(): |
138 assert is_urlfile(url) | 150 assert is_urlfile(url) |
139 | 151 |
140 # download both taxonomy metadata tables | 152 data_manager_json = {"data_tables": {}} |
153 | |
154 # download taxonomy metadata tables | |
141 if meta: | 155 if meta: |
142 url = urls[release]["meta_ar"] | 156 url = urls[release]["meta_ar"] |
143 file_path = url_download(url, target_directory, meta) | 157 url_download(url, target_directory, meta) |
144 url = urls[release]["meta_bac"] | 158 url = urls[release]["meta_bac"] |
145 file_path = url_download(url, target_directory, meta) | 159 file_path = url_download(url, target_directory, meta) |
160 | |
161 data_manager_json["data_tables"]["gtdbtk_database_metadata_versioned"] = [ | |
162 create_data_manager_entry("Metadata Tables", release, file_path) | |
163 ] | |
146 # download the full DB | 164 # download the full DB |
147 else: | 165 else: |
148 url = urls[release]["full"] | 166 url = urls[release]["full"] |
149 file_path = url_download(url, target_directory, meta) | 167 file_path = url_download(url, target_directory, meta) |
150 | 168 data_manager_json["data_tables"]["gtdbtk_database_versioned"] = [ |
151 time = datetime.utcnow().strftime("%Y-%m-%d") | 169 create_data_manager_entry("Full Database", release, file_path) |
152 | 170 ] |
153 data_manager_json = {"data_tables": {}} | |
154 data_manager_entry = {} | |
155 data_manager_entry["value"] = f"{database_name}_release_{release}_downloaded_{time}" | |
156 data_manager_entry["name"] = database_name | |
157 data_manager_entry["path"] = file_path | |
158 data_manager_entry["version"] = release | |
159 | 171 |
160 # store in dedicated metadata table | 172 # store in dedicated metadata table |
161 if meta: | |
162 data_manager_json["data_tables"][ | |
163 "gtdbtk_database_metadata_versioned" | |
164 ] = data_manager_entry | |
165 else: | |
166 data_manager_json["data_tables"][ | |
167 "gtdbtk_database_versioned" | |
168 ] = data_manager_entry | |
169 | |
170 with open(out_file, "w") as fh: | 173 with open(out_file, "w") as fh: |
171 json.dump(data_manager_json, fh, sort_keys=True) | 174 json.dump(data_manager_json, fh, sort_keys=True) |
172 | 175 |
173 | 176 |
174 parser = argparse.ArgumentParser() | 177 if __name__ == "__main__": |
175 | 178 parser = argparse.ArgumentParser() |
176 parser.add_argument( | 179 |
177 "--database_name", dest="database_name", help="GTDB-Tk database display name" | 180 parser.add_argument("--version", dest="version", help="DB version") |
178 ) | 181 parser.add_argument( |
179 | 182 "--release", dest="release", help="Release of the GTDB-Tk database version" |
180 parser.add_argument("--version", dest="version", help="DB version") | 183 ) |
181 | 184 parser.add_argument("--out_file", dest="out_file", help="JSON output file") |
182 parser.add_argument( | 185 parser.add_argument( |
183 "--release", dest="release", help="Release of the GTDB-Tk database version" | 186 "--meta", |
184 ) | 187 dest="meta", |
185 parser.add_argument("--out_file", dest="out_file", help="JSON output file") | 188 action="store_true", |
186 parser.add_argument( | 189 help="Store meta data flag", |
187 "--meta", | 190 ) |
188 dest="meta", | 191 parser.add_argument( |
189 action="store_true", | 192 "--test", |
190 help="Store meta data flag", | 193 dest="test", |
191 ) | 194 action="store_true", |
192 | 195 help="Run test", |
193 parser.add_argument( | 196 ) |
194 "--test", | 197 args = parser.parse_args() |
195 dest="test", | 198 |
196 action="store_true", | 199 download( |
197 help="Run test", | 200 args.release, |
198 ) | 201 args.meta, |
199 | 202 args.test, |
200 args = parser.parse_args() | 203 args.out_file, |
201 | 204 ) |
202 download( | |
203 args.database_name, | |
204 args.release, | |
205 args.meta, | |
206 args.test, | |
207 args.out_file, | |
208 ) |