comparison data_manager/gtdbtk_database_installer.py @ 7:3b1d503c6260 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_gtdbtk_database_installer commit 671e8c706fa211b6ec1c476d3d1a36d098822fe5
author iuc
date Thu, 03 Oct 2024 12:42:22 +0000
parents df84aaed4769
children 750d902de22c
comparison
equal deleted inserted replaced
6:df84aaed4769 7:3b1d503c6260
5 import json 5 import json
6 import os 6 import os
7 import shutil 7 import shutil
8 import sys 8 import sys
9 import tarfile 9 import tarfile
10 from datetime import datetime 10 from datetime import date
11 from urllib.parse import urlparse 11 from urllib.parse import urlparse
12 from urllib.request import HTTPError, Request, urlopen 12 from urllib.request import HTTPError, Request, urlopen
13 13
14 # Provide the URLs based on the release instead; this leaves less room for error by the admins. 14 # Provide the URLs based on the release instead; this leaves less room for error by the admins.
15 urls = { 15 urls = {
116 shutil.move(item_path, target_directory) 116 shutil.move(item_path, target_directory)
117 os.rmdir(subdir_path) 117 os.rmdir(subdir_path)
118 return target_directory 118 return target_directory
119 119
120 120
121 def download(database_name, release, meta, test, out_file): 121 def create_data_manager_entry(database_name, release, file_path):
122 time = date.today().strftime("%Y-%m-%d")
123 data_manager_entry = {}
124 data_manager_entry["value"] = (
125 f"{database_name.replace(' ', '_').lower()}_release_{release}_downloaded_{time}"
126 )
127 data_manager_entry["name"] = f"{database_name} - release {release} ({time})"
128 data_manager_entry["path"] = file_path
129 data_manager_entry["version"] = release
130 return data_manager_entry
131
132
133 def download(release, meta, test, out_file):
122 134
123 with open(out_file) as fh: 135 with open(out_file) as fh:
124 params = json.load(fh) 136 params = json.load(fh)
125 137
126 target_directory = params["output_data"][0]["extra_files_path"] 138 target_directory = params["output_data"][0]["extra_files_path"]
135 # use the test run to check that all URLs exist 147 # use the test run to check that all URLs exist
136 for _version, items in urls.items(): 148 for _version, items in urls.items():
137 for url in items.values(): 149 for url in items.values():
138 assert is_urlfile(url) 150 assert is_urlfile(url)
139 151
140 # download both taxonomy metadata tables 152 data_manager_json = {"data_tables": {}}
153
154 # download taxonomy metadata tables
141 if meta: 155 if meta:
142 url = urls[release]["meta_ar"] 156 url = urls[release]["meta_ar"]
143 file_path = url_download(url, target_directory, meta) 157 url_download(url, target_directory, meta)
144 url = urls[release]["meta_bac"] 158 url = urls[release]["meta_bac"]
145 file_path = url_download(url, target_directory, meta) 159 file_path = url_download(url, target_directory, meta)
160
161 data_manager_json["data_tables"]["gtdbtk_database_metadata_versioned"] = [
162 create_data_manager_entry("Metadata Tables", release, file_path)
163 ]
146 # download the full DB 164 # download the full DB
147 else: 165 else:
148 url = urls[release]["full"] 166 url = urls[release]["full"]
149 file_path = url_download(url, target_directory, meta) 167 file_path = url_download(url, target_directory, meta)
150 168 data_manager_json["data_tables"]["gtdbtk_database_versioned"] = [
151 time = datetime.utcnow().strftime("%Y-%m-%d") 169 create_data_manager_entry("Full Database", release, file_path)
152 170 ]
153 data_manager_json = {"data_tables": {}}
154 data_manager_entry = {}
155 data_manager_entry["value"] = f"{database_name}_release_{release}_downloaded_{time}"
156 data_manager_entry["name"] = database_name
157 data_manager_entry["path"] = file_path
158 data_manager_entry["version"] = release
159 171
160 # store in dedicated metadata table 172 # store in dedicated metadata table
161 if meta:
162 data_manager_json["data_tables"][
163 "gtdbtk_database_metadata_versioned"
164 ] = data_manager_entry
165 else:
166 data_manager_json["data_tables"][
167 "gtdbtk_database_versioned"
168 ] = data_manager_entry
169
170 with open(out_file, "w") as fh: 173 with open(out_file, "w") as fh:
171 json.dump(data_manager_json, fh, sort_keys=True) 174 json.dump(data_manager_json, fh, sort_keys=True)
172 175
173 176
174 parser = argparse.ArgumentParser() 177 if __name__ == "__main__":
175 178 parser = argparse.ArgumentParser()
176 parser.add_argument( 179
177 "--database_name", dest="database_name", help="GTDB-Tk database display name" 180 parser.add_argument("--version", dest="version", help="DB version")
178 ) 181 parser.add_argument(
179 182 "--release", dest="release", help="Release of the GTDB-Tk database version"
180 parser.add_argument("--version", dest="version", help="DB version") 183 )
181 184 parser.add_argument("--out_file", dest="out_file", help="JSON output file")
182 parser.add_argument( 185 parser.add_argument(
183 "--release", dest="release", help="Release of the GTDB-Tk database version" 186 "--meta",
184 ) 187 dest="meta",
185 parser.add_argument("--out_file", dest="out_file", help="JSON output file") 188 action="store_true",
186 parser.add_argument( 189 help="Store meta data flag",
187 "--meta", 190 )
188 dest="meta", 191 parser.add_argument(
189 action="store_true", 192 "--test",
190 help="Store meta data flag", 193 dest="test",
191 ) 194 action="store_true",
192 195 help="Run test",
193 parser.add_argument( 196 )
194 "--test", 197 args = parser.parse_args()
195 dest="test", 198
196 action="store_true", 199 download(
197 help="Run test", 200 args.release,
198 ) 201 args.meta,
199 202 args.test,
200 args = parser.parse_args() 203 args.out_file,
201 204 )
202 download(
203 args.database_name,
204 args.release,
205 args.meta,
206 args.test,
207 args.out_file,
208 )