# HG changeset patch # User iuc # Date 1687556225 0 # Node ID 3e73c97f025d1e39dfac522ec7d42181461654d1 # Parent adfd6bf710bde31b3611d7cab6c925f5ee6f9ff5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bakta_database commit 487cb35fe55883ac6eeb8dda58b56c9ca2ec0a85 diff -r adfd6bf710bd -r 3e73c97f025d data_manager/bakta_build_database.py --- a/data_manager/bakta_build_database.py Sun Apr 16 08:29:25 2023 +0000 +++ b/data_manager/bakta_build_database.py Fri Jun 23 21:37:05 2023 +0000 @@ -2,6 +2,7 @@ import hashlib import json import os +import re import sys import tarfile from datetime import datetime @@ -16,38 +17,50 @@ Extract bakta database information to make a json file for data_manager """ - def __init__(self, - data_table_name="bakta_database", - db_name=Path.cwd().joinpath("db"), - db_version="latest", - test_mode=False): + def __init__( + self, + data_table_name="bakta_database", + db_name=Path.cwd().joinpath("db"), + db_version="latest", + tarball_name="db.tar.gz", + test_mode=False, + ): self.bakta_table_list = None self.db_url = None + self.db_type = "" self.data_table_entry = None self.data_table_name = data_table_name self.db_name = db_name + self.tar_name = tarball_name self.db_version = db_version - self.DB_VERSIONS_URL = 'https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json' - self.DB_TEST_URL = 'https://zenodo.org/record/7360542/files/db-versions.json' + self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json" + self.DB_TEST_URL = "https://zenodo.org/record/8021032/files/db-versions.json" self.test_mode = test_mode + def get_database_type(self): + self.light_db = bool(re.search(pattern="light", string=self.db_version)) + self.db_version = self.db_version.split(sep="_")[0] + if self.light_db: + self.db_type = "light" + self.tar_name = "db-light.tar.gz" + self.md5 = self.fetch_db_versions()["md5-light"] + else: + self.md5 = self.fetch_db_versions()["md5"] + def get_data_table_format(self): """ Skeleton of a data_table format return: a data table formated for json output """ - self.data_table_entry = { - "data_tables": { - self.data_table_name: {} - } - } + self.data_table_entry = {"data_tables": {self.data_table_name: {}}} return self.data_table_entry - def fetch_db_versions(self, db_version="latest"): + def fetch_db_versions(self): """ List bakta database info related to the db_version selected """ - if self.test_mode is True: + + if self.test_mode: self.DB_VERSIONS_URL = self.DB_TEST_URL try: with requests.get(self.DB_VERSIONS_URL) as resp: @@ -55,38 +68,43 @@ except IOError as e: print(e, file=sys.stderr) raise e + + if self.db_version == "latest": + db_date_list = [] + for db_dic in versions: + db_date_list.append( + datetime.strptime(db_dic["date"], "%Y-%m-%d").date() + ) + filtered_version = max(versions, key=lambda x: x["date"]) else: - if db_version == "latest": - db_date_list = [] - for db_dic in versions: - db_date_list.append(datetime.strptime(db_dic["date"], - '%Y-%m-%d').date()) - filtered_version = max(versions, key=lambda x: x['date']) - else: - filtered_version = None - for item in versions: - if '{0}.{1}'.format(item["major"], item["minor"]) == db_version: - filtered_version = item - break - if filtered_version is None: - print("No matching version detected in the list") - if filtered_version is not None: - self.db_url = f"https://zenodo.org/record/" \ - f"{filtered_version['record']}/files/db.tar.gz" - self.db_version = db_version - return filtered_version + filtered_version = None + for item in versions: + if "{0}.{1}".format(item["major"], item["minor"]) == self.db_version: + filtered_version = item + break + if filtered_version is None: + print("No matching version detected in the list") + else: + self.db_url = f"https://zenodo.org/record/{filtered_version['record']}/files/{self.tar_name}" + return filtered_version def get_data_manager(self, bakta_database_info): self.bakta_table_list = self.get_data_table_format() - bakta_name = f"V{bakta_database_info['major']}." \ - f"{bakta_database_info['minor']}_" \ - f"{bakta_database_info['date']}" - tool_version = str(f"{bakta_database_info['software-min']['major']}." - f"{bakta_database_info['software-min']['minor']}") - data_info = dict(value=bakta_name, - dbkey=bakta_database_info['record'], - bakta_version=tool_version, - path="db") + bakta_name = ( + f"V{bakta_database_info['major']}." + f"{bakta_database_info['minor']}{self.db_type}_" + f"{bakta_database_info['date']}" + ) + tool_version = str( + f"{bakta_database_info['software-min']['major']}." + f"{bakta_database_info['software-min']['minor']}" + ) + data_info = dict( + value=bakta_name, + dbkey=bakta_database_info["record"], + bakta_version=tool_version, + path="db", + ) self.bakta_table_list["data_tables"][self.data_table_name] = [data_info] return self.bakta_table_list @@ -98,110 +116,88 @@ untar the download db and update for the amrfinderplus database """ - def __init__(self, - db_dir=Path.cwd(), - db_name="bakta", - tarball_name="db.tar.gz", - test_mode=False): + def __init__( + self, db_dir=Path.cwd(), db_name="bakta", db_version="latest", test_mode=False + ): super().__init__() self.md5 = None + self.db_version = db_version self.db_dir = db_dir self.db_name = db_name - self.tarball_name = tarball_name - self.tarball_path = None + self.tarball_path = "" self.test_mode = test_mode + self.get_database_type() def download(self): - self.db_name = f'{self.db_name}_{self.db_version}' - bakta_path = Path(self.db_dir).joinpath(self.tarball_name) + self.db_name = f"{self.db_name}_{self.db_version}{self.db_type}" + bakta_path = Path(self.db_dir).joinpath(self.tar_name) try: - with bakta_path.open('wb') as fh_out, \ - requests.get(self.db_url, stream=True) as resp: - total_length = resp.headers.get('content-length') + with bakta_path.open("wb") as fh_out, requests.get( + self.db_url, stream=True) as resp: + total_length = resp.headers.get("content-length") if total_length is None: # no content length header for data in resp.iter_content(chunk_size=1024 * 1024): fh_out.write(data) else: for data in resp.iter_content(chunk_size=1024 * 1024): fh_out.write(data) - print(f'Download bakta database {self.db_version}') + print(f"Download bakta database {self.db_version}") self.tarball_path = bakta_path except IOError: - print(f'ERROR: Could not download file from Zenodo!' - f' url={self.db_url}, path={self.tarball_name}') + print( + f"ERROR: Could not download file from Zenodo!" + f" url={self.db_url}, to={self.tarball_path}" + ) def untar(self): db_path = Path(self.db_dir).as_posix() try: - with self.tarball_path.open('rb') as fh_in, \ - tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file: + with self.tarball_path.open("rb") as fh_in, tarfile.open( + fileobj=fh_in, mode="r:gz" + ) as tar_file: tar_file.extractall(path=db_path) - print(f'Untar the database in {db_path}') + print(f"Untar the database in {db_path}") return db_path except OSError: - sys.exit(f'ERROR: Could not extract {self.tarball_name} ' - f'to {self.db_name}') + sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {self.db_name}") def calc_md5_sum(self, buffer_size=1048576): - tarball_path = Path(self.db_dir).joinpath(self.tarball_name) - self.md5 = self.fetch_db_versions(db_version=self.db_version)["md5"] + tarball_path = Path(self.db_dir).joinpath(self.tar_name) md5 = hashlib.md5() - with tarball_path.open('rb') as fh: + with tarball_path.open("rb") as fh: data = fh.read(buffer_size) while data: md5.update(data) data = fh.read(buffer_size) if md5.hexdigest() == self.md5: - print('\t...md5 control database OK') + print("\t...md5 control database OK") else: - print(f"Error: corrupt database file! " - f"calculated md5 = {md5.hexdigest()}" - f" different from {self.md5} ") - - -""" -This is the method to download the amrfinderplus database need by bakta. -Deprecated to use the amrfinderplus data_manager - def update_amrfinderplus_db(self): - amrfinderplus_db_path = f"{self.db_dir}/{self.db_name}/db/amrfinderplus-db" - if self.db_version == "test": - cmd = [ - 'amrfinder_update', - '--database', str(amrfinderplus_db_path), - '--force_update', - '--help' - ] - else: - cmd = [ - 'amrfinder_update', - '--database', str(amrfinderplus_db_path), - '--force_update' - ] - proc = sp.run( - cmd, - universal_newlines=True - ) - if proc.returncode != 0: - print(f"ERROR: AMRFinderPlus failed! " - f"command: 'amrfinder_update --force_update" - f" --database {amrfinderplus_db_path}'") - else: - print("AMRFinderPlus database download") -""" + print( + f"Error: corrupt database file! " + f"calculated md5 = {md5.hexdigest()}" + f" different from {self.md5} " + ) def parse_arguments(): # parse options and arguments arg_parser = argparse.ArgumentParser() arg_parser.add_argument("data_manager_json") - arg_parser.add_argument("-d", "--database_version", - help='Select the database version ' - '(major and minor eg. 4.0),' - 'default is the latest version', - default="latest", - required=True) - arg_parser.add_argument("-t", "--test", action='store_true', - help="option to test the script with an empty database") + arg_parser.add_argument( + "-d", + "--database_version", + help="Select the database version " + "(major and minor eg. 4.0)," + "default is the latest version", + default="latest", + required=True, + ) + arg_parser.add_argument( + "-t", + "--test", + action="store_true", + help="option to test the script with an empty database", + ) return arg_parser.parse_args() @@ -209,11 +205,13 @@ all_args = parse_arguments() with open(all_args.data_manager_json) as fh: params = json.load(fh) - target_dir = params['output_data'][0]['extra_files_path'] + target_dir = params["output_data"][0]["extra_files_path"] os.makedirs(target_dir) # init the class to download bakta db - bakta_upload = InstallBaktaDatabase(test_mode=all_args.test) - bakta_db = bakta_upload.fetch_db_versions(db_version=all_args.database_version) + bakta_upload = InstallBaktaDatabase( + test_mode=all_args.test, db_version=all_args.database_version + ) + bakta_db = bakta_upload.fetch_db_versions() # update the path for galaxy bakta_upload.db_dir = target_dir # download the database @@ -224,9 +222,9 @@ bakta_upload.untar() # make the data_manager metadata bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db) - with open(all_args.data_manager_json, 'w') as fh: + with open(all_args.data_manager_json, "w") as fh: json.dump(bakta_data_manager, fh, sort_keys=True) -if __name__ == '__main__': +if __name__ == "__main__": main() diff -r adfd6bf710bd -r 3e73c97f025d data_manager/bakta_build_database.xml --- a/data_manager/bakta_build_database.xml Sun Apr 16 08:29:25 2023 +0000 +++ b/data_manager/bakta_build_database.xml Fri Jun 23 21:37:05 2023 +0000 @@ -20,6 +20,8 @@ + + @@ -31,13 +33,19 @@ - + - + + + + + + + - 1.5.1 + 1.8.1 2.27.1 3.8 - 0 + 1 21.05 diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test.loc --- a/test-data/bakta_test.loc Sun Apr 16 08:29:25 2023 +0000 +++ b/test-data/bakta_test.loc Fri Jun 23 21:37:05 2023 +0000 @@ -1,9 +1,6 @@ -# this is a tab separated file describing the location of bakta database -# -# the columns are: -# value, dbkey, bakta_version, path -# -# for example -7197299 V0.0_date_test 0.0 ${__HERE__} -V1.0_2022-10-12 7197299 1.4 /tmp/tmpiyh6lcqw/galaxy-dev/tool-data/bakta_database/7197299 -V2.0_2022-11-25 7360139 1.5 /tmp/tmpiyh6lcqw/galaxy-dev/tool-data/bakta_database/7360139 +V1.0_2022-10-12 7197299 1.4 /tmp/tmpq5t7s3c5/galaxy-dev/tool-data/bakta_database/7197299 +V5.0_2023-06-08 8021027 1.8 /tmp/tmpq5t7s3c5/galaxy-dev/tool-data/bakta_database/8021027 +V5.0light_2023-06-08 8021027 1.8 /tmp/tmpq5t7s3c5/galaxy-dev/tool-data/bakta_database/8021027 +V1.0_2022-10-12 7197299 1.4 /tmp/tmpydhjlpxl/galaxy-dev/tool-data/bakta_database/7197299 +V5.0_2023-06-08 8021027 1.8 /tmp/tmpydhjlpxl/galaxy-dev/tool-data/bakta_database/8021027 +V5.0light_2023-06-08 8021027 1.8 /tmp/tmpydhjlpxl/galaxy-dev/tool-data/bakta_database/8021027 diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager.json --- a/test-data/bakta_test_data_manager.json Sun Apr 16 08:29:25 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -{"data_tables": {"bakta_database": [{"bakta_version": "1.4", "dbkey": "7197299", "path": "db", "value": "V1.0_2022-10-12"}]}} \ No newline at end of file diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager1.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bakta_test_data_manager1.json Fri Jun 23 21:37:05 2023 +0000 @@ -0,0 +1,1 @@ +{"data_tables": {"bakta_database": [{"bakta_version": "1.4", "dbkey": "7197299", "path": "db", "value": "V1.0_2022-10-12"}]}} \ No newline at end of file diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager2.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bakta_test_data_manager2.json Fri Jun 23 21:37:05 2023 +0000 @@ -0,0 +1,1 @@ +{"data_tables": {"bakta_database": [{"bakta_version": "1.8", "dbkey": "8021027", "path": "db", "value": "V5.0_2023-06-08"}]}} \ No newline at end of file diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager3.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bakta_test_data_manager3.json Fri Jun 23 21:37:05 2023 +0000 @@ -0,0 +1,1 @@ +{"data_tables": {"bakta_database": [{"bakta_version": "1.8", "dbkey": "8021027", "path": "db", "value": "V5.0light_2023-06-08"}]}} \ No newline at end of file diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager_test2.json --- a/test-data/bakta_test_data_manager_test2.json Sun Apr 16 08:29:25 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -{"data_tables": {"bakta_database": [{"bakta_version": "1.5", "dbkey": "7360139", "path": "db", "value": "V2.0_2022-11-25"}]}} \ No newline at end of file diff -r adfd6bf710bd -r 3e73c97f025d test-data/db-versions.json --- a/test-data/db-versions.json Sun Apr 16 08:29:25 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -[ - { - "date": "2022-10-12", - "major": 1, - "minor": 0, - "doi": "10.5281/zenodo.7197299", - "record": "7197299", - "md5": "8b0250c17078742fc12207d4efb0fc1a", - "software-min": { - "major": 1, - "minor": 4 - } - }, - { - "date": "2022-11-25", - "major": 2, - "minor": 0, - "doi": "10.5281/zenodo.7360139", - "record": "7360139", - "md5": "ebdb799a6bd97e56ca359db781ab8bab", - "software-min": { - "major": 1, - "minor": 5 - } - } -]