# HG changeset patch # User iuc # Date 1670709148 0 # Node ID a19189a128cb15e1634b95eca90857c268c94969 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_bakta commit fba6deae1d3707e0c14202433d0495e157745afd diff -r 000000000000 -r a19189a128cb data_manager/bakta_build_database.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/bakta_build_database.py Sat Dec 10 21:52:28 2022 +0000 @@ -0,0 +1,232 @@ +import argparse +import hashlib +import json +import os +import sys +import tarfile +from datetime import datetime +from pathlib import Path + + +import requests + + +class GetBaktaDatabaseInfo: + """ + Extract bakta database information to make a json file for data_manager + """ + + def __init__(self, + data_table_name="bakta_database", + db_name=Path.cwd().joinpath("db"), + db_version="latest", + test_mode=False): + self.bakta_table_list = None + self.db_url = None + self.data_table_entry = None + self.data_table_name = data_table_name + self.db_name = db_name + self.db_version = db_version + self.DB_VERSIONS_URL = 'https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json' + self.DB_TEST_URL = 'https://zenodo.org/record/7360542/files/db-versions.json' + self.test_mode = test_mode + + def get_data_table_format(self): + """ + Skeleton of a data_table format + return: a data table formated for json output + """ + self.data_table_entry = { + "data_tables": { + self.data_table_name: {} + } + } + return self.data_table_entry + + def fetch_db_versions(self, db_version="latest"): + """ + List bakta database info related to the db_version selected + """ + if self.test_mode is True: + self.DB_VERSIONS_URL = self.DB_TEST_URL + try: + with requests.get(self.DB_VERSIONS_URL) as resp: + versions = json.loads(resp.content) + except IOError as e: + print(e, file=sys.stderr) + raise e + else: + if db_version == "latest": + db_date_list = [] + for db_dic in versions: + db_date_list.append(datetime.strptime(db_dic["date"], + '%Y-%m-%d').date()) + filtered_version = max(versions, key=lambda x: x['date']) + else: + filtered_version = None + for item in versions: + if '{0}.{1}'.format(item["major"], item["minor"]) == db_version: + filtered_version = item + break + if filtered_version is None: + print("No matching version detected in the list") + if filtered_version is not None: + self.db_url = f"https://zenodo.org/record/" \ + f"{filtered_version['record']}/files/db.tar.gz" + self.db_version = db_version + return filtered_version + + def get_data_manager(self, bakta_database_info): + self.bakta_table_list = self.get_data_table_format() + bakta_value = f"V{bakta_database_info['major']}." \ + f"{bakta_database_info['minor']}_" \ + f"{bakta_database_info['date']}" + tool_version = str(f"{bakta_database_info['software-min']['major']}." + f"{bakta_database_info['software-min']['minor']}") + data_info = dict(value=bakta_database_info['record'], + dbkey=bakta_value, + bakta_version=tool_version, + path="db") + self.bakta_table_list["data_tables"][self.data_table_name] = [data_info] + return self.bakta_table_list + + +class InstallBaktaDatabase(GetBaktaDatabaseInfo): + """ + Download the bakta database, + check md5 sum, + untar the download db and update for the amrfinderplus database + """ + + def __init__(self, + db_dir=Path.cwd(), + db_name="bakta", + tarball_name="db.tar.gz", + test_mode=False): + super().__init__() + self.md5 = None + self.db_dir = db_dir + self.db_name = db_name + self.tarball_name = tarball_name + self.tarball_path = None + self.test_mode = test_mode + + def download(self): + self.db_name = f'{self.db_name}_{self.db_version}' + bakta_path = Path(self.db_dir).joinpath(self.tarball_name) + try: + with bakta_path.open('wb') as fh_out, \ + requests.get(self.db_url, stream=True) as resp: + total_length = resp.headers.get('content-length') + if total_length is None: # no content length header + for data in resp.iter_content(chunk_size=1024 * 1024): + fh_out.write(data) + else: + for data in resp.iter_content(chunk_size=1024 * 1024): + fh_out.write(data) + print(f'Download bakta database {self.db_version}') + self.tarball_path = bakta_path + except IOError: + print(f'ERROR: Could not download file from Zenodo!' + f' url={self.db_url}, path={self.tarball_name}') + + def untar(self): + db_path = Path(self.db_dir).as_posix() + try: + with self.tarball_path.open('rb') as fh_in, \ + tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file: + tar_file.extractall(path=db_path) + print(f'Untar the database in {db_path}') + return db_path + except OSError: + sys.exit(f'ERROR: Could not extract {self.tarball_name} ' + f'to {self.db_name}') + + def calc_md5_sum(self, buffer_size=1048576): + tarball_path = Path(self.db_dir).joinpath(self.tarball_name) + self.md5 = self.fetch_db_versions(db_version=self.db_version)["md5"] + md5 = hashlib.md5() + with tarball_path.open('rb') as fh: + data = fh.read(buffer_size) + while data: + md5.update(data) + data = fh.read(buffer_size) + if md5.hexdigest() == self.md5: + print('\t...md5 control database OK') + else: + print(f"Error: corrupt database file! " + f"calculated md5 = {md5.hexdigest()}" + f" different from {self.md5} ") + + +""" +This is the method to download the amrfinderplus database need by bakta. +Deprecated to use the amrfinderplus data_manager + def update_amrfinderplus_db(self): + amrfinderplus_db_path = f"{self.db_dir}/{self.db_name}/db/amrfinderplus-db" + if self.db_version == "test": + cmd = [ + 'amrfinder_update', + '--database', str(amrfinderplus_db_path), + '--force_update', + '--help' + ] + else: + cmd = [ + 'amrfinder_update', + '--database', str(amrfinderplus_db_path), + '--force_update' + ] + proc = sp.run( + cmd, + universal_newlines=True + ) + if proc.returncode != 0: + print(f"ERROR: AMRFinderPlus failed! " + f"command: 'amrfinder_update --force_update" + f" --database {amrfinderplus_db_path}'") + else: + print("AMRFinderPlus database download") +""" + + +def parse_arguments(): + # parse options and arguments + arg_parser = argparse.ArgumentParser() + arg_parser.add_argument("data_manager_json") + arg_parser.add_argument("-d", "--database_version", + help='Select the database version ' + '(major and minor eg. 4.0),' + 'default is the latest version', + default="latest", + required=True) + arg_parser.add_argument("-t", "--test", action='store_true', + help="option to test the script with an empty database") + return arg_parser.parse_args() + + +def main(): + all_args = parse_arguments() + with open(all_args.data_manager_json) as fh: + params = json.load(fh) + target_dir = params['output_data'][0]['extra_files_path'] + os.makedirs(target_dir) + # init the class to download bakta db + bakta_upload = InstallBaktaDatabase(test_mode=all_args.test) + bakta_db = bakta_upload.fetch_db_versions(db_version=all_args.database_version) + # update the path for galaxy + bakta_upload.db_dir = target_dir + # download the database + bakta_upload.download() + # check md5 sum + bakta_upload.calc_md5_sum() + # untar db + bakta_upload.untar() + # make the data_manager metadata + bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db) + with open(all_args.data_manager_json, 'w') as fh: + json.dump(bakta_data_manager, fh, sort_keys=True) + + +if __name__ == '__main__': + main() diff -r 000000000000 -r a19189a128cb data_manager/bakta_build_database.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/bakta_build_database.xml Sat Dec 10 21:52:28 2022 +0000 @@ -0,0 +1,49 @@ + + Bakta database builder + + macro.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ]]> + + 10.1099/mgen.0.000685 + + diff -r 000000000000 -r a19189a128cb data_manager/macro.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/macro.xml Sat Dec 10 21:52:28 2022 +0000 @@ -0,0 +1,13 @@ + + 1.5.1 + 2.27.1 + 3.8 + 0 + 21.05 + + + python + requests + + + diff -r 000000000000 -r a19189a128cb data_manager_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Sat Dec 10 21:52:28 2022 +0000 @@ -0,0 +1,19 @@ + + + + + + + + + + ${path} + bakta_database/${value} + + ${GALAXY_DATA_MANAGER_DATA_PATH}/bakta_database/${value} + abspath + + + + + diff -r 000000000000 -r a19189a128cb test-data/bakta_test.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bakta_test.loc Sat Dec 10 21:52:28 2022 +0000 @@ -0,0 +1,11 @@ +# this is a tab separated file describing the location of bakta database +# +# the columns are: +# value, dbkey, bakta_version, path +# +# for example +7197299 V0.0_date_test 0.0 ${__HERE__} +7197299 V1.0_2022-10-12 1.4 /tmp/tmpxrkfnuec/galaxy-dev/tool-data/bakta_database/7197299 +7360139 V2.0_2022-11-25 1.5 /tmp/tmpxrkfnuec/galaxy-dev/tool-data/bakta_database/7360139 +7197299 V1.0_2022-10-12 1.4 /tmp/tmpwe9n4gyg/galaxy-dev/tool-data/bakta_database/7197299 +7360139 V2.0_2022-11-25 1.5 /tmp/tmpwe9n4gyg/galaxy-dev/tool-data/bakta_database/7360139 diff -r 000000000000 -r a19189a128cb test-data/bakta_test_data_manager.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bakta_test_data_manager.json Sat Dec 10 21:52:28 2022 +0000 @@ -0,0 +1,1 @@ +{"data_tables": {"bakta_database": [{"bakta_version": "1.4", "dbkey": "V1.0_2022-10-12", "path": "db", "value": "7197299"}]}} \ No newline at end of file diff -r 000000000000 -r a19189a128cb test-data/bakta_test_data_manager_test2.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bakta_test_data_manager_test2.json Sat Dec 10 21:52:28 2022 +0000 @@ -0,0 +1,1 @@ +{"data_tables": {"bakta_database": [{"bakta_version": "1.5", "dbkey": "V2.0_2022-11-25", "path": "db", "value": "7360139"}]}} \ No newline at end of file diff -r 000000000000 -r a19189a128cb test-data/db-versions.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/db-versions.json Sat Dec 10 21:52:28 2022 +0000 @@ -0,0 +1,26 @@ +[ + { + "date": "2022-10-12", + "major": 1, + "minor": 0, + "doi": "10.5281/zenodo.7197299", + "record": "7197299", + "md5": "8b0250c17078742fc12207d4efb0fc1a", + "software-min": { + "major": 1, + "minor": 4 + } + }, + { + "date": "2022-11-25", + "major": 2, + "minor": 0, + "doi": "10.5281/zenodo.7360139", + "record": "7360139", + "md5": "ebdb799a6bd97e56ca359db781ab8bab", + "software-min": { + "major": 1, + "minor": 5 + } + } +] diff -r 000000000000 -r a19189a128cb tool-data/bakta_database.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/bakta_database.loc Sat Dec 10 21:52:28 2022 +0000 @@ -0,0 +1,7 @@ +# this is a tab separated file describing the location of bakta database +# +# the columns are: +# value, dbkey, bakta_version, path +# +# for example +#7197299 V0.0_date_test 0.0 ${__HERE__} diff -r 000000000000 -r a19189a128cb tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Sat Dec 10 21:52:28 2022 +0000 @@ -0,0 +1,7 @@ + + + + value, dbkey, bakta_version, path + +
+
diff -r 000000000000 -r a19189a128cb tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Sat Dec 10 21:52:28 2022 +0000 @@ -0,0 +1,7 @@ + + + + value, dbkey, bakta_version, path + +
+