# HG changeset patch
# User iuc
# Date 1687556225 0
# Node ID 3e73c97f025d1e39dfac522ec7d42181461654d1
# Parent adfd6bf710bde31b3611d7cab6c925f5ee6f9ff5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bakta_database commit 487cb35fe55883ac6eeb8dda58b56c9ca2ec0a85
diff -r adfd6bf710bd -r 3e73c97f025d data_manager/bakta_build_database.py
--- a/data_manager/bakta_build_database.py Sun Apr 16 08:29:25 2023 +0000
+++ b/data_manager/bakta_build_database.py Fri Jun 23 21:37:05 2023 +0000
@@ -2,6 +2,7 @@
import hashlib
import json
import os
+import re
import sys
import tarfile
from datetime import datetime
@@ -16,38 +17,50 @@
Extract bakta database information to make a json file for data_manager
"""
- def __init__(self,
- data_table_name="bakta_database",
- db_name=Path.cwd().joinpath("db"),
- db_version="latest",
- test_mode=False):
+ def __init__(
+ self,
+ data_table_name="bakta_database",
+ db_name=Path.cwd().joinpath("db"),
+ db_version="latest",
+ tarball_name="db.tar.gz",
+ test_mode=False,
+ ):
self.bakta_table_list = None
self.db_url = None
+ self.db_type = ""
self.data_table_entry = None
self.data_table_name = data_table_name
self.db_name = db_name
+ self.tar_name = tarball_name
self.db_version = db_version
- self.DB_VERSIONS_URL = 'https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json'
- self.DB_TEST_URL = 'https://zenodo.org/record/7360542/files/db-versions.json'
+ self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json"
+ self.DB_TEST_URL = "https://zenodo.org/record/8021032/files/db-versions.json"
self.test_mode = test_mode
+ def get_database_type(self):
+ self.light_db = bool(re.search(pattern="light", string=self.db_version))
+ self.db_version = self.db_version.split(sep="_")[0]
+ if self.light_db:
+ self.db_type = "light"
+ self.tar_name = "db-light.tar.gz"
+ self.md5 = self.fetch_db_versions()["md5-light"]
+ else:
+ self.md5 = self.fetch_db_versions()["md5"]
+
def get_data_table_format(self):
"""
Skeleton of a data_table format
return: a data table formated for json output
"""
- self.data_table_entry = {
- "data_tables": {
- self.data_table_name: {}
- }
- }
+ self.data_table_entry = {"data_tables": {self.data_table_name: {}}}
return self.data_table_entry
- def fetch_db_versions(self, db_version="latest"):
+ def fetch_db_versions(self):
"""
List bakta database info related to the db_version selected
"""
- if self.test_mode is True:
+
+ if self.test_mode:
self.DB_VERSIONS_URL = self.DB_TEST_URL
try:
with requests.get(self.DB_VERSIONS_URL) as resp:
@@ -55,38 +68,43 @@
except IOError as e:
print(e, file=sys.stderr)
raise e
+
+ if self.db_version == "latest":
+ db_date_list = []
+ for db_dic in versions:
+ db_date_list.append(
+ datetime.strptime(db_dic["date"], "%Y-%m-%d").date()
+ )
+ filtered_version = max(versions, key=lambda x: x["date"])
else:
- if db_version == "latest":
- db_date_list = []
- for db_dic in versions:
- db_date_list.append(datetime.strptime(db_dic["date"],
- '%Y-%m-%d').date())
- filtered_version = max(versions, key=lambda x: x['date'])
- else:
- filtered_version = None
- for item in versions:
- if '{0}.{1}'.format(item["major"], item["minor"]) == db_version:
- filtered_version = item
- break
- if filtered_version is None:
- print("No matching version detected in the list")
- if filtered_version is not None:
- self.db_url = f"https://zenodo.org/record/" \
- f"{filtered_version['record']}/files/db.tar.gz"
- self.db_version = db_version
- return filtered_version
+ filtered_version = None
+ for item in versions:
+ if "{0}.{1}".format(item["major"], item["minor"]) == self.db_version:
+ filtered_version = item
+ break
+ if filtered_version is None:
+ print("No matching version detected in the list")
+ else:
+ self.db_url = f"https://zenodo.org/record/{filtered_version['record']}/files/{self.tar_name}"
+ return filtered_version
def get_data_manager(self, bakta_database_info):
self.bakta_table_list = self.get_data_table_format()
- bakta_name = f"V{bakta_database_info['major']}." \
- f"{bakta_database_info['minor']}_" \
- f"{bakta_database_info['date']}"
- tool_version = str(f"{bakta_database_info['software-min']['major']}."
- f"{bakta_database_info['software-min']['minor']}")
- data_info = dict(value=bakta_name,
- dbkey=bakta_database_info['record'],
- bakta_version=tool_version,
- path="db")
+ bakta_name = (
+ f"V{bakta_database_info['major']}."
+ f"{bakta_database_info['minor']}{self.db_type}_"
+ f"{bakta_database_info['date']}"
+ )
+ tool_version = str(
+ f"{bakta_database_info['software-min']['major']}."
+ f"{bakta_database_info['software-min']['minor']}"
+ )
+ data_info = dict(
+ value=bakta_name,
+ dbkey=bakta_database_info["record"],
+ bakta_version=tool_version,
+ path="db",
+ )
self.bakta_table_list["data_tables"][self.data_table_name] = [data_info]
return self.bakta_table_list
@@ -98,110 +116,88 @@
untar the download db and update for the amrfinderplus database
"""
- def __init__(self,
- db_dir=Path.cwd(),
- db_name="bakta",
- tarball_name="db.tar.gz",
- test_mode=False):
+ def __init__(
+ self, db_dir=Path.cwd(), db_name="bakta", db_version="latest", test_mode=False
+ ):
super().__init__()
self.md5 = None
+ self.db_version = db_version
self.db_dir = db_dir
self.db_name = db_name
- self.tarball_name = tarball_name
- self.tarball_path = None
+ self.tarball_path = ""
self.test_mode = test_mode
+ self.get_database_type()
def download(self):
- self.db_name = f'{self.db_name}_{self.db_version}'
- bakta_path = Path(self.db_dir).joinpath(self.tarball_name)
+ self.db_name = f"{self.db_name}_{self.db_version}{self.db_type}"
+ bakta_path = Path(self.db_dir).joinpath(self.tar_name)
try:
- with bakta_path.open('wb') as fh_out, \
- requests.get(self.db_url, stream=True) as resp:
- total_length = resp.headers.get('content-length')
+ with bakta_path.open("wb") as fh_out, requests.get(
+ self.db_url, stream=True) as resp:
+ total_length = resp.headers.get("content-length")
if total_length is None: # no content length header
for data in resp.iter_content(chunk_size=1024 * 1024):
fh_out.write(data)
else:
for data in resp.iter_content(chunk_size=1024 * 1024):
fh_out.write(data)
- print(f'Download bakta database {self.db_version}')
+ print(f"Download bakta database {self.db_version}")
self.tarball_path = bakta_path
except IOError:
- print(f'ERROR: Could not download file from Zenodo!'
- f' url={self.db_url}, path={self.tarball_name}')
+ print(
+ f"ERROR: Could not download file from Zenodo!"
+ f" url={self.db_url}, to={self.tarball_path}"
+ )
def untar(self):
db_path = Path(self.db_dir).as_posix()
try:
- with self.tarball_path.open('rb') as fh_in, \
- tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file:
+ with self.tarball_path.open("rb") as fh_in, tarfile.open(
+ fileobj=fh_in, mode="r:gz"
+ ) as tar_file:
tar_file.extractall(path=db_path)
- print(f'Untar the database in {db_path}')
+ print(f"Untar the database in {db_path}")
return db_path
except OSError:
- sys.exit(f'ERROR: Could not extract {self.tarball_name} '
- f'to {self.db_name}')
+ sys.exit(f"ERROR: Could not extract {self.tar_name} " f"to {self.db_name}")
def calc_md5_sum(self, buffer_size=1048576):
- tarball_path = Path(self.db_dir).joinpath(self.tarball_name)
- self.md5 = self.fetch_db_versions(db_version=self.db_version)["md5"]
+ tarball_path = Path(self.db_dir).joinpath(self.tar_name)
md5 = hashlib.md5()
- with tarball_path.open('rb') as fh:
+ with tarball_path.open("rb") as fh:
data = fh.read(buffer_size)
while data:
md5.update(data)
data = fh.read(buffer_size)
if md5.hexdigest() == self.md5:
- print('\t...md5 control database OK')
+ print("\t...md5 control database OK")
else:
- print(f"Error: corrupt database file! "
- f"calculated md5 = {md5.hexdigest()}"
- f" different from {self.md5} ")
-
-
-"""
-This is the method to download the amrfinderplus database need by bakta.
-Deprecated to use the amrfinderplus data_manager
- def update_amrfinderplus_db(self):
- amrfinderplus_db_path = f"{self.db_dir}/{self.db_name}/db/amrfinderplus-db"
- if self.db_version == "test":
- cmd = [
- 'amrfinder_update',
- '--database', str(amrfinderplus_db_path),
- '--force_update',
- '--help'
- ]
- else:
- cmd = [
- 'amrfinder_update',
- '--database', str(amrfinderplus_db_path),
- '--force_update'
- ]
- proc = sp.run(
- cmd,
- universal_newlines=True
- )
- if proc.returncode != 0:
- print(f"ERROR: AMRFinderPlus failed! "
- f"command: 'amrfinder_update --force_update"
- f" --database {amrfinderplus_db_path}'")
- else:
- print("AMRFinderPlus database download")
-"""
+ print(
+ f"Error: corrupt database file! "
+ f"calculated md5 = {md5.hexdigest()}"
+ f" different from {self.md5} "
+ )
def parse_arguments():
# parse options and arguments
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("data_manager_json")
- arg_parser.add_argument("-d", "--database_version",
- help='Select the database version '
- '(major and minor eg. 4.0),'
- 'default is the latest version',
- default="latest",
- required=True)
- arg_parser.add_argument("-t", "--test", action='store_true',
- help="option to test the script with an empty database")
+ arg_parser.add_argument(
+ "-d",
+ "--database_version",
+ help="Select the database version "
+ "(major and minor eg. 4.0),"
+ "default is the latest version",
+ default="latest",
+ required=True,
+ )
+ arg_parser.add_argument(
+ "-t",
+ "--test",
+ action="store_true",
+ help="option to test the script with an empty database",
+ )
return arg_parser.parse_args()
@@ -209,11 +205,13 @@
all_args = parse_arguments()
with open(all_args.data_manager_json) as fh:
params = json.load(fh)
- target_dir = params['output_data'][0]['extra_files_path']
+ target_dir = params["output_data"][0]["extra_files_path"]
os.makedirs(target_dir)
# init the class to download bakta db
- bakta_upload = InstallBaktaDatabase(test_mode=all_args.test)
- bakta_db = bakta_upload.fetch_db_versions(db_version=all_args.database_version)
+ bakta_upload = InstallBaktaDatabase(
+ test_mode=all_args.test, db_version=all_args.database_version
+ )
+ bakta_db = bakta_upload.fetch_db_versions()
# update the path for galaxy
bakta_upload.db_dir = target_dir
# download the database
@@ -224,9 +222,9 @@
bakta_upload.untar()
# make the data_manager metadata
bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db)
- with open(all_args.data_manager_json, 'w') as fh:
+ with open(all_args.data_manager_json, "w") as fh:
json.dump(bakta_data_manager, fh, sort_keys=True)
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff -r adfd6bf710bd -r 3e73c97f025d data_manager/bakta_build_database.xml
--- a/data_manager/bakta_build_database.xml Sun Apr 16 08:29:25 2023 +0000
+++ b/data_manager/bakta_build_database.xml Fri Jun 23 21:37:05 2023 +0000
@@ -20,6 +20,8 @@
+
+
@@ -31,13 +33,19 @@
-
+
-
+
+
+
+
+
+
+
- 1.5.1
+ 1.8.1
2.27.1
3.8
- 0
+ 1
21.05
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test.loc
--- a/test-data/bakta_test.loc Sun Apr 16 08:29:25 2023 +0000
+++ b/test-data/bakta_test.loc Fri Jun 23 21:37:05 2023 +0000
@@ -1,9 +1,6 @@
-# this is a tab separated file describing the location of bakta database
-#
-# the columns are:
-# value, dbkey, bakta_version, path
-#
-# for example
-7197299 V0.0_date_test 0.0 ${__HERE__}
-V1.0_2022-10-12 7197299 1.4 /tmp/tmpiyh6lcqw/galaxy-dev/tool-data/bakta_database/7197299
-V2.0_2022-11-25 7360139 1.5 /tmp/tmpiyh6lcqw/galaxy-dev/tool-data/bakta_database/7360139
+V1.0_2022-10-12 7197299 1.4 /tmp/tmpq5t7s3c5/galaxy-dev/tool-data/bakta_database/7197299
+V5.0_2023-06-08 8021027 1.8 /tmp/tmpq5t7s3c5/galaxy-dev/tool-data/bakta_database/8021027
+V5.0light_2023-06-08 8021027 1.8 /tmp/tmpq5t7s3c5/galaxy-dev/tool-data/bakta_database/8021027
+V1.0_2022-10-12 7197299 1.4 /tmp/tmpydhjlpxl/galaxy-dev/tool-data/bakta_database/7197299
+V5.0_2023-06-08 8021027 1.8 /tmp/tmpydhjlpxl/galaxy-dev/tool-data/bakta_database/8021027
+V5.0light_2023-06-08 8021027 1.8 /tmp/tmpydhjlpxl/galaxy-dev/tool-data/bakta_database/8021027
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager.json
--- a/test-data/bakta_test_data_manager.json Sun Apr 16 08:29:25 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-{"data_tables": {"bakta_database": [{"bakta_version": "1.4", "dbkey": "7197299", "path": "db", "value": "V1.0_2022-10-12"}]}}
\ No newline at end of file
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager1.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bakta_test_data_manager1.json Fri Jun 23 21:37:05 2023 +0000
@@ -0,0 +1,1 @@
+{"data_tables": {"bakta_database": [{"bakta_version": "1.4", "dbkey": "7197299", "path": "db", "value": "V1.0_2022-10-12"}]}}
\ No newline at end of file
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager2.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bakta_test_data_manager2.json Fri Jun 23 21:37:05 2023 +0000
@@ -0,0 +1,1 @@
+{"data_tables": {"bakta_database": [{"bakta_version": "1.8", "dbkey": "8021027", "path": "db", "value": "V5.0_2023-06-08"}]}}
\ No newline at end of file
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager3.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bakta_test_data_manager3.json Fri Jun 23 21:37:05 2023 +0000
@@ -0,0 +1,1 @@
+{"data_tables": {"bakta_database": [{"bakta_version": "1.8", "dbkey": "8021027", "path": "db", "value": "V5.0light_2023-06-08"}]}}
\ No newline at end of file
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager_test2.json
--- a/test-data/bakta_test_data_manager_test2.json Sun Apr 16 08:29:25 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-{"data_tables": {"bakta_database": [{"bakta_version": "1.5", "dbkey": "7360139", "path": "db", "value": "V2.0_2022-11-25"}]}}
\ No newline at end of file
diff -r adfd6bf710bd -r 3e73c97f025d test-data/db-versions.json
--- a/test-data/db-versions.json Sun Apr 16 08:29:25 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-[
- {
- "date": "2022-10-12",
- "major": 1,
- "minor": 0,
- "doi": "10.5281/zenodo.7197299",
- "record": "7197299",
- "md5": "8b0250c17078742fc12207d4efb0fc1a",
- "software-min": {
- "major": 1,
- "minor": 4
- }
- },
- {
- "date": "2022-11-25",
- "major": 2,
- "minor": 0,
- "doi": "10.5281/zenodo.7360139",
- "record": "7360139",
- "md5": "ebdb799a6bd97e56ca359db781ab8bab",
- "software-min": {
- "major": 1,
- "minor": 5
- }
- }
-]