Previous changeset 2:adfd6bf710bd (2023-04-16) Next changeset 4:d74850cf4e42 (2023-08-25) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bakta_database commit 487cb35fe55883ac6eeb8dda58b56c9ca2ec0a85 |
modified:
data_manager/bakta_build_database.py data_manager/bakta_build_database.xml data_manager/macro.xml test-data/bakta_test.loc |
added:
test-data/bakta_test_data_manager1.json test-data/bakta_test_data_manager2.json test-data/bakta_test_data_manager3.json |
removed:
test-data/bakta_test_data_manager.json test-data/bakta_test_data_manager_test2.json test-data/db-versions.json |
b |
diff -r adfd6bf710bd -r 3e73c97f025d data_manager/bakta_build_database.py --- a/data_manager/bakta_build_database.py Sun Apr 16 08:29:25 2023 +0000 +++ b/data_manager/bakta_build_database.py Fri Jun 23 21:37:05 2023 +0000 |
[ |
b'@@ -2,6 +2,7 @@\n import hashlib\n import json\n import os\n+import re\n import sys\n import tarfile\n from datetime import datetime\n@@ -16,38 +17,50 @@\n Extract bakta database information to make a json file for data_manager\n """\n \n- def __init__(self,\n- data_table_name="bakta_database",\n- db_name=Path.cwd().joinpath("db"),\n- db_version="latest",\n- test_mode=False):\n+ def __init__(\n+ self,\n+ data_table_name="bakta_database",\n+ db_name=Path.cwd().joinpath("db"),\n+ db_version="latest",\n+ tarball_name="db.tar.gz",\n+ test_mode=False,\n+ ):\n self.bakta_table_list = None\n self.db_url = None\n+ self.db_type = ""\n self.data_table_entry = None\n self.data_table_name = data_table_name\n self.db_name = db_name\n+ self.tar_name = tarball_name\n self.db_version = db_version\n- self.DB_VERSIONS_URL = \'https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json\'\n- self.DB_TEST_URL = \'https://zenodo.org/record/7360542/files/db-versions.json\'\n+ self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json"\n+ self.DB_TEST_URL = "https://zenodo.org/record/8021032/files/db-versions.json"\n self.test_mode = test_mode\n \n+ def get_database_type(self):\n+ self.light_db = bool(re.search(pattern="light", string=self.db_version))\n+ self.db_version = self.db_version.split(sep="_")[0]\n+ if self.light_db:\n+ self.db_type = "light"\n+ self.tar_name = "db-light.tar.gz"\n+ self.md5 = self.fetch_db_versions()["md5-light"]\n+ else:\n+ self.md5 = self.fetch_db_versions()["md5"]\n+\n def get_data_table_format(self):\n """\n Skeleton of a data_table format\n return: a data table formated for json output\n """\n- self.data_table_entry = {\n- "data_tables": {\n- self.data_table_name: {}\n- }\n- }\n+ self.data_table_entry = {"data_tables": {self.data_table_name: {}}}\n return self.data_table_entry\n \n- def fetch_db_versions(self, db_version="latest"):\n+ def fetch_db_versions(self):\n """\n List bakta database info related to the db_version selected\n """\n- if self.test_mode is True:\n+\n+ if self.test_mode:\n self.DB_VERSIONS_URL = self.DB_TEST_URL\n try:\n with requests.get(self.DB_VERSIONS_URL) as resp:\n@@ -55,38 +68,43 @@\n except IOError as e:\n print(e, file=sys.stderr)\n raise e\n+\n+ if self.db_version == "latest":\n+ db_date_list = []\n+ for db_dic in versions:\n+ db_date_list.append(\n+ datetime.strptime(db_dic["date"], "%Y-%m-%d").date()\n+ )\n+ filtered_version = max(versions, key=lambda x: x["date"])\n else:\n- if db_version == "latest":\n- db_date_list = []\n- for db_dic in versions:\n- db_date_list.append(datetime.strptime(db_dic["date"],\n- \'%Y-%m-%d\').date())\n- filtered_version = max(versions, key=lambda x: x[\'date\'])\n- else:\n- filtered_version = None\n- for item in versions:\n- if \'{0}.{1}\'.format(item["major"], item["minor"]) == db_version:\n- filtered_version = item\n- break\n- if filtered_version is None:\n- print("No matching version detected in the list")\n- if filtered_version is not None:\n- self.db_url = f"https://zenodo.org/record/" \\\n- f"{filtered_version[\'record\']}/files/db.tar.gz"\n- self.db_version = db_version\n- '..b'n("rb") as fh:\n data = fh.read(buffer_size)\n while data:\n md5.update(data)\n data = fh.read(buffer_size)\n if md5.hexdigest() == self.md5:\n- print(\'\\t...md5 control database OK\')\n+ print("\\t...md5 control database OK")\n else:\n- print(f"Error: corrupt database file! "\n- f"calculated md5 = {md5.hexdigest()}"\n- f" different from {self.md5} ")\n-\n-\n-"""\n-This is the method to download the amrfinderplus database need by bakta.\n-Deprecated to use the amrfinderplus data_manager\n- def update_amrfinderplus_db(self):\n- amrfinderplus_db_path = f"{self.db_dir}/{self.db_name}/db/amrfinderplus-db"\n- if self.db_version == "test":\n- cmd = [\n- \'amrfinder_update\',\n- \'--database\', str(amrfinderplus_db_path),\n- \'--force_update\',\n- \'--help\'\n- ]\n- else:\n- cmd = [\n- \'amrfinder_update\',\n- \'--database\', str(amrfinderplus_db_path),\n- \'--force_update\'\n- ]\n- proc = sp.run(\n- cmd,\n- universal_newlines=True\n- )\n- if proc.returncode != 0:\n- print(f"ERROR: AMRFinderPlus failed! "\n- f"command: \'amrfinder_update --force_update"\n- f" --database {amrfinderplus_db_path}\'")\n- else:\n- print("AMRFinderPlus database download")\n-"""\n+ print(\n+ f"Error: corrupt database file! "\n+ f"calculated md5 = {md5.hexdigest()}"\n+ f" different from {self.md5} "\n+ )\n \n \n def parse_arguments():\n # parse options and arguments\n arg_parser = argparse.ArgumentParser()\n arg_parser.add_argument("data_manager_json")\n- arg_parser.add_argument("-d", "--database_version",\n- help=\'Select the database version \'\n- \'(major and minor eg. 4.0),\'\n- \'default is the latest version\',\n- default="latest",\n- required=True)\n- arg_parser.add_argument("-t", "--test", action=\'store_true\',\n- help="option to test the script with an empty database")\n+ arg_parser.add_argument(\n+ "-d",\n+ "--database_version",\n+ help="Select the database version "\n+ "(major and minor eg. 4.0),"\n+ "default is the latest version",\n+ default="latest",\n+ required=True,\n+ )\n+ arg_parser.add_argument(\n+ "-t",\n+ "--test",\n+ action="store_true",\n+ help="option to test the script with an empty database",\n+ )\n return arg_parser.parse_args()\n \n \n@@ -209,11 +205,13 @@\n all_args = parse_arguments()\n with open(all_args.data_manager_json) as fh:\n params = json.load(fh)\n- target_dir = params[\'output_data\'][0][\'extra_files_path\']\n+ target_dir = params["output_data"][0]["extra_files_path"]\n os.makedirs(target_dir)\n # init the class to download bakta db\n- bakta_upload = InstallBaktaDatabase(test_mode=all_args.test)\n- bakta_db = bakta_upload.fetch_db_versions(db_version=all_args.database_version)\n+ bakta_upload = InstallBaktaDatabase(\n+ test_mode=all_args.test, db_version=all_args.database_version\n+ )\n+ bakta_db = bakta_upload.fetch_db_versions()\n # update the path for galaxy\n bakta_upload.db_dir = target_dir\n # download the database\n@@ -224,9 +222,9 @@\n bakta_upload.untar()\n # make the data_manager metadata\n bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db)\n- with open(all_args.data_manager_json, \'w\') as fh:\n+ with open(all_args.data_manager_json, "w") as fh:\n json.dump(bakta_data_manager, fh, sort_keys=True)\n \n \n-if __name__ == \'__main__\':\n+if __name__ == "__main__":\n main()\n' |
b |
diff -r adfd6bf710bd -r 3e73c97f025d data_manager/bakta_build_database.xml --- a/data_manager/bakta_build_database.xml Sun Apr 16 08:29:25 2023 +0000 +++ b/data_manager/bakta_build_database.xml Fri Jun 23 21:37:05 2023 +0000 |
[ |
@@ -20,6 +20,8 @@ <option value="3.0">V3.0_2021-08-05</option> <option value="3.1">V3.1_2022-02-03</option> <option value="4.0">V4.0_2022-08-29</option> + <option value="5.0">V5.0_2023-02-20</option> + <option value="5.0_light">V5.0_light_2023-02-20</option> </param> <param name="test_data_manager" type="hidden" value=""/> </inputs> @@ -31,13 +33,19 @@ <test expect_num_outputs="1"> <param name="test_data_manager" value="--test"/> <param name="database_select" value="1.0"/> - <output name="output_file" value="bakta_test_data_manager.json" /> + <output name="output_file" value="bakta_test_data_manager1.json" /> </test> <!-- Test 2 with the latest option --> <test expect_num_outputs="1"> <param name="test_data_manager" value="--test"/> <param name="database_select" value="latest"/> - <output name="output_file" value="bakta_test_data_manager_test2.json" /> + <output name="output_file" value="bakta_test_data_manager2.json" /> + </test> + <!-- Test 3 with light db --> + <test expect_num_outputs="1"> + <param name="test_data_manager" value="--test"/> + <param name="database_select" value="5.0_light"/> + <output name="output_file" value="bakta_test_data_manager3.json" /> </test> </tests> <help><![CDATA[ |
b |
diff -r adfd6bf710bd -r 3e73c97f025d data_manager/macro.xml --- a/data_manager/macro.xml Sun Apr 16 08:29:25 2023 +0000 +++ b/data_manager/macro.xml Fri Jun 23 21:37:05 2023 +0000 |
b |
@@ -1,8 +1,8 @@ <macros> - <token name="@TOOL_VERSION@">1.5.1</token> + <token name="@TOOL_VERSION@">1.8.1</token> <token name="@REQUESTS_VERSION@">2.27.1</token> <token name="@PYTHON_VERSION@">3.8</token> - <token name="@VERSION_SUFFIX@">0</token> + <token name="@VERSION_SUFFIX@">1</token> <token name="@PROFILE@">21.05</token> <xml name="requirements"> <requirements> |
b |
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test.loc --- a/test-data/bakta_test.loc Sun Apr 16 08:29:25 2023 +0000 +++ b/test-data/bakta_test.loc Fri Jun 23 21:37:05 2023 +0000 |
b |
@@ -1,9 +1,6 @@ -# this is a tab separated file describing the location of bakta database -# -# the columns are: -# value, dbkey, bakta_version, path -# -# for example -7197299 V0.0_date_test 0.0 ${__HERE__} -V1.0_2022-10-12 7197299 1.4 /tmp/tmpiyh6lcqw/galaxy-dev/tool-data/bakta_database/7197299 -V2.0_2022-11-25 7360139 1.5 /tmp/tmpiyh6lcqw/galaxy-dev/tool-data/bakta_database/7360139 +V1.0_2022-10-12 7197299 1.4 /tmp/tmpq5t7s3c5/galaxy-dev/tool-data/bakta_database/7197299 +V5.0_2023-06-08 8021027 1.8 /tmp/tmpq5t7s3c5/galaxy-dev/tool-data/bakta_database/8021027 +V5.0light_2023-06-08 8021027 1.8 /tmp/tmpq5t7s3c5/galaxy-dev/tool-data/bakta_database/8021027 +V1.0_2022-10-12 7197299 1.4 /tmp/tmpydhjlpxl/galaxy-dev/tool-data/bakta_database/7197299 +V5.0_2023-06-08 8021027 1.8 /tmp/tmpydhjlpxl/galaxy-dev/tool-data/bakta_database/8021027 +V5.0light_2023-06-08 8021027 1.8 /tmp/tmpydhjlpxl/galaxy-dev/tool-data/bakta_database/8021027 |
b |
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager.json --- a/test-data/bakta_test_data_manager.json Sun Apr 16 08:29:25 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,1 +0,0 @@ -{"data_tables": {"bakta_database": [{"bakta_version": "1.4", "dbkey": "7197299", "path": "db", "value": "V1.0_2022-10-12"}]}} \ No newline at end of file |
b |
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager1.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bakta_test_data_manager1.json Fri Jun 23 21:37:05 2023 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"data_tables": {"bakta_database": [{"bakta_version": "1.4", "dbkey": "7197299", "path": "db", "value": "V1.0_2022-10-12"}]}} \ No newline at end of file |
b |
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager2.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bakta_test_data_manager2.json Fri Jun 23 21:37:05 2023 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"data_tables": {"bakta_database": [{"bakta_version": "1.8", "dbkey": "8021027", "path": "db", "value": "V5.0_2023-06-08"}]}} \ No newline at end of file |
b |
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager3.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bakta_test_data_manager3.json Fri Jun 23 21:37:05 2023 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"data_tables": {"bakta_database": [{"bakta_version": "1.8", "dbkey": "8021027", "path": "db", "value": "V5.0light_2023-06-08"}]}} \ No newline at end of file |
b |
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager_test2.json --- a/test-data/bakta_test_data_manager_test2.json Sun Apr 16 08:29:25 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,1 +0,0 @@ -{"data_tables": {"bakta_database": [{"bakta_version": "1.5", "dbkey": "7360139", "path": "db", "value": "V2.0_2022-11-25"}]}} \ No newline at end of file |
b |
diff -r adfd6bf710bd -r 3e73c97f025d test-data/db-versions.json --- a/test-data/db-versions.json Sun Apr 16 08:29:25 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,26 +0,0 @@ -[ - { - "date": "2022-10-12", - "major": 1, - "minor": 0, - "doi": "10.5281/zenodo.7197299", - "record": "7197299", - "md5": "8b0250c17078742fc12207d4efb0fc1a", - "software-min": { - "major": 1, - "minor": 4 - } - }, - { - "date": "2022-11-25", - "major": 2, - "minor": 0, - "doi": "10.5281/zenodo.7360139", - "record": "7360139", - "md5": "ebdb799a6bd97e56ca359db781ab8bab", - "software-min": { - "major": 1, - "minor": 5 - } - } -] |