Next changeset 1:bb463043c93e (2023-03-02) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_bakta commit fba6deae1d3707e0c14202433d0495e157745afd |
added:
data_manager/bakta_build_database.py data_manager/bakta_build_database.xml data_manager/macro.xml data_manager_conf.xml test-data/bakta_test.loc test-data/bakta_test_data_manager.json test-data/bakta_test_data_manager_test2.json test-data/db-versions.json tool-data/bakta_database.loc tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
b |
diff -r 000000000000 -r a19189a128cb data_manager/bakta_build_database.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/bakta_build_database.py Sat Dec 10 21:52:28 2022 +0000 |
[ |
b'@@ -0,0 +1,232 @@\n+import argparse\n+import hashlib\n+import json\n+import os\n+import sys\n+import tarfile\n+from datetime import datetime\n+from pathlib import Path\n+\n+\n+import requests\n+\n+\n+class GetBaktaDatabaseInfo:\n+ """\n+ Extract bakta database information to make a json file for data_manager\n+ """\n+\n+ def __init__(self,\n+ data_table_name="bakta_database",\n+ db_name=Path.cwd().joinpath("db"),\n+ db_version="latest",\n+ test_mode=False):\n+ self.bakta_table_list = None\n+ self.db_url = None\n+ self.data_table_entry = None\n+ self.data_table_name = data_table_name\n+ self.db_name = db_name\n+ self.db_version = db_version\n+ self.DB_VERSIONS_URL = \'https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json\'\n+ self.DB_TEST_URL = \'https://zenodo.org/record/7360542/files/db-versions.json\'\n+ self.test_mode = test_mode\n+\n+ def get_data_table_format(self):\n+ """\n+ Skeleton of a data_table format\n+ return: a data table formated for json output\n+ """\n+ self.data_table_entry = {\n+ "data_tables": {\n+ self.data_table_name: {}\n+ }\n+ }\n+ return self.data_table_entry\n+\n+ def fetch_db_versions(self, db_version="latest"):\n+ """\n+ List bakta database info related to the db_version selected\n+ """\n+ if self.test_mode is True:\n+ self.DB_VERSIONS_URL = self.DB_TEST_URL\n+ try:\n+ with requests.get(self.DB_VERSIONS_URL) as resp:\n+ versions = json.loads(resp.content)\n+ except IOError as e:\n+ print(e, file=sys.stderr)\n+ raise e\n+ else:\n+ if db_version == "latest":\n+ db_date_list = []\n+ for db_dic in versions:\n+ db_date_list.append(datetime.strptime(db_dic["date"],\n+ \'%Y-%m-%d\').date())\n+ filtered_version = max(versions, key=lambda x: x[\'date\'])\n+ else:\n+ filtered_version = None\n+ for item in versions:\n+ if \'{0}.{1}\'.format(item["major"], item["minor"]) == db_version:\n+ filtered_version = item\n+ break\n+ if filtered_version is None:\n+ print("No matching version detected in the list")\n+ if filtered_version is not None:\n+ self.db_url = f"https://zenodo.org/record/" \\\n+ f"{filtered_version[\'record\']}/files/db.tar.gz"\n+ self.db_version = db_version\n+ return filtered_version\n+\n+ def get_data_manager(self, bakta_database_info):\n+ self.bakta_table_list = self.get_data_table_format()\n+ bakta_value = f"V{bakta_database_info[\'major\']}." \\\n+ f"{bakta_database_info[\'minor\']}_" \\\n+ f"{bakta_database_info[\'date\']}"\n+ tool_version = str(f"{bakta_database_info[\'software-min\'][\'major\']}."\n+ f"{bakta_database_info[\'software-min\'][\'minor\']}")\n+ data_info = dict(value=bakta_database_info[\'record\'],\n+ dbkey=bakta_value,\n+ bakta_version=tool_version,\n+ path="db")\n+ self.bakta_table_list["data_tables"][self.data_table_name] = [data_info]\n+ return self.bakta_table_list\n+\n+\n+class InstallBaktaDatabase(GetBaktaDatabaseInfo):\n+ """\n+ Download the bakta database,\n+ check md5 sum,\n+ untar the download db and update for the amrfinderplus database\n+ """\n+\n+ def __init__(self,\n+ db_dir=Path.cwd(),\n+ db_name="bakta",\n+ tarball_name="db.tar.gz",\n+ test_mode=False):\n+ super().__init__()\n+ self.md5 = None\n+ self.db_dir'..b'except IOError:\n+ print(f\'ERROR: Could not download file from Zenodo!\'\n+ f\' url={self.db_url}, path={self.tarball_name}\')\n+\n+ def untar(self):\n+ db_path = Path(self.db_dir).as_posix()\n+ try:\n+ with self.tarball_path.open(\'rb\') as fh_in, \\\n+ tarfile.open(fileobj=fh_in, mode=\'r:gz\') as tar_file:\n+ tar_file.extractall(path=db_path)\n+ print(f\'Untar the database in {db_path}\')\n+ return db_path\n+ except OSError:\n+ sys.exit(f\'ERROR: Could not extract {self.tarball_name} \'\n+ f\'to {self.db_name}\')\n+\n+ def calc_md5_sum(self, buffer_size=1048576):\n+ tarball_path = Path(self.db_dir).joinpath(self.tarball_name)\n+ self.md5 = self.fetch_db_versions(db_version=self.db_version)["md5"]\n+ md5 = hashlib.md5()\n+ with tarball_path.open(\'rb\') as fh:\n+ data = fh.read(buffer_size)\n+ while data:\n+ md5.update(data)\n+ data = fh.read(buffer_size)\n+ if md5.hexdigest() == self.md5:\n+ print(\'\\t...md5 control database OK\')\n+ else:\n+ print(f"Error: corrupt database file! "\n+ f"calculated md5 = {md5.hexdigest()}"\n+ f" different from {self.md5} ")\n+\n+\n+"""\n+This is the method to download the amrfinderplus database need by bakta.\n+Deprecated to use the amrfinderplus data_manager\n+ def update_amrfinderplus_db(self):\n+ amrfinderplus_db_path = f"{self.db_dir}/{self.db_name}/db/amrfinderplus-db"\n+ if self.db_version == "test":\n+ cmd = [\n+ \'amrfinder_update\',\n+ \'--database\', str(amrfinderplus_db_path),\n+ \'--force_update\',\n+ \'--help\'\n+ ]\n+ else:\n+ cmd = [\n+ \'amrfinder_update\',\n+ \'--database\', str(amrfinderplus_db_path),\n+ \'--force_update\'\n+ ]\n+ proc = sp.run(\n+ cmd,\n+ universal_newlines=True\n+ )\n+ if proc.returncode != 0:\n+ print(f"ERROR: AMRFinderPlus failed! "\n+ f"command: \'amrfinder_update --force_update"\n+ f" --database {amrfinderplus_db_path}\'")\n+ else:\n+ print("AMRFinderPlus database download")\n+"""\n+\n+\n+def parse_arguments():\n+ # parse options and arguments\n+ arg_parser = argparse.ArgumentParser()\n+ arg_parser.add_argument("data_manager_json")\n+ arg_parser.add_argument("-d", "--database_version",\n+ help=\'Select the database version \'\n+ \'(major and minor eg. 4.0),\'\n+ \'default is the latest version\',\n+ default="latest",\n+ required=True)\n+ arg_parser.add_argument("-t", "--test", action=\'store_true\',\n+ help="option to test the script with an empty database")\n+ return arg_parser.parse_args()\n+\n+\n+def main():\n+ all_args = parse_arguments()\n+ with open(all_args.data_manager_json) as fh:\n+ params = json.load(fh)\n+ target_dir = params[\'output_data\'][0][\'extra_files_path\']\n+ os.makedirs(target_dir)\n+ # init the class to download bakta db\n+ bakta_upload = InstallBaktaDatabase(test_mode=all_args.test)\n+ bakta_db = bakta_upload.fetch_db_versions(db_version=all_args.database_version)\n+ # update the path for galaxy\n+ bakta_upload.db_dir = target_dir\n+ # download the database\n+ bakta_upload.download()\n+ # check md5 sum\n+ bakta_upload.calc_md5_sum()\n+ # untar db\n+ bakta_upload.untar()\n+ # make the data_manager metadata\n+ bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db)\n+ with open(all_args.data_manager_json, \'w\') as fh:\n+ json.dump(bakta_data_manager, fh, sort_keys=True)\n+\n+\n+if __name__ == \'__main__\':\n+ main()\n' |
b |
diff -r 000000000000 -r a19189a128cb data_manager/bakta_build_database.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/bakta_build_database.xml Sat Dec 10 21:52:28 2022 +0000 |
[ |
@@ -0,0 +1,49 @@ +<tool id="bakta_build_database" name="Bakta" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>Bakta database builder</description> + <macros> + <import>macro.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="exit_code"> + <![CDATA[ + python '$__tool_directory__/bakta_build_database.py' + '$output_file' + --database_version '$database_select' + $test_data_manager + ]]></command> + <inputs> + <param name="database_select" type="select" label="Database version" help="Choose a database version to download (default latest version)"> + <option value="latest" selected="true">Latest available version</option> + <option value="1.0">V1.0_2020-11-20</option> + <option value="1.1">V1.1_2020-12-18</option> + <option value="2.0">V2.0_2021-04-05</option> + <option value="3.0">V3.0_2021-08-05</option> + <option value="3.1">V3.1_2022-02-03</option> + <option value="4.0">V4.0_2022-08-29</option> + </param> + <param name="test_data_manager" type="hidden" value=""/> + </inputs> + <outputs> + <data name="output_file" format="data_manager_json"/> + </outputs> + <tests> + <!-- Test 1 with version 1.0 --> + <test expect_num_outputs="1"> + <param name="test_data_manager" value="--test"/> + <param name="database_select" value="1.0"/> + <output name="output_file" value="bakta_test_data_manager.json" /> + </test> + <!-- Test 2 with the latest option --> + <test expect_num_outputs="1"> + <param name="test_data_manager" value="--test"/> + <param name="database_select" value="latest"/> + <output name="output_file" value="bakta_test_data_manager_test2.json" /> + </test> + </tests> + <help><![CDATA[ + Download specific version of Bakta database <https://github.com/oschwengers/bakta#database> + ]]></help> + <citations> + <citation type="doi">10.1099/mgen.0.000685</citation> + </citations> +</tool> |
b |
diff -r 000000000000 -r a19189a128cb data_manager/macro.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/macro.xml Sat Dec 10 21:52:28 2022 +0000 |
b |
@@ -0,0 +1,13 @@ +<macros> + <token name="@TOOL_VERSION@">1.5.1</token> + <token name="@REQUESTS_VERSION@">2.27.1</token> + <token name="@PYTHON_VERSION@">3.8</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">21.05</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@PYTHON_VERSION@">python</requirement> + <requirement type="package" version="@REQUESTS_VERSION@">requests</requirement> + </requirements> + </xml> +</macros> |
b |
diff -r 000000000000 -r a19189a128cb data_manager_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Sat Dec 10 21:52:28 2022 +0000 |
b |
@@ -0,0 +1,19 @@ +<data_managers> + <data_manager tool_file="data_manager/bakta_build_database.xml" id="bakta_build_database" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> + <data_table name="bakta_database"> + <output> + <column name="value"/> + <column name="dbkey"/> + <column name="bakta_version"/> + <column name="path" output_ref="output_file"> + <move type="directory" relativize_symlinks="True"> + <source>${path}</source> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">bakta_database/${value}</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/bakta_database/${value}</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + </data_manager> +</data_managers> |
b |
diff -r 000000000000 -r a19189a128cb test-data/bakta_test.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bakta_test.loc Sat Dec 10 21:52:28 2022 +0000 |
b |
@@ -0,0 +1,11 @@ +# this is a tab separated file describing the location of bakta database +# +# the columns are: +# value, dbkey, bakta_version, path +# +# for example +7197299 V0.0_date_test 0.0 ${__HERE__} +7197299 V1.0_2022-10-12 1.4 /tmp/tmpxrkfnuec/galaxy-dev/tool-data/bakta_database/7197299 +7360139 V2.0_2022-11-25 1.5 /tmp/tmpxrkfnuec/galaxy-dev/tool-data/bakta_database/7360139 +7197299 V1.0_2022-10-12 1.4 /tmp/tmpwe9n4gyg/galaxy-dev/tool-data/bakta_database/7197299 +7360139 V2.0_2022-11-25 1.5 /tmp/tmpwe9n4gyg/galaxy-dev/tool-data/bakta_database/7360139 |
b |
diff -r 000000000000 -r a19189a128cb test-data/bakta_test_data_manager.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bakta_test_data_manager.json Sat Dec 10 21:52:28 2022 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"data_tables": {"bakta_database": [{"bakta_version": "1.4", "dbkey": "V1.0_2022-10-12", "path": "db", "value": "7197299"}]}} \ No newline at end of file |
b |
diff -r 000000000000 -r a19189a128cb test-data/bakta_test_data_manager_test2.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bakta_test_data_manager_test2.json Sat Dec 10 21:52:28 2022 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"data_tables": {"bakta_database": [{"bakta_version": "1.5", "dbkey": "V2.0_2022-11-25", "path": "db", "value": "7360139"}]}} \ No newline at end of file |
b |
diff -r 000000000000 -r a19189a128cb test-data/db-versions.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/db-versions.json Sat Dec 10 21:52:28 2022 +0000 |
[ |
@@ -0,0 +1,26 @@ +[ + { + "date": "2022-10-12", + "major": 1, + "minor": 0, + "doi": "10.5281/zenodo.7197299", + "record": "7197299", + "md5": "8b0250c17078742fc12207d4efb0fc1a", + "software-min": { + "major": 1, + "minor": 4 + } + }, + { + "date": "2022-11-25", + "major": 2, + "minor": 0, + "doi": "10.5281/zenodo.7360139", + "record": "7360139", + "md5": "ebdb799a6bd97e56ca359db781ab8bab", + "software-min": { + "major": 1, + "minor": 5 + } + } +] |
b |
diff -r 000000000000 -r a19189a128cb tool-data/bakta_database.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/bakta_database.loc Sat Dec 10 21:52:28 2022 +0000 |
b |
@@ -0,0 +1,7 @@ +# this is a tab separated file describing the location of bakta database +# +# the columns are: +# value, dbkey, bakta_version, path +# +# for example +#7197299 V0.0_date_test 0.0 ${__HERE__} |
b |
diff -r 000000000000 -r a19189a128cb tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Sat Dec 10 21:52:28 2022 +0000 |
b |
@@ -0,0 +1,7 @@ +<tables> + <!-- Locations of bakta database in the required format --> + <table name="bakta_database" comment_char="#"> + <columns>value, dbkey, bakta_version, path</columns> + <file path="tool-data/bakta_database.loc" /> + </table> +</tables> |
b |
diff -r 000000000000 -r a19189a128cb tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Sat Dec 10 21:52:28 2022 +0000 |
b |
@@ -0,0 +1,7 @@ +<tables> + <!-- Locations of bakta database in the required format --> + <table name="bakta_database" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, bakta_version, path</columns> + <file path="${__HERE__}/test-data/bakta_test.loc" /> + </table> +</tables> |