Repository 'data_manager_bakta'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/data_manager_bakta

Changeset 3:3e73c97f025d (2023-06-23)
Previous changeset 2:adfd6bf710bd (2023-04-16) Next changeset 4:d74850cf4e42 (2023-08-25)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_bakta_database commit 487cb35fe55883ac6eeb8dda58b56c9ca2ec0a85
modified:
data_manager/bakta_build_database.py
data_manager/bakta_build_database.xml
data_manager/macro.xml
test-data/bakta_test.loc
added:
test-data/bakta_test_data_manager1.json
test-data/bakta_test_data_manager2.json
test-data/bakta_test_data_manager3.json
removed:
test-data/bakta_test_data_manager.json
test-data/bakta_test_data_manager_test2.json
test-data/db-versions.json
b
diff -r adfd6bf710bd -r 3e73c97f025d data_manager/bakta_build_database.py
--- a/data_manager/bakta_build_database.py Sun Apr 16 08:29:25 2023 +0000
+++ b/data_manager/bakta_build_database.py Fri Jun 23 21:37:05 2023 +0000
[
b'@@ -2,6 +2,7 @@\n import hashlib\n import json\n import os\n+import re\n import sys\n import tarfile\n from datetime import datetime\n@@ -16,38 +17,50 @@\n     Extract bakta database information to make a json file for data_manager\n     """\n \n-    def __init__(self,\n-                 data_table_name="bakta_database",\n-                 db_name=Path.cwd().joinpath("db"),\n-                 db_version="latest",\n-                 test_mode=False):\n+    def __init__(\n+        self,\n+        data_table_name="bakta_database",\n+        db_name=Path.cwd().joinpath("db"),\n+        db_version="latest",\n+        tarball_name="db.tar.gz",\n+        test_mode=False,\n+    ):\n         self.bakta_table_list = None\n         self.db_url = None\n+        self.db_type = ""\n         self.data_table_entry = None\n         self.data_table_name = data_table_name\n         self.db_name = db_name\n+        self.tar_name = tarball_name\n         self.db_version = db_version\n-        self.DB_VERSIONS_URL = \'https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json\'\n-        self.DB_TEST_URL = \'https://zenodo.org/record/7360542/files/db-versions.json\'\n+        self.DB_VERSIONS_URL = "https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json"\n+        self.DB_TEST_URL = "https://zenodo.org/record/8021032/files/db-versions.json"\n         self.test_mode = test_mode\n \n+    def get_database_type(self):\n+        self.light_db = bool(re.search(pattern="light", string=self.db_version))\n+        self.db_version = self.db_version.split(sep="_")[0]\n+        if self.light_db:\n+            self.db_type = "light"\n+            self.tar_name = "db-light.tar.gz"\n+            self.md5 = self.fetch_db_versions()["md5-light"]\n+        else:\n+            self.md5 = self.fetch_db_versions()["md5"]\n+\n     def get_data_table_format(self):\n         """\n         Skeleton of a data_table format\n         return: a data table formated for json output\n         """\n-        self.data_table_entry = {\n-            "data_tables": {\n-                self.data_table_name: {}\n-            }\n-        }\n+        self.data_table_entry = {"data_tables": {self.data_table_name: {}}}\n         return self.data_table_entry\n \n-    def fetch_db_versions(self, db_version="latest"):\n+    def fetch_db_versions(self):\n         """\n         List bakta database info related to the db_version selected\n         """\n-        if self.test_mode is True:\n+\n+        if self.test_mode:\n             self.DB_VERSIONS_URL = self.DB_TEST_URL\n         try:\n             with requests.get(self.DB_VERSIONS_URL) as resp:\n@@ -55,38 +68,43 @@\n         except IOError as e:\n             print(e, file=sys.stderr)\n             raise e\n+\n+        if self.db_version == "latest":\n+            db_date_list = []\n+            for db_dic in versions:\n+                db_date_list.append(\n+                    datetime.strptime(db_dic["date"], "%Y-%m-%d").date()\n+                )\n+            filtered_version = max(versions, key=lambda x: x["date"])\n         else:\n-            if db_version == "latest":\n-                db_date_list = []\n-                for db_dic in versions:\n-                    db_date_list.append(datetime.strptime(db_dic["date"],\n-                                                          \'%Y-%m-%d\').date())\n-                filtered_version = max(versions, key=lambda x: x[\'date\'])\n-            else:\n-                filtered_version = None\n-                for item in versions:\n-                    if \'{0}.{1}\'.format(item["major"], item["minor"]) == db_version:\n-                        filtered_version = item\n-                        break\n-                if filtered_version is None:\n-                    print("No matching version detected in the list")\n-            if filtered_version is not None:\n-                self.db_url = f"https://zenodo.org/record/" \\\n-                              f"{filtered_version[\'record\']}/files/db.tar.gz"\n-                self.db_version = db_version\n-           '..b'n("rb") as fh:\n             data = fh.read(buffer_size)\n             while data:\n                 md5.update(data)\n                 data = fh.read(buffer_size)\n         if md5.hexdigest() == self.md5:\n-            print(\'\\t...md5 control database OK\')\n+            print("\\t...md5 control database OK")\n         else:\n-            print(f"Error: corrupt database file! "\n-                  f"calculated md5 = {md5.hexdigest()}"\n-                  f" different from {self.md5} ")\n-\n-\n-"""\n-This is the method to download the amrfinderplus database need by bakta.\n-Deprecated to use the amrfinderplus data_manager\n-    def update_amrfinderplus_db(self):\n-        amrfinderplus_db_path = f"{self.db_dir}/{self.db_name}/db/amrfinderplus-db"\n-        if self.db_version == "test":\n-            cmd = [\n-                \'amrfinder_update\',\n-                \'--database\', str(amrfinderplus_db_path),\n-                \'--force_update\',\n-                \'--help\'\n-            ]\n-        else:\n-            cmd = [\n-                \'amrfinder_update\',\n-                \'--database\', str(amrfinderplus_db_path),\n-                \'--force_update\'\n-            ]\n-        proc = sp.run(\n-            cmd,\n-            universal_newlines=True\n-        )\n-        if proc.returncode != 0:\n-            print(f"ERROR: AMRFinderPlus failed! "\n-                  f"command: \'amrfinder_update --force_update"\n-                  f" --database {amrfinderplus_db_path}\'")\n-        else:\n-            print("AMRFinderPlus database download")\n-"""\n+            print(\n+                f"Error: corrupt database file! "\n+                f"calculated md5 = {md5.hexdigest()}"\n+                f" different from {self.md5} "\n+            )\n \n \n def parse_arguments():\n     # parse options and arguments\n     arg_parser = argparse.ArgumentParser()\n     arg_parser.add_argument("data_manager_json")\n-    arg_parser.add_argument("-d", "--database_version",\n-                            help=\'Select the database version \'\n-                                 \'(major and minor eg. 4.0),\'\n-                                 \'default is the latest version\',\n-                            default="latest",\n-                            required=True)\n-    arg_parser.add_argument("-t", "--test", action=\'store_true\',\n-                            help="option to test the script with an empty database")\n+    arg_parser.add_argument(\n+        "-d",\n+        "--database_version",\n+        help="Select the database version "\n+        "(major and minor eg. 4.0),"\n+        "default is the latest version",\n+        default="latest",\n+        required=True,\n+    )\n+    arg_parser.add_argument(\n+        "-t",\n+        "--test",\n+        action="store_true",\n+        help="option to test the script with an empty database",\n+    )\n     return arg_parser.parse_args()\n \n \n@@ -209,11 +205,13 @@\n     all_args = parse_arguments()\n     with open(all_args.data_manager_json) as fh:\n         params = json.load(fh)\n-    target_dir = params[\'output_data\'][0][\'extra_files_path\']\n+    target_dir = params["output_data"][0]["extra_files_path"]\n     os.makedirs(target_dir)\n     # init the class to download bakta db\n-    bakta_upload = InstallBaktaDatabase(test_mode=all_args.test)\n-    bakta_db = bakta_upload.fetch_db_versions(db_version=all_args.database_version)\n+    bakta_upload = InstallBaktaDatabase(\n+        test_mode=all_args.test, db_version=all_args.database_version\n+    )\n+    bakta_db = bakta_upload.fetch_db_versions()\n     # update the path for galaxy\n     bakta_upload.db_dir = target_dir\n     # download the database\n@@ -224,9 +222,9 @@\n     bakta_upload.untar()\n     # make the data_manager metadata\n     bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db)\n-    with open(all_args.data_manager_json, \'w\') as fh:\n+    with open(all_args.data_manager_json, "w") as fh:\n         json.dump(bakta_data_manager, fh, sort_keys=True)\n \n \n-if __name__ == \'__main__\':\n+if __name__ == "__main__":\n     main()\n'
b
diff -r adfd6bf710bd -r 3e73c97f025d data_manager/bakta_build_database.xml
--- a/data_manager/bakta_build_database.xml Sun Apr 16 08:29:25 2023 +0000
+++ b/data_manager/bakta_build_database.xml Fri Jun 23 21:37:05 2023 +0000
[
@@ -20,6 +20,8 @@
             <option value="3.0">V3.0_2021-08-05</option>
             <option value="3.1">V3.1_2022-02-03</option>
             <option value="4.0">V4.0_2022-08-29</option>
+            <option value="5.0">V5.0_2023-02-20</option>
+            <option value="5.0_light">V5.0_light_2023-02-20</option>
         </param>
         <param name="test_data_manager" type="hidden" value=""/>
     </inputs>
@@ -31,13 +33,19 @@
         <test expect_num_outputs="1">
             <param name="test_data_manager" value="--test"/>
             <param name="database_select" value="1.0"/>
-            <output name="output_file" value="bakta_test_data_manager.json" />
+            <output name="output_file" value="bakta_test_data_manager1.json" />
         </test>
         <!-- Test 2 with the latest option -->
         <test expect_num_outputs="1">
             <param name="test_data_manager" value="--test"/>
             <param name="database_select" value="latest"/>
-            <output name="output_file" value="bakta_test_data_manager_test2.json" />
+            <output name="output_file" value="bakta_test_data_manager2.json" />
+        </test>
+        <!-- Test 3 with light db -->
+        <test expect_num_outputs="1">
+            <param name="test_data_manager" value="--test"/>
+            <param name="database_select" value="5.0_light"/>
+            <output name="output_file" value="bakta_test_data_manager3.json" />
         </test>
     </tests>
     <help><![CDATA[
b
diff -r adfd6bf710bd -r 3e73c97f025d data_manager/macro.xml
--- a/data_manager/macro.xml Sun Apr 16 08:29:25 2023 +0000
+++ b/data_manager/macro.xml Fri Jun 23 21:37:05 2023 +0000
b
@@ -1,8 +1,8 @@
 <macros>
-    <token name="@TOOL_VERSION@">1.5.1</token>
+    <token name="@TOOL_VERSION@">1.8.1</token>
     <token name="@REQUESTS_VERSION@">2.27.1</token>
     <token name="@PYTHON_VERSION@">3.8</token>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
     <token name="@PROFILE@">21.05</token>
     <xml name="requirements">
         <requirements>
b
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test.loc
--- a/test-data/bakta_test.loc Sun Apr 16 08:29:25 2023 +0000
+++ b/test-data/bakta_test.loc Fri Jun 23 21:37:05 2023 +0000
b
@@ -1,9 +1,6 @@
-# this is a tab separated file describing the location of bakta database
-#
-# the columns are:
-# value, dbkey, bakta_version, path
-#
-# for example
-7197299 V0.0_date_test 0.0 ${__HERE__}
-V1.0_2022-10-12 7197299 1.4 /tmp/tmpiyh6lcqw/galaxy-dev/tool-data/bakta_database/7197299
-V2.0_2022-11-25 7360139 1.5 /tmp/tmpiyh6lcqw/galaxy-dev/tool-data/bakta_database/7360139
+V1.0_2022-10-12 7197299 1.4 /tmp/tmpq5t7s3c5/galaxy-dev/tool-data/bakta_database/7197299
+V5.0_2023-06-08 8021027 1.8 /tmp/tmpq5t7s3c5/galaxy-dev/tool-data/bakta_database/8021027
+V5.0light_2023-06-08 8021027 1.8 /tmp/tmpq5t7s3c5/galaxy-dev/tool-data/bakta_database/8021027
+V1.0_2022-10-12 7197299 1.4 /tmp/tmpydhjlpxl/galaxy-dev/tool-data/bakta_database/7197299
+V5.0_2023-06-08 8021027 1.8 /tmp/tmpydhjlpxl/galaxy-dev/tool-data/bakta_database/8021027
+V5.0light_2023-06-08 8021027 1.8 /tmp/tmpydhjlpxl/galaxy-dev/tool-data/bakta_database/8021027
b
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager.json
--- a/test-data/bakta_test_data_manager.json Sun Apr 16 08:29:25 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,1 +0,0 @@
-{"data_tables": {"bakta_database": [{"bakta_version": "1.4", "dbkey": "7197299", "path": "db", "value": "V1.0_2022-10-12"}]}}
\ No newline at end of file
b
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager1.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bakta_test_data_manager1.json Fri Jun 23 21:37:05 2023 +0000
[
@@ -0,0 +1,1 @@
+{"data_tables": {"bakta_database": [{"bakta_version": "1.4", "dbkey": "7197299", "path": "db", "value": "V1.0_2022-10-12"}]}}
\ No newline at end of file
b
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager2.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bakta_test_data_manager2.json Fri Jun 23 21:37:05 2023 +0000
[
@@ -0,0 +1,1 @@
+{"data_tables": {"bakta_database": [{"bakta_version": "1.8", "dbkey": "8021027", "path": "db", "value": "V5.0_2023-06-08"}]}}
\ No newline at end of file
b
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager3.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bakta_test_data_manager3.json Fri Jun 23 21:37:05 2023 +0000
[
@@ -0,0 +1,1 @@
+{"data_tables": {"bakta_database": [{"bakta_version": "1.8", "dbkey": "8021027", "path": "db", "value": "V5.0light_2023-06-08"}]}}
\ No newline at end of file
b
diff -r adfd6bf710bd -r 3e73c97f025d test-data/bakta_test_data_manager_test2.json
--- a/test-data/bakta_test_data_manager_test2.json Sun Apr 16 08:29:25 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,1 +0,0 @@
-{"data_tables": {"bakta_database": [{"bakta_version": "1.5", "dbkey": "7360139", "path": "db", "value": "V2.0_2022-11-25"}]}}
\ No newline at end of file
b
diff -r adfd6bf710bd -r 3e73c97f025d test-data/db-versions.json
--- a/test-data/db-versions.json Sun Apr 16 08:29:25 2023 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,26 +0,0 @@
-[
-    {
-        "date": "2022-10-12",
-        "major": 1,
-        "minor": 0,
-        "doi": "10.5281/zenodo.7197299",
-        "record": "7197299",
-        "md5": "8b0250c17078742fc12207d4efb0fc1a",
-        "software-min": {
-            "major": 1,
-            "minor": 4
-        }
-    },
-    {
-        "date": "2022-11-25",
-        "major": 2,
-        "minor": 0,
-        "doi": "10.5281/zenodo.7360139",
-        "record": "7360139",
-        "md5": "ebdb799a6bd97e56ca359db781ab8bab",
-        "software-min": {
-            "major": 1,
-            "minor": 5
-        }
-    }
-]