changeset 0:a19189a128cb draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_bakta commit fba6deae1d3707e0c14202433d0495e157745afd
author iuc
date Sat, 10 Dec 2022 21:52:28 +0000
parents
children bb463043c93e
files data_manager/bakta_build_database.py data_manager/bakta_build_database.xml data_manager/macro.xml data_manager_conf.xml test-data/bakta_test.loc test-data/bakta_test_data_manager.json test-data/bakta_test_data_manager_test2.json test-data/db-versions.json tool-data/bakta_database.loc tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 11 files changed, 373 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/bakta_build_database.py	Sat Dec 10 21:52:28 2022 +0000
@@ -0,0 +1,232 @@
+import argparse
+import hashlib
+import json
+import os
+import sys
+import tarfile
+from datetime import datetime
+from pathlib import Path
+
+
+import requests
+
+
+class GetBaktaDatabaseInfo:
+    """
+    Fetch Bakta database metadata and build the data_manager JSON entry.
+
+    The list of published database versions is read from a remote
+    ``db-versions.json`` file: ``DB_VERSIONS_URL`` by default, or
+    ``DB_TEST_URL`` when ``test_mode`` is set.
+    """
+
+    # Seconds to wait for the version list before giving up, so that a
+    # stalled connection cannot hang the job forever.
+    REQUEST_TIMEOUT = 60
+
+    def __init__(self,
+                 data_table_name="bakta_database",
+                 db_name=Path.cwd().joinpath("db"),
+                 db_version="latest",
+                 test_mode=False):
+        """
+        data_table_name: name of the Galaxy data table to fill
+        db_name: name (or path) of the database
+        db_version: "latest" or a "<major>.<minor>" string
+        test_mode: read the version list from DB_TEST_URL instead
+        """
+        self.bakta_table_list = None
+        self.db_url = None
+        self.data_table_entry = None
+        self.data_table_name = data_table_name
+        self.db_name = db_name
+        self.db_version = db_version
+        self.DB_VERSIONS_URL = 'https://raw.githubusercontent.com/oschwengers/bakta/master/db-versions.json'
+        self.DB_TEST_URL = 'https://zenodo.org/record/7360542/files/db-versions.json'
+        self.test_mode = test_mode
+
+    def get_data_table_format(self):
+        """
+        Skeleton of a data_table format
+        return: a data table formatted for json output
+        """
+        self.data_table_entry = {
+            "data_tables": {
+                self.data_table_name: {}
+            }
+        }
+        return self.data_table_entry
+
+    @staticmethod
+    def _select_version(versions, db_version):
+        """
+        Pick one entry out of the parsed db-versions list
+        versions: list of dicts with "date", "major" and "minor" keys
+        db_version: "latest" or a "<major>.<minor>" string
+        return: the matching dict, or None when nothing matches
+        """
+        if db_version == "latest":
+            if not versions:
+                return None
+            # Compare parsed dates rather than raw strings so that the
+            # ordering does not depend on zero padding.
+            return max(versions,
+                       key=lambda entry: datetime.strptime(entry["date"],
+                                                           '%Y-%m-%d'))
+        for entry in versions:
+            if '{0}.{1}'.format(entry["major"], entry["minor"]) == db_version:
+                return entry
+        return None
+
+    def fetch_db_versions(self, db_version="latest"):
+        """
+        List bakta database info related to the db_version selected
+        db_version: "latest" or a "<major>.<minor>" string
+        return: the dict describing the selected version, or None when no
+                version matches (db_url and db_version are then unchanged)
+        raise: IOError when the version list cannot be downloaded,
+               ValueError when it is not valid JSON
+        """
+        if self.test_mode:
+            self.DB_VERSIONS_URL = self.DB_TEST_URL
+        try:
+            with requests.get(self.DB_VERSIONS_URL,
+                              timeout=self.REQUEST_TIMEOUT) as resp:
+                # Fail on 4xx/5xx instead of parsing an error page as JSON
+                resp.raise_for_status()
+                versions = json.loads(resp.content)
+        except (IOError, ValueError) as e:
+            print(e, file=sys.stderr)
+            raise
+
+        filtered_version = self._select_version(versions, db_version)
+        if filtered_version is None:
+            print("No matching version detected in the list", file=sys.stderr)
+            return None
+        self.db_url = "https://zenodo.org/record/" \
+                      f"{filtered_version['record']}/files/db.tar.gz"
+        # Keep the requested label ("latest" included), not the resolved one
+        self.db_version = db_version
+        return filtered_version
+
+    def get_data_manager(self, bakta_database_info):
+        """
+        Build the data_manager dictionary for one database version
+        bakta_database_info: a dict as returned by fetch_db_versions
+        return: {"data_tables": {<data_table_name>: [<entry>]}}
+        """
+        self.bakta_table_list = self.get_data_table_format()
+        bakta_value = f"V{bakta_database_info['major']}." \
+                      f"{bakta_database_info['minor']}_" \
+                      f"{bakta_database_info['date']}"
+        tool_version = f"{bakta_database_info['software-min']['major']}." \
+                       f"{bakta_database_info['software-min']['minor']}"
+        data_info = dict(value=bakta_database_info['record'],
+                         dbkey=bakta_value,
+                         bakta_version=tool_version,
+                         path="db")
+        self.bakta_table_list["data_tables"][self.data_table_name] = [data_info]
+        return self.bakta_table_list
+
+
+class InstallBaktaDatabase(GetBaktaDatabaseInfo):
+    """
+    Download the bakta database,
+    check md5 sum
+    and untar the downloaded db.
+
+    fetch_db_versions() must be called first: it sets db_url and
+    db_version, which download() and calc_md5_sum() rely on.
+    """
+
+    # (connect, read) timeouts in seconds for the tarball download, so that
+    # a stalled connection cannot hang the job forever.
+    DOWNLOAD_TIMEOUT = (30, 300)
+
+    def __init__(self,
+                 db_dir=Path.cwd(),
+                 db_name="bakta",
+                 tarball_name="db.tar.gz",
+                 test_mode=False):
+        """
+        db_dir: directory receiving the tarball and the extracted files
+        db_name: base name of the database
+        tarball_name: file name used for the downloaded archive
+        test_mode: read the version list from the test URL
+        """
+        super().__init__(test_mode=test_mode)
+        self.md5 = None
+        self.db_dir = db_dir
+        self.db_name = db_name
+        self.tarball_name = tarball_name
+        self.tarball_path = None
+        self.test_mode = test_mode
+
+    def download(self):
+        """
+        Stream the database tarball from db_url into db_dir
+        Sets tarball_path on success and exits the program on failure.
+        """
+        if self.db_url is None:
+            sys.exit('ERROR: no database url, '
+                     'fetch_db_versions() did not select a version')
+        self.db_name = f'{self.db_name}_{self.db_version}'
+        bakta_path = Path(self.db_dir).joinpath(self.tarball_name)
+        try:
+            with requests.get(self.db_url, stream=True,
+                              timeout=self.DOWNLOAD_TIMEOUT) as resp:
+                # Do not save an HTTP error page as if it were the tarball
+                resp.raise_for_status()
+                with bakta_path.open('wb') as fh_out:
+                    for data in resp.iter_content(chunk_size=1024 * 1024):
+                        fh_out.write(data)
+        except IOError as err:
+            # Stop here: continuing would only fail later in untar()
+            sys.exit(f'ERROR: Could not download file from Zenodo!'
+                     f' url={self.db_url}, path={self.tarball_name}'
+                     f' ({err})')
+        print(f'Download bakta database {self.db_version}')
+        self.tarball_path = bakta_path
+
+    def untar(self):
+        """
+        Extract the downloaded tarball into db_dir
+        return: db_dir as a posix path string
+        Exits the program when the archive cannot be read or extracted.
+        """
+        db_path = Path(self.db_dir).as_posix()
+        # Same location as calc_md5_sum() when download() was not called
+        tarball_path = self.tarball_path
+        if tarball_path is None:
+            tarball_path = Path(self.db_dir).joinpath(self.tarball_name)
+        try:
+            with tarball_path.open('rb') as fh_in, \
+                    tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file:
+                if hasattr(tarfile, 'data_filter'):
+                    # Refuse members that would escape db_path; only
+                    # available on Python versions with extraction filters
+                    tar_file.extractall(path=db_path, filter='data')
+                else:
+                    tar_file.extractall(path=db_path)
+        except (OSError, tarfile.TarError):
+            sys.exit(f'ERROR: Could not extract {self.tarball_name} '
+                     f'to {self.db_name}')
+        print(f'Untar the database in {db_path}')
+        return db_path
+
+    def calc_md5_sum(self, buffer_size=1048576):
+        """
+        Compare the md5 sum of the tarball with the published one
+        buffer_size: number of bytes read per chunk
+        Exits the program when the sums differ, so that a corrupt archive
+        is never extracted or registered.
+        """
+        tarball_path = Path(self.db_dir).joinpath(self.tarball_name)
+        db_info = self.fetch_db_versions(db_version=self.db_version)
+        if db_info is None:
+            sys.exit(f'ERROR: no md5 sum found for version {self.db_version}')
+        self.md5 = db_info["md5"]
+        md5 = hashlib.md5()
+        with tarball_path.open('rb') as fh:
+            for data in iter(lambda: fh.read(buffer_size), b''):
+                md5.update(data)
+        digest = md5.hexdigest()
+        if digest != self.md5:
+            sys.exit(f"Error: corrupt database file! "
+                     f"calculated md5 = {digest}"
+                     f" different from {self.md5} ")
+        print('\t...md5 control database OK')
+
+
+"""
+This method used to download the amrfinderplus database needed by bakta.
+It is deprecated: use the amrfinderplus data_manager instead.
+    def update_amrfinderplus_db(self):
+        amrfinderplus_db_path = f"{self.db_dir}/{self.db_name}/db/amrfinderplus-db"
+        if self.db_version == "test":
+            cmd = [
+                'amrfinder_update',
+                '--database', str(amrfinderplus_db_path),
+                '--force_update',
+                '--help'
+            ]
+        else:
+            cmd = [
+                'amrfinder_update',
+                '--database', str(amrfinderplus_db_path),
+                '--force_update'
+            ]
+        proc = sp.run(
+            cmd,
+            universal_newlines=True
+        )
+        if proc.returncode != 0:
+            print(f"ERROR: AMRFinderPlus failed! "
+                  f"command: 'amrfinder_update --force_update"
+                  f" --database {amrfinderplus_db_path}'")
+        else:
+            print("AMRFinderPlus database download")
+"""
+
+
+def parse_arguments(argv=None):
+    """
+    Parse the command line options and arguments
+    argv: list of arguments to parse, sys.argv[1:] when None
+    return: namespace with data_manager_json, database_version and test
+    """
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument("data_manager_json")
+    # Not required: the documented default ("latest") applies when omitted
+    arg_parser.add_argument("-d", "--database_version",
+                            help='Select the database version '
+                                 '(major and minor eg. 4.0), '
+                                 'default is the latest version',
+                            default="latest",
+                            required=False)
+    arg_parser.add_argument("-t", "--test", action='store_true',
+                            help="option to test the script with an empty database")
+    return arg_parser.parse_args(argv)
+
+
+def main():
+    """
+    Download, check and extract a bakta database, then overwrite the
+    data_manager json file given on the command line with the new entry.
+    """
+    all_args = parse_arguments()
+    with open(all_args.data_manager_json) as fh:
+        params = json.load(fh)
+    target_dir = params['output_data'][0]['extra_files_path']
+    # exist_ok: do not fail when the directory is already there
+    os.makedirs(target_dir, exist_ok=True)
+    # init the class to download bakta db, directly in the path for galaxy
+    bakta_upload = InstallBaktaDatabase(db_dir=target_dir,
+                                        test_mode=all_args.test)
+    bakta_db = bakta_upload.fetch_db_versions(db_version=all_args.database_version)
+    if bakta_db is None:
+        # Unknown version: nothing to download and nothing to register
+        sys.exit(f"ERROR: bakta database version "
+                 f"'{all_args.database_version}' not found")
+    # download the database
+    bakta_upload.download()
+    # check md5 sum
+    bakta_upload.calc_md5_sum()
+    # untar db
+    bakta_upload.untar()
+    # make the data_manager metadata
+    bakta_data_manager = bakta_upload.get_data_manager(bakta_database_info=bakta_db)
+    with open(all_args.data_manager_json, 'w') as fh:
+        json.dump(bakta_data_manager, fh, sort_keys=True)
+
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/bakta_build_database.xml	Sat Dec 10 21:52:28 2022 +0000
@@ -0,0 +1,49 @@
+<tool id="bakta_build_database" name="Bakta" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Bakta database builder</description>
+    <macros>
+        <import>macro.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code">
+      <![CDATA[
+        python '$__tool_directory__/bakta_build_database.py'
+        '$output_file'
+        --database_version '$database_select'
+        $test_data_manager
+      ]]></command>
+    <inputs>
+        <param name="database_select" type="select" label="Database version" help="Choose a database version to download (default latest version)">
+            <option value="latest" selected="true">Latest available version</option>
+            <option value="1.0">V1.0_2020-11-20</option>
+            <option value="1.1">V1.1_2020-12-18</option>
+            <option value="2.0">V2.0_2021-04-05</option>
+            <option value="3.0">V3.0_2021-08-05</option>
+            <option value="3.1">V3.1_2022-02-03</option>
+            <option value="4.0">V4.0_2022-08-29</option>
+        </param>
+        <param name="test_data_manager" type="hidden" value=""/>
+    </inputs>
+    <outputs>
+        <data name="output_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+        <!-- Test 1 with version 1.0 -->
+        <test expect_num_outputs="1">
+            <param name="test_data_manager" value="--test"/>
+            <param name="database_select" value="1.0"/>
+            <output name="output_file" value="bakta_test_data_manager.json" />
+        </test>
+        <!-- Test 2 with the latest option -->
+        <test expect_num_outputs="1">
+            <param name="test_data_manager" value="--test"/>
+            <param name="database_select" value="latest"/>
+            <output name="output_file" value="bakta_test_data_manager_test2.json" />
+        </test>
+    </tests>
+    <help><![CDATA[
+        Download a specific version of the Bakta database <https://github.com/oschwengers/bakta#database>
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1099/mgen.0.000685</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/macro.xml	Sat Dec 10 21:52:28 2022 +0000
@@ -0,0 +1,13 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.5.1</token>
+    <token name="@REQUESTS_VERSION@">2.27.1</token>
+    <token name="@PYTHON_VERSION@">3.8</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.05</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@PYTHON_VERSION@">python</requirement>
+            <requirement type="package" version="@REQUESTS_VERSION@">requests</requirement>
+        </requirements>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Sat Dec 10 21:52:28 2022 +0000
@@ -0,0 +1,19 @@
+<data_managers>
+    <data_manager tool_file="data_manager/bakta_build_database.xml" id="bakta_build_database" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+        <data_table name="bakta_database">
+            <output>
+                <column name="value"/>
+                <column name="dbkey"/>
+                <column name="bakta_version"/>
+                <column name="path" output_ref="output_file">
+                    <move type="directory" relativize_symlinks="True">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">bakta_database/${value}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/bakta_database/${value}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bakta_test.loc	Sat Dec 10 21:52:28 2022 +0000
@@ -0,0 +1,11 @@
+# This is a tab-separated file describing the location of the bakta database
+#
+# the columns are:
+# value, dbkey, bakta_version, path
+#
+# for example
+7197299	V0.0_date_test	0.0	${__HERE__}
+7197299	V1.0_2022-10-12	1.4	/tmp/tmpxrkfnuec/galaxy-dev/tool-data/bakta_database/7197299
+7360139	V2.0_2022-11-25	1.5	/tmp/tmpxrkfnuec/galaxy-dev/tool-data/bakta_database/7360139
+7197299	V1.0_2022-10-12	1.4	/tmp/tmpwe9n4gyg/galaxy-dev/tool-data/bakta_database/7197299
+7360139	V2.0_2022-11-25	1.5	/tmp/tmpwe9n4gyg/galaxy-dev/tool-data/bakta_database/7360139
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bakta_test_data_manager.json	Sat Dec 10 21:52:28 2022 +0000
@@ -0,0 +1,1 @@
+{"data_tables": {"bakta_database": [{"bakta_version": "1.4", "dbkey": "V1.0_2022-10-12", "path": "db", "value": "7197299"}]}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bakta_test_data_manager_test2.json	Sat Dec 10 21:52:28 2022 +0000
@@ -0,0 +1,1 @@
+{"data_tables": {"bakta_database": [{"bakta_version": "1.5", "dbkey": "V2.0_2022-11-25", "path": "db", "value": "7360139"}]}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/db-versions.json	Sat Dec 10 21:52:28 2022 +0000
@@ -0,0 +1,26 @@
+[
+    {
+        "date": "2022-10-12",
+        "major": 1,
+        "minor": 0,
+        "doi": "10.5281/zenodo.7197299",
+        "record": "7197299",
+        "md5": "8b0250c17078742fc12207d4efb0fc1a",
+        "software-min": {
+            "major": 1,
+            "minor": 4
+        }
+    },
+    {
+        "date": "2022-11-25",
+        "major": 2,
+        "minor": 0,
+        "doi": "10.5281/zenodo.7360139",
+        "record": "7360139",
+        "md5": "ebdb799a6bd97e56ca359db781ab8bab",
+        "software-min": {
+            "major": 1,
+            "minor": 5
+        }
+    }
+]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bakta_database.loc	Sat Dec 10 21:52:28 2022 +0000
@@ -0,0 +1,7 @@
+# This is a tab-separated file describing the location of the bakta database
+#
+# the columns are:
+# value, dbkey, bakta_version, path
+#
+# for example
+#7197299	V0.0_date_test	0.0	${__HERE__}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Sat Dec 10 21:52:28 2022 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of bakta database in the required format -->
+    <table name="bakta_database" comment_char="#">
+        <columns>value, dbkey, bakta_version, path</columns>
+        <file path="tool-data/bakta_database.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Sat Dec 10 21:52:28 2022 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of bakta database in the required format -->
+    <table name="bakta_database" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, bakta_version, path</columns>
+        <file path="${__HERE__}/test-data/bakta_test.loc" />
+    </table>
+</tables>