Next changeset 1:592ef3959907 (2023-04-16) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_amrfinderplus commit 59077a173599fc9d355a5d36ad7875050dbe3e19 |
added:
data_manager/data_manager_build_amrfinderplus.py data_manager/data_manager_build_amrfinderplus.xml data_manager/macro.xml data_manager_conf.xml test-data/amrfinderplus.loc.test test-data/amrfinderplus_test_data_manager_1.json test-data/amrfinderplus_test_data_manager_2.json tool-data/amrfinderplus.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
b |
diff -r 000000000000 -r eea0c38a9afd data_manager/data_manager_build_amrfinderplus.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/data_manager_build_amrfinderplus.py Thu Jan 05 14:28:07 2023 +0000 |
[ |
b'@@ -0,0 +1,257 @@\n+import argparse\n+import json\n+import os\n+import subprocess as sp\n+from ftplib import FTP\n+from io import BytesIO\n+from pathlib import Path\n+\n+import pandas as pd\n+\n+\n+class GetAmrFinderPlusDataManager:\n+ """\n+ Create the json file with database information for galaxy data manager\n+ """\n+\n+ def __init__(self,\n+ amrfinderplus_database="amrfinderplus_database",\n+ db_name="amrfinderplus-db",\n+ amrfinderplus_version="latest",\n+ date_version=None):\n+ self.data_table_name = amrfinderplus_database\n+ self._db_name = db_name\n+ self._amrfinderplus_version = amrfinderplus_version\n+ self._amrfinderplus_date_version = date_version\n+ self.data_table_entry = None\n+ self.amrfinderplus_table_list = None\n+\n+ def get_data_table_format(self):\n+ """\n+ Skeleton of a data_table format\n+ return: a data table formatted for json output\n+ """\n+ self.data_table_entry = {\n+ "data_tables": {\n+ self.data_table_name: {}\n+ }\n+ }\n+ return self.data_table_entry\n+\n+ def get_data_manager(self):\n+ """\n+ Create the empty data table format and add all the information into\n+ return: The data table with database information\n+ """\n+ self.amrfinderplus_table_list = self.get_data_table_format()\n+ amrfinderplus_value = f"amrfinderplus_V{self._amrfinderplus_version}" \\\n+ f"_{self._amrfinderplus_date_version}"\n+ amrfinderplus_name = f"V{self._amrfinderplus_version}" \\\n+ f"-{self._amrfinderplus_date_version}"\n+ data_info = dict(value=amrfinderplus_value,\n+ name=amrfinderplus_name,\n+ path=self._db_name)\n+ self.amrfinderplus_table_list["data_tables"][self.data_table_name] = [data_info]\n+ return self.amrfinderplus_table_list\n+\n+\n+class DownloadAmrFinderPlusDatabase(GetAmrFinderPlusDataManager):\n+ """\n+ Download the amrfinderplus database from the ncbi.\n+ Make the database available with hmm and indexed files\n+ Build the data manager infos for galaxy\n+ """\n+\n+ def __init__(self,\n+ output_dir=Path.cwd(),\n+ 
ncbi_url="ftp.ncbi.nlm.nih.gov",\n+ ftp_login="anonymous",\n+ ftp_password="anonymous",\n+ amrfinderplus_database="amrfinderplus_database",\n+ db_name="amrfinderplus-db",\n+ amrfinderplus_version="latest",\n+ json_file_path=None,\n+ date_version=None,\n+ amrfinderplus_db_path=None,\n+ test_mode=False):\n+\n+ super().__init__()\n+ self.json_file_path = json_file_path\n+ self._output_dir = output_dir\n+ self._ncbi_ftp_url = ncbi_url\n+ self._ncbi_database_path = "pathogen/Antimicrobial_resistance/AMRFinderPlus/database"\n+ self._login = ftp_login\n+ self._password = ftp_password\n+ self._amrfinderplus_database = amrfinderplus_database\n+ self._db_name = db_name\n+ self._amrfinderplus_version = amrfinderplus_version\n+ self._amrfinderplus_date_version = date_version\n+ self.species_list = None\n+ self.test_mode = test_mode\n+ self.amrfinderplus_db_path = amrfinderplus_db_path\n+\n+ @staticmethod\n+ def subprocess_cmd(command, *args):\n+ """\n+ Method to call external tools with any parameters\n+ :param command: command name from the tool used (e.g. wget or makeblastdb)\n+ :param args: free number of argument need for the command tool (e.g. 
-r, -P ...)\n+ :return: launch the command line from the system\n+ """\n+ cmd = [command]\n+ [cmd.append(i) for i in args]\n+ proc = sp.run(cmd, stdout=sp.PIPE, stderr=sp.PIPE)\n+ if proc.returncode != 0:\n+ print('..b'e species")\n+\n+ def make_blastdb(self):\n+ """\n+ Index fasta file for blast\n+ """\n+ self.extract_filelist_makeblast()\n+ nucl_file_db_list = [f\'{self.amrfinderplus_db_path}/AMR_DNA-{specie}\' for specie in self.species_list]\n+ amr_dna = f\'{self.amrfinderplus_db_path}/AMR_CDS\'\n+ amr_prot = f\'{self.amrfinderplus_db_path}/AMRProt\'\n+ os.chdir(self.amrfinderplus_db_path)\n+ if Path(amr_dna).exists():\n+ nucl_file_db_list.append(amr_dna)\n+ else:\n+ print("No file AMR_CDS detected for indexing")\n+ if Path(amr_prot).exists():\n+ self.subprocess_cmd("makeblastdb", "-in", amr_prot, "-dbtype", "prot")\n+ else:\n+ print("No file AMRProt detected for indexing")\n+ [self.subprocess_cmd("makeblastdb", "-in", file, "-dbtype", "nucl") for file in nucl_file_db_list]\n+\n+ def get_amrfinderplus_version(self, version_file="version.txt",\n+ database_version_file="database_format_version.txt"):\n+ """\n+ Check the version when latest if provided and update the number\n+ param version_file: name of the file containing version information\n+ param database_version_file: name of the file containing date version information\n+ """\n+ ftp = FTP(self._ncbi_ftp_url)\n+ ftp.login(self._login, self._password)\n+ ftp.cwd(f"{self._ncbi_database_path}/{self._amrfinderplus_version}")\n+ db_version = BytesIO()\n+ db_date_version = BytesIO()\n+ ftp.retrbinary(f\'RETR {version_file}\', db_version.write)\n+ ftp.retrbinary(f\'RETR {database_version_file}\', db_date_version.write)\n+ self._amrfinderplus_date_version = db_version.getvalue().decode("utf-8").splitlines()[0]\n+ self._amrfinderplus_version = \'.\'.join(\n+ db_date_version.getvalue().decode("utf-8").splitlines()[0].split(".")[:2])\n+\n+ def read_json_input_file(self):\n+ """\n+ Import the json file\n+ """\n+ with 
open(self.json_file_path) as fh:\n+ params = json.load(fh)\n+ target_dir = params[\'output_data\'][0][\'extra_files_path\']\n+ os.makedirs(target_dir)\n+ self._output_dir = target_dir\n+\n+ def write_json_infos(self):\n+ """\n+ Write in the imported json file\n+ """\n+ with open(self.json_file_path, \'w\') as fh:\n+ json.dump(self.get_data_manager(), fh, sort_keys=True)\n+\n+\n+def parse_arguments():\n+ """\n+ List of arguments provided by the user\n+ return: parsed arguments\n+ """\n+ # parse options and arguments\n+ arg_parser = argparse.ArgumentParser()\n+ arg_parser.add_argument("data_manager_json",\n+ help="json file from galaxy")\n+ arg_parser.add_argument("--db_version", default="latest",\n+ help="select the major version of the database (e.g. 3.10, 3.8), default is latest")\n+ arg_parser.add_argument("--db_date",\n+ help="select the date into the database version (e.g. 2022-10-11.2)")\n+ arg_parser.add_argument("--test", action=\'store_true\',\n+ help="option to test the script with an lighted database")\n+ return arg_parser.parse_args()\n+\n+\n+def main():\n+ all_args = parse_arguments()\n+ amrfinderplus_download = DownloadAmrFinderPlusDatabase(amrfinderplus_version=all_args.db_version,\n+ date_version=all_args.db_date,\n+ json_file_path=all_args.data_manager_json,\n+ test_mode=all_args.test)\n+ amrfinderplus_download.read_json_input_file()\n+ amrfinderplus_download.download_amrfinderplus_db()\n+ amrfinderplus_download.make_hmm_profile()\n+ amrfinderplus_download.make_blastdb()\n+ amrfinderplus_download.write_json_infos()\n+\n+\n+if __name__ == \'__main__\':\n+ main()\n' |
b |
diff -r 000000000000 -r eea0c38a9afd data_manager/data_manager_build_amrfinderplus.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/data_manager_build_amrfinderplus.xml Thu Jan 05 14:28:07 2023 +0000 |
[ |
@@ -0,0 +1,92 @@ +<tool id="data_manager_build_amrfinderplus" name="amrfinderplus_datamanager" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>AMRfinderplus database builder</description> + <macros> + <import>macro.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="exit_code"> + <![CDATA[ + python '$__tool_directory__/data_manager_build_amrfinderplus.py' + '$output_file' + --db_version '$database_list.database_version_select' + #if $database_list.database_version_select != 'latest': + --db_date '$database_list.database_date_select' + #end if + $test_data_manager + ]]></command> + <inputs> + <conditional name="database_list"> + <param name="database_version_select" type="select" label="Database version"> + <option value="latest" selected="true">Latest available version</option> + <option value="3.10">V3.10</option> + <option value="3.9">V3.9</option> + <option value="3.8">V3.8</option> + <option value="3.6">V3.6</option> + </param> + <when value="latest"> + </when> + <when value="3.10"> + <param name="database_date_select" type="select" label="Date version"> + <option value="2022-10-11.2" selected="true">2022-10-11.2</option> + <option value="2022-08-09.1">2022-08-09.1</option> + <option value="2022-05-26.1">2022-05-26.1</option> + <option value="2022-04-04.1">2022-04-04.1</option> + <option value="2021-12-21.1">2021-12-21.1</option> + <option value="2021-09-30.1">2021-09-30.1</option> + <option value="2021-08-11.1">2021-08-11.1</option> + <option value="2021-06-01.1">2021-06-01.1</option> + <option value="2021-03-01.1">2021-03-01.1</option> + </param> + </when> + <when value="3.9"> + <param name="database_date_select" type="select" label="Date version"> + <option value="2020-11-09.1" selected="true">2020-11-09.1</option> + <option value="2020-12-17.1">2020-12-17.1</option> + </param> + </when> + <when value="3.8"> + <param name="database_date_select" type="select" 
label="Date version"> + <option value="2020-09-30.1" selected="true">2020-09-30.1</option> + <option value="2020-09-22.2">2020-09-22.2</option> + <option value="2020-07-16.2">2020-07-16.2</option> + <option value="2020-06-11.1">2020-06-11.1</option> + <option value="2020-05-04.1">2020-05-04.1</option> + </param> + </when> + <when value="3.6"> + <param name="database_date_select" type="select" label="Date version"> + <option value="2020-01-22.1" selected="true">2020-01-22.1</option> + <option value="2020-03-20.1">2020-03-20.1</option> + </param> + </when> + </conditional> + <param name="test_data_manager" type="hidden" value=""/> + </inputs> + <outputs> + <data name="output_file" format="data_manager_json"/> + </outputs> + <tests> + <!-- Test_1 DB latest --> + <test expect_num_outputs="1"> + <param name="test_data_manager" value="--test"/> + <output name="output_file" value="amrfinderplus_test_data_manager_1.json"/> + </test> + <!-- Test_2 DB 3.6 --> + <test expect_num_outputs="1"> + <param name="test_data_manager" value="--test"/> + <conditional name="database_list"> + <param name="database_version_select" value="3.6"/> + <param name="database_date_select" value="2020-03-20.1"/> + </conditional> + <output name="output_file" value="amrfinderplus_test_data_manager_2.json"/> + </test> + + + </tests> + <help><![CDATA[ + Download amrfinderplus database from the NCBI server + ]]></help> + <citations> + <citation type="doi">10.1038/s41598-021-91456-0</citation> + </citations> +</tool> |
b |
diff -r 000000000000 -r eea0c38a9afd data_manager/macro.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/macro.xml Thu Jan 05 14:28:07 2023 +0000 |
b |
@@ -0,0 +1,15 @@ +<?xml version="1.0"?> +<macros> + <token name="@TOOL_VERSION@">3.10.45</token> + <token name="@PYTHON_VERSION@">3.10.6</token> + <token name="@PANDAS@">1.5.1</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">21.05</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">ncbi-amrfinderplus</requirement> + <requirement type="package" version="@PYTHON_VERSION@">python</requirement> + <requirement type="package" version="@PANDAS@">pandas</requirement> + </requirements> + </xml> +</macros> |
b |
diff -r 000000000000 -r eea0c38a9afd data_manager_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Thu Jan 05 14:28:07 2023 +0000 |
b |
@@ -0,0 +1,19 @@ +<?xml version="1.0"?> +<data_managers> + <data_manager tool_file="data_manager/data_manager_build_amrfinderplus.xml" id="data_manager_build_amrfinderplus" version="@TOOL_VERSION@"> + <data_table name="amrfinderplus_database"> + <output> + <column name="value" /> + <column name="name" /> + <column name="path" output_ref="output_file"> + <move type="directory" relativize_symlinks="True"> + <source>${path}</source> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">amrfinderplus-db/${value}</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/amrfinderplus-db/${value}</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + </data_manager> +</data_managers> |
b |
diff -r 000000000000 -r eea0c38a9afd test-data/amrfinderplus.loc.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/amrfinderplus.loc.test Thu Jan 05 14:28:07 2023 +0000 |
b |
@@ -0,0 +1,8 @@ +# this is a tab separated file describing the location of amrfinderplus database +# +# the columns are: +# value, name, path +# +# for example +amrfinderplus_V3.10_2022-10-11.2 V3.10-2022-10-11.2 amrfinderplus-db +amrfinderplus_V3.6_2020-03-20.1 V3.6-2020-03-20.1 amrfinderplus-db |
b |
diff -r 000000000000 -r eea0c38a9afd test-data/amrfinderplus_test_data_manager_1.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/amrfinderplus_test_data_manager_1.json Thu Jan 05 14:28:07 2023 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"data_tables": {"amrfinderplus_database": [{"name": "V3.11-2022-12-19.1", "path": "amrfinderplus-db", "value": "amrfinderplus_V3.11_2022-12-19.1"}]}} \ No newline at end of file |
b |
diff -r 000000000000 -r eea0c38a9afd test-data/amrfinderplus_test_data_manager_2.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/amrfinderplus_test_data_manager_2.json Thu Jan 05 14:28:07 2023 +0000 |
[ |
@@ -0,0 +1,1 @@ +{"data_tables": {"amrfinderplus_database": [{"name": "V3.6-2020-03-20.1", "path": "amrfinderplus-db", "value": "amrfinderplus_V3.6_2020-03-20.1"}]}} \ No newline at end of file |
b |
diff -r 000000000000 -r eea0c38a9afd tool-data/amrfinderplus.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/amrfinderplus.loc.sample Thu Jan 05 14:28:07 2023 +0000 |
b |
@@ -0,0 +1,7 @@ +# this is a tab separated file describing the location of amrfinderplus database +# +# the columns are: +# value, name, path +# +# for example +#amrfinderplus_V3.6_2020-03-20.1 V3.6-2020-03-20.1 amrfinderplus-db |
b |
diff -r 000000000000 -r eea0c38a9afd tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Jan 05 14:28:07 2023 +0000 |
b |
@@ -0,0 +1,7 @@ +<tables> + <!-- Locations of amrfinderplus database in the required format --> + <table name="amrfinderplus_database" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/amrfinderplus.loc" /> + </table> +</tables> \ No newline at end of file |
b |
diff -r 000000000000 -r eea0c38a9afd tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Thu Jan 05 14:28:07 2023 +0000 |
b |
@@ -0,0 +1,7 @@ +<tables> + <!-- Locations of amrfinderplus database in the required format --> + <table name="amrfinderplus_database" comment_char="#"> + <columns>value, name, path</columns> + <file path="${__HERE__}/test-data/amrfinderplus.loc.test"/> + </table> +</tables> |