Mercurial > repos > iuc > data_manager_gtdbtk_database_installer
changeset 7:3b1d503c6260 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_gtdbtk_database_installer commit 671e8c706fa211b6ec1c476d3d1a36d098822fe5
author | iuc |
---|---|
date | Thu, 03 Oct 2024 12:42:22 +0000 |
parents | df84aaed4769 |
children | 750d902de22c |
files | data_manager/gtdbtk_database_installer.py data_manager/gtdbtk_database_installer.xml test-data/gtdbtk_database_metadata_versioned.loc test-data/gtdbtk_database_versioned.loc |
diffstat | 3 files changed, 66 insertions(+), 76 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/data_manager/gtdbtk_database_installer.py Mon Sep 09 09:01:39 2024 +0000
+++ b/data_manager/gtdbtk_database_installer.py Thu Oct 03 12:42:22 2024 +0000
@@ -7,7 +7,7 @@
 import shutil
 import sys
 import tarfile
-from datetime import datetime
+from datetime import date
 from urllib.parse import urlparse
 from urllib.request import HTTPError, Request, urlopen

@@ -118,7 +118,19 @@
     return target_directory


-def download(database_name, release, meta, test, out_file):
+def create_data_manager_entry(database_name, release, file_path):
+    time = date.today().strftime("%Y-%m-%d")
+    data_manager_entry = {}
+    data_manager_entry["value"] = (
+        f"{database_name.replace(' ', '_').lower()}_release_{release}_downloaded_{time}"
+    )
+    data_manager_entry["name"] = f"{database_name} - release {release} ({time})"
+    data_manager_entry["path"] = file_path
+    data_manager_entry["version"] = release
+    return data_manager_entry
+
+
+def download(release, meta, test, out_file):

     with open(out_file) as fh:
         params = json.load(fh)
@@ -137,72 +149,56 @@
             for url in items.values():
                 assert is_urlfile(url)

-    # download both taxonomy metadata tables
+    data_manager_json = {"data_tables": {}}
+
+    # download taxonomy metadata tables
     if meta:
         url = urls[release]["meta_ar"]
-        file_path = url_download(url, target_directory, meta)
+        url_download(url, target_directory, meta)
         url = urls[release]["meta_bac"]
         file_path = url_download(url, target_directory, meta)
+
+        data_manager_json["data_tables"]["gtdbtk_database_metadata_versioned"] = [
+            create_data_manager_entry("Metadata Tables", release, file_path)
+        ]
     # download the full DB
     else:
         url = urls[release]["full"]
         file_path = url_download(url, target_directory, meta)
-
-    time = datetime.utcnow().strftime("%Y-%m-%d")
-
-    data_manager_json = {"data_tables": {}}
-    data_manager_entry = {}
-    data_manager_entry["value"] = f"{database_name}_release_{release}_downloaded_{time}"
-    data_manager_entry["name"] = database_name
-    data_manager_entry["path"] = file_path
-    data_manager_entry["version"] = release
+        data_manager_json["data_tables"]["gtdbtk_database_versioned"] = [
+            create_data_manager_entry("Full Database", release, file_path)
+        ]

     # store in dedicated metadata table
-    if meta:
-        data_manager_json["data_tables"][
-            "gtdbtk_database_metadata_versioned"
-        ] = data_manager_entry
-    else:
-        data_manager_json["data_tables"][
-            "gtdbtk_database_versioned"
-        ] = data_manager_entry
-
     with open(out_file, "w") as fh:
         json.dump(data_manager_json, fh, sort_keys=True)


-parser = argparse.ArgumentParser()
-
-parser.add_argument(
-    "--database_name", dest="database_name", help="GTDB-Tk database display name"
-)
-
-parser.add_argument("--version", dest="version", help="DB version")
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()

-parser.add_argument(
-    "--release", dest="release", help="Release of the GTDB-Tk database version"
-)
-parser.add_argument("--out_file", dest="out_file", help="JSON output file")
-parser.add_argument(
-    "--meta",
-    dest="meta",
-    action="store_true",
-    help="Store meta data flag",
-)
+    parser.add_argument("--version", dest="version", help="DB version")
+    parser.add_argument(
+        "--release", dest="release", help="Release of the GTDB-Tk database version"
+    )
+    parser.add_argument("--out_file", dest="out_file", help="JSON output file")
+    parser.add_argument(
+        "--meta",
+        dest="meta",
+        action="store_true",
+        help="Store meta data flag",
+    )
+    parser.add_argument(
+        "--test",
+        dest="test",
+        action="store_true",
+        help="Run test",
+    )
+    args = parser.parse_args()

-parser.add_argument(
-    "--test",
-    dest="test",
-    action="store_true",
-    help="Run test",
-)
-
-args = parser.parse_args()
-
-download(
-    args.database_name,
-    args.release,
-    args.meta,
-    args.test,
-    args.out_file,
-)
+    download(
+        args.release,
+        args.meta,
+        args.test,
+        args.out_file,
+    )
```
```diff
--- a/data_manager/gtdbtk_database_installer.xml Mon Sep 09 09:01:39 2024 +0000
+++ b/data_manager/gtdbtk_database_installer.xml Thu Oct 03 12:42:22 2024 +0000
@@ -2,7 +2,7 @@
     <description></description>
     <macros>
         <token name="@TOOL_VERSION@">202</token>
-        <token name="@VERSION_SUFFIX@">2</token>
+        <token name="@VERSION_SUFFIX@">3</token>
         <token name="@PROFILE@">20.09</token>
     </macros>
     <requirements>
@@ -10,18 +10,16 @@
     </requirements>
     <command>
         <![CDATA[
-        python '$__tool_directory__/gtdbtk_database_installer.py'
-        --database_name '$database_name'
-        --release '$release'
-        --out_file '$out_file'
-        $meta
-        $test
+python '$__tool_directory__/gtdbtk_database_installer.py'
+    --release '$release'
+    --out_file '$out_file'
+    $meta
+    $test
         ]]>
     </command>
     <inputs>
-        <param name="database_name" type="text" value="" label="Database name or description" help="This value will be displayed in the GTDB-Tk Database select list"/>
-        <param name="meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Only store GTDBTK metadata in a dedicated data table. " />
-        <param name="test" type="hidden" value="" checked="false" label="Run a dry test run !" />
+        <param name="meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Only store GTDBTK metadata in a dedicated data table. " />
+        <param name="test" type="hidden" value="" checked="false" label="Run a dry test run!" />
         <param name="release" type="select" multiple="false" label="GTDB Release">
             <option value="202">202</option>
             <option value="207">207</option>
@@ -34,40 +32,36 @@
     </outputs>
     <tests>
         <test>
-            <!-- TODO -->
             <!-- Not actually installing a huge GTDB-Tk database -->
             <!-- but it will check if all urls exist -->
             <param name="release" value="202"/>
-            <param name="database_name" value="GTDB-Tk database release 202"/>
             <param name="test" value="--test"/>
             <output name="out_file">
                 <assert_contents>
-                    <has_text text="GTDB-Tk database release 202"/>
-                    <has_text text="release_202"/>
+                    <has_text text="Full Database - release 202 "/>
+                    <has_text text="full_database_release_202_downloaded"/>
                 </assert_contents>
             </output>
         </test>
         <test>
             <!-- Test meta data download with tsv.gz-->
             <param name="release" value="220"/>
-            <param name="database_name" value="GTDB-Tk database release 220 metadata"/>
             <param name="meta" value="true"/>
             <output name="out_file">
                 <assert_contents>
-                    <has_text text="GTDB-Tk database release 220 metadata"/>
-                    <has_text text="release_220"/>
+                    <has_text text="Metadata Tables - release 220"/>
+                    <has_text text="metadata_tables_release_220_downloaded_"/>
                 </assert_contents>
             </output>
         </test>
         <test>
             <!-- Test meta data download with tar.gz -->
             <param name="release" value="207"/>
-            <param name="database_name" value="GTDB-Tk database release 207 metadata"/>
             <param name="meta" value="true"/>
             <output name="out_file">
                 <assert_contents>
-                    <has_text text="GTDB-Tk database release 207 metadata"/>
-                    <has_text text="release_207"/>
+                    <has_text text="Metadata Tables - release 207"/>
+                    <has_text text="metadata_tables_release_207_downloaded_"/>
                 </assert_contents>
             </output>
         </test>
```
```diff
--- a/test-data/gtdbtk_database_metadata_versioned.loc Mon Sep 09 09:01:39 2024 +0000
+++ b/test-data/gtdbtk_database_metadata_versioned.loc Thu Oct 03 12:42:22 2024 +0000
@@ -2,4 +2,4 @@
 # to use a directory of GTDB-Tk databases. The gtdbtk_databases.loc
 # file has this format (longer white space characters are TAB characters):
 #
-# <unique_build_id> <display_name> <version> <directory_path>
+# <unique_build_id> <display_name> <version> <directory_path>
\ No newline at end of file
```