# HG changeset patch
# User iuc
# Date 1728463987 0
# Node ID 35cef758050c8486ce2d87976b2e147614b130d0
# Parent 6be6e6198ac3af1bc533b82a04e84e549fc8e63a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_ncbi_fcs_gx_database_downloader commit 25c9d8d297d0e10f92e373f6a959274dedc10433
diff -r 6be6e6198ac3 -r 35cef758050c data_manager/data_manager_ncbi_fcs_gx_database_downloader.py
--- a/data_manager/data_manager_ncbi_fcs_gx_database_downloader.py Fri Jan 12 22:11:17 2024 +0000
+++ b/data_manager/data_manager_ncbi_fcs_gx_database_downloader.py Wed Oct 09 08:53:07 2024 +0000
@@ -4,14 +4,15 @@
import json
import os
import subprocess
+import typing
-def main():
+def main() -> None:
opts = parse_args()
output_dict = {
"data_tables": {
- "ncbi_fcs_gx_databases": sync_files(opts),
+ "ncbi_fcs_gx_databases_ext": sync_files(opts),
"ncbi_fcs_gx_divisions": get_divisions(opts),
}
}
@@ -20,17 +21,23 @@
print(json.dumps(output_dict, sort_keys=True, indent=2), file=f)
-def parse_args():
+def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser()
- parser.add_argument("--tag", required=True)
- parser.add_argument("--source_manifest", required=True)
+
+ parser.add_argument("--tag", required=True, help="Unique identifier for this database")
+ parser.add_argument("--description", required=True, help="Description for this database")
+ parser.add_argument("--source_manifest", required=True, help="Should the tool use the source manifest")
+ parser.add_argument("--use_source_manifest", action="store_true", help="Manifest file for this database")
+ parser.add_argument("--phone_home", action="store_true", help="Should phone home be enabled")
+ parser.add_argument("--phone_home_label", default="", help="Phone home label")
+ parser.add_argument("--node_cache_dir", required=True, help="Directory to copy database to local node")
parser.add_argument("--output_file", required=True)
parser.add_argument("--output_dir", required=True)
return parser.parse_args()
-def sync_files(opts):
+def sync_files(opts: argparse.Namespace) -> typing.Dict[str, typing.List[typing.Dict[str, str]]]:
os.makedirs(opts.output_dir, exist_ok=True)
args = [
@@ -51,8 +58,12 @@
"add": [
{
"value": opts.tag,
+ "description": opts.description,
"source_manifest": opts.source_manifest,
- "name": opts.output_dir,
+ "use_source_manifest": "1" if opts.use_source_manifest else "0",
+ "phone_home": "1" if opts.phone_home else "0",
+ "phone_home_label": opts.phone_home_label,
+ "local_manifest": opts.output_dir,
}
]
}
@@ -60,7 +71,7 @@
return entries_dict
-def get_divisions(opts):
+def get_divisions(opts: argparse.Namespace) -> typing.Dict[str, typing.List[typing.Dict[str, str]]]:
# descriptions for the top-level gx divisions
top_level_description = {
"anml": "Animals (Metazoa)",
@@ -99,10 +110,10 @@
# add an element to support unknown/unclassified samples
elements.append(("Unknown / Unclassified", "unkn:unknown"))
- entries_dict = {"add": []}
+ entries_dict: typing.Dict[str, typing.List[typing.Dict[str, str]]] = {"add": []}
for name, gx_div in sorted(elements):
- entries_dict["add"].append({"value": gx_div, "tag": opts.tag, "name": name})
+ entries_dict["add"].append({"value": gx_div, "tag": opts.tag, "description": name})
return entries_dict
diff -r 6be6e6198ac3 -r 35cef758050c data_manager/data_manager_ncbi_fcs_gx_database_downloader.xml
--- a/data_manager/data_manager_ncbi_fcs_gx_database_downloader.xml Fri Jan 12 22:11:17 2024 +0000
+++ b/data_manager/data_manager_ncbi_fcs_gx_database_downloader.xml Wed Oct 09 08:53:07 2024 +0000
@@ -7,29 +7,129 @@
-
-
+
+
+
+
+
+
-
+
+
+
+
`_. The current database is about 470 GiB in total. Each database includes a JSON-formatted manifest file that contains details about each database file. A sample manifest file can be found below.
+
+The data manager downloads the GX database given a manifest file. It takes six inputs:
+
+1. **tag** - unique identifier for this database chosen by the Galaxy Admin
+2. **description** - description for this database seen and selectable by the user when running the NCBI FCS GX tool
+3. **source_manifest** - manifest file for this database (url or filesystem path)
+4. **use_source_manifest** - when true, the compute node will download the GX database itself instead of using the local copy
+5. **phone_home** - when true, the NCBI FCS GX tool will send analytics to NCBI about the run. The code for this can be seen `here `_. It sends the following information:
+
+ 1. version of the gx executable
+ 2. build date of the GX database
+ 3. the platform the software is running on
+ 4. the version of the Python interpreter
+ 5. the size of physical memory in GiB
+ 6. the duration of the run
+ 7. the run’s exit status (0 for success, otherwise 1)
+ 8. **phone_home_label**
+
+6. **phone_home_label** - arbitrary string set by the Galaxy Admin to identify the analytics data sent to NCBI
+
+The data manager also creates a lookup table for the NCBI FCS GX tool based on the `taxa.tsv `_ file in the database.
+
+Sample Manifest File
+====================
+
+.. code-block:: JSON
-See https://github.com/ncbi/fcs/wiki/FCS-GX#b-download-the-database
+ {
+ "version": 1,
+ "totalFiles": 8,
+ "timeStamp": "2023-01-24T16:18:22.220812",
+ "fileDetails": [
+ {
+ "fileName": "all.blast_div.tsv.gz",
+ "fileSize": 8241107,
+ "hashAlgorithm": "md5",
+ "hashValue": "a6b08c85c46da76548fff6ed220f8f9d"
+ },
+ {
+ "fileName": "all.assemblies.tsv",
+ "fileSize": 8887448,
+ "hashAlgorithm": "md5",
+ "hashValue": "441beceb8c467593fa6b87a071c5ec6b"
+ },
+ {
+ "fileName": "all.taxa.tsv",
+ "fileSize": 6385518,
+ "hashAlgorithm": "md5",
+ "hashValue": "c94d1fc80f81dbbf30b114d4cdaf29ad"
+ },
+ {
+ "fileName": "all.gxs",
+ "fileSize": 177317125807,
+ "hashAlgorithm": "md5",
+ "hashValue": "da205626565a61be6dfd8c9b5ed1a9b7"
+ },
+ {
+ "fileName": "all.meta.jsonl",
+ "fileSize": 59,
+ "hashAlgorithm": "md5",
+ "hashValue": "c2096cdb8106d44a310052b06a23836c"
+ },
+ {
+ "fileName": "all.gxi",
+ "fileSize": 321216733352,
+ "hashAlgorithm": "md5",
+ "hashValue": "36bf346693e2b9de693de38efe219aa7"
+ },
+ {
+ "fileName": "all.seq_info.tsv.gz",
+ "fileSize": 22549956,
+ "hashAlgorithm": "md5",
+ "hashValue": "6a760eed5a94aaf46d4dd8c75f370875"
+ },
+ {
+ "fileName": "all.README.txt",
+ "fileSize": 187,
+ "hashAlgorithm": "md5",
+ "hashValue": "7deb2d4fa5241f95a25073fb43147cb1"
+ }
+ ]
+ }
]]>
diff -r 6be6e6198ac3 -r 35cef758050c data_manager/macros.xml
--- a/data_manager/macros.xml Fri Jan 12 22:11:17 2024 +0000
+++ b/data_manager/macros.xml Wed Oct 09 08:53:07 2024 +0000
@@ -2,10 +2,9 @@
ncbi-fcs-gx
-
- 0.5.0
+ 0.5.4
0
21.05
@@ -16,7 +15,6 @@
10.1101/2023.06.02.543519
-
diff -r 6be6e6198ac3 -r 35cef758050c data_manager_conf.xml
--- a/data_manager_conf.xml Fri Jan 12 22:11:17 2024 +0000
+++ b/data_manager_conf.xml Wed Oct 09 08:53:07 2024 +0000
@@ -1,18 +1,22 @@
-
+