# HG changeset patch
# User iuc
# Date 1705097477 0
# Node ID 6be6e6198ac3af1bc533b82a04e84e549fc8e63a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_ncbi_fcs_gx_database_downloader commit 4a6561ed00e004260be3f3c29d81e814c60e20af
diff -r 000000000000 -r 6be6e6198ac3 data_manager/data_manager_ncbi_fcs_gx_database_downloader.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_ncbi_fcs_gx_database_downloader.py Fri Jan 12 22:11:17 2024 +0000
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+import os
+import subprocess
+
+
+def main():
+    """Run the data manager: sync files, list divisions, write the JSON."""
+    options = parse_args()
+
+    # Built in two steps to keep the call order explicit: sync_files() runs
+    # first, then get_divisions() reads from the same output directory.
+    tables = {"ncbi_fcs_gx_databases": sync_files(options)}
+    tables["ncbi_fcs_gx_divisions"] = get_divisions(options)
+
+    with open(options.output_file, "w") as handle:
+        text = json.dumps({"data_tables": tables}, sort_keys=True, indent=2)
+        handle.write(text + "\n")
+
+
+def parse_args():
+    """Parse the four required command-line options into a namespace."""
+    cli = argparse.ArgumentParser()
+    # every option is mandatory; they are registered in a fixed order
+    for flag in ("--tag", "--source_manifest", "--output_file", "--output_dir"):
+        cli.add_argument(flag, required=True)
+    namespace = cli.parse_args()
+    return namespace
+
+
+def sync_files(opts):
+    """Run ``sync_files.py`` for the manifest and return the table entry.
+
+    Creates ``opts.output_dir`` and runs ``sync_files.py --mft
+    <source_manifest> --dir <output_dir> get``. Returns ``{"add": [row]}``
+    where the row records the tag, the source manifest and the output dir.
+
+    Raises subprocess.CalledProcessError if the command exits non-zero; its
+    captured stderr is echoed first so the failure reason is not lost.
+    """
+    import sys  # local import: only the failure path needs it
+
+    os.makedirs(opts.output_dir, exist_ok=True)
+    args = ["sync_files.py", "--mft", opts.source_manifest]
+    args += ["--dir", opts.output_dir, "get"]
+    try:
+        subprocess.run(args, capture_output=True, check=True)
+    except subprocess.CalledProcessError as err:
+        # Output is captured, so without this the child's message vanishes.
+        sys.stderr.write((err.stderr or b"").decode(errors="replace"))
+        raise
+
+    entry = {
+        "value": opts.tag,
+        "source_manifest": opts.source_manifest,
+        "name": opts.output_dir,
+    }
+    return {"add": [entry]}
+
+
+def get_divisions(opts):
+    """Return the divisions table entries found in the synced taxa file.
+
+    Reads ``<output_dir>/<manifest stem>.taxa.tsv`` (tab-separated; ``#`` and
+    blank lines skipped; division in column five). Rows are sorted by display
+    name and always include an "Unknown / Unclassified" row.
+
+    Raises ValueError if ``opts.source_manifest`` lacks a ".manifest" suffix.
+    """
+    # descriptions for the top-level gx divisions
+    top_level_description = {
+        "anml": "Animals (Metazoa)",
+        "arch": "Archaea",
+        "fung": "Fungi",
+        "plnt": "Plants (Viridiplantae)",
+        "prok": "Bacteria",
+        "prst": "Protists (other Eukaryota)",
+        "synt": "Synthetic",
+        "virs": "Virus",
+    }
+
+    # get the pathname for the taxa file (explicit raise: asserts vanish under -O)
+    manifest_filename = os.path.basename(opts.source_manifest)
+    if not manifest_filename.lower().endswith(".manifest"):
+        raise ValueError('source_manifest does not end with ".manifest"')
+    manifest_tag = manifest_filename[: -len(".manifest")]
+    taxa_pathname = os.path.join(opts.output_dir, f"{manifest_tag}.taxa.tsv")
+
+    gx_divisions = set()
+    with open(taxa_pathname) as f:
+        for line in f:
+            if line.startswith("#") or not line.strip():
+                continue  # comment or blank line
+            _id, _species, _common, _blast, div = line.rstrip("\n").split("\t", 4)
+            gx_divisions.add(div)
+
+    # add an element to support unknown/unclassified samples
+    elements = [("Unknown / Unclassified", "unkn:unknown")]
+    for division in gx_divisions:
+        top, bottom = division.split(":", 1)
+        label = top_level_description.get(top, top)  # unknown code: shown as-is
+        elements.append((f"{label} - {bottom}", division))
+
+    rows = [{"value": v, "tag": opts.tag, "name": n} for n, v in sorted(elements)]
+    return {"add": rows}
+
+
+if __name__ == "__main__":  # run main() only when executed as a script
+ main()
diff -r 000000000000 -r 6be6e6198ac3 data_manager/data_manager_ncbi_fcs_gx_database_downloader.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_ncbi_fcs_gx_database_downloader.xml Fri Jan 12 22:11:17 2024 +0000
@@ -0,0 +1,35 @@
+
+ Download the NCBI FCS GX database
+
+ macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 6be6e6198ac3 data_manager/macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/macros.xml Fri Jan 12 22:11:17 2024 +0000
@@ -0,0 +1,22 @@
+
+
+
+ ncbi-fcs-gx
+
+
+
+ 0.5.0
+ 0
+ 21.05
+
+
+ operation_3187
+
+
+
+
+ 10.1101/2023.06.02.543519
+
+
+
+
diff -r 000000000000 -r 6be6e6198ac3 data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Fri Jan 12 22:11:17 2024 +0000
@@ -0,0 +1,29 @@
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 6be6e6198ac3 test-data/ncbi_fcs_gx_databases.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ncbi_fcs_gx_databases.loc Fri Jan 12 22:11:17 2024 +0000
@@ -0,0 +1,1 @@
+#tag source_manifest local_manifest
diff -r 000000000000 -r 6be6e6198ac3 test-data/ncbi_fcs_gx_divisions.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ncbi_fcs_gx_divisions.tsv Fri Jan 12 22:11:17 2024 +0000
@@ -0,0 +1,1 @@
+#gx_div tag description
diff -r 000000000000 -r 6be6e6198ac3 test-data/test.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.json Fri Jan 12 22:11:17 2024 +0000
@@ -0,0 +1,27 @@
+\{
+ "data_tables": \{
+ "ncbi_fcs_gx_databases": \{
+ "add": \[
+ \{
+ "name": "[^"]+",
+ "source_manifest": "https://ncbi-fcs-gx.s3.amazonaws.com/gxdb/test-only/test-only.manifest",
+ "value": "test-only"
+ \}
+ \]
+ \},
+ "ncbi_fcs_gx_divisions": \{
+ "add": \[
+ \{
+ "name": "Bacteria - CFB group bacteria",
+ "tag": "test-only",
+ "value": "prok:CFB group bacteria"
+ \},
+ \{
+ "name": "Unknown / Unclassified",
+ "tag": "test-only",
+ "value": "unkn:unknown"
+ \}
+ \]
+ \}
+ \}
+\}
diff -r 000000000000 -r 6be6e6198ac3 tool-data/ncbi_fcs_gx_databases.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/ncbi_fcs_gx_databases.loc.sample Fri Jan 12 22:11:17 2024 +0000
@@ -0,0 +1,8 @@
+## NCBI FCS GX Databases
+#
+#tag manifest path
+#r2022-01-24 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/r2022-01-24/all.manifest /big/data/dir/ncbi_fcs_gx_databases/r2022-01-24/all.manifest
+#r2022-07-08 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/r2022-07-08/all.manifest /big/data/dir/ncbi_fcs_gx_databases/r2022-07-08/all.manifest
+#r2023-01-24 https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/r2023-01-24/all.manifest /big/data/dir/ncbi_fcs_gx_databases/r2023-01-24/all.manifest
+#latest https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/latest/all.manifest /big/data/dir/ncbi_fcs_gx_databases/latest/all.manifest
+#test-only https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/test-only/test-only.manifest /big/data/dir/ncbi_fcs_gx_databases/test-only/test-only.manifest
diff -r 000000000000 -r 6be6e6198ac3 tool-data/ncbi_fcs_gx_divisions.tsv.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/ncbi_fcs_gx_divisions.tsv.sample Fri Jan 12 22:11:17 2024 +0000
@@ -0,0 +1,64 @@
+#gx_div tag description
+#anml:amphibians latest Animals (Metazoa) - amphibians
+#anml:basal metazoans latest Animals (Metazoa) - basal metazoans
+#anml:birds latest Animals (Metazoa) - birds
+#anml:brachiopods latest Animals (Metazoa) - brachiopods
+#anml:crustaceans latest Animals (Metazoa) - crustaceans
+#anml:echinoderms latest Animals (Metazoa) - echinoderms
+#anml:fishes latest Animals (Metazoa) - fishes
+#anml:insects latest Animals (Metazoa) - insects
+#anml:mammals latest Animals (Metazoa) - mammals
+#anml:marsupials latest Animals (Metazoa) - marsupials
+#anml:molluscs latest Animals (Metazoa) - molluscs
+#anml:nematodes latest Animals (Metazoa) - nematodes
+#anml:primates latest Animals (Metazoa) - primates
+#anml:reptiles latest Animals (Metazoa) - reptiles
+#anml:rodents latest Animals (Metazoa) - rodents
+#anml:rotifers latest Animals (Metazoa) - rotifers
+#anml:tardigrades latest Animals (Metazoa) - tardigrades
+#anml:worms latest Animals (Metazoa) - worms
+#arch:archaea latest Archaea - archaea
+#prok:CFB group bacteria latest Bacteria - CFB group bacteria
+#prok:GNS bacteria latest Bacteria - GNS bacteria
+#prok:a-proteobacteria latest Bacteria - a-proteobacteria
+#prok:actinobacteria latest Bacteria - actinobacteria
+#prok:aquificales latest Bacteria - aquificales
+#prok:b-proteobacteria latest Bacteria - b-proteobacteria
+#prok:bacteria latest Bacteria - bacteria
+#prok:chlamydias latest Bacteria - chlamydias
+#prok:cyanobacteria latest Bacteria - cyanobacteria
+#prok:d-proteobacteria latest Bacteria - d-proteobacteria
+#prok:firmicutes latest Bacteria - firmicutes
+#prok:fusobacteria latest Bacteria - fusobacteria
+#prok:g-proteobacteria latest Bacteria - g-proteobacteria
+#prok:green sulfur bacteria latest Bacteria - green sulfur bacteria
+#prok:high GC Gram+ latest Bacteria - high GC Gram+
+#prok:mycoplasmas latest Bacteria - mycoplasmas
+#prok:planctomycetes latest Bacteria - planctomycetes
+#prok:proteobacteria latest Bacteria - proteobacteria
+#prok:spirochetes latest Bacteria - spirochetes
+#prok:thermotogales latest Bacteria - thermotogales
+#prok:verrucomicrobia latest Bacteria - verrucomicrobia
+#fung:ascomycetes latest Fungi - ascomycetes
+#fung:basidiomycetes latest Fungi - basidiomycetes
+#fung:budding yeasts latest Fungi - budding yeasts
+#fung:chytrids latest Fungi - chytrids
+#fung:fungi latest Fungi - fungi
+#fung:microsporidians latest Fungi - microsporidians
+#plnt:green algae latest Plants (Viridiplantae) - green algae
+#plnt:mosses latest Plants (Viridiplantae) - mosses
+#plnt:plants latest Plants (Viridiplantae) - plants
+#prst:algae latest Protists (other Eukaryota) - algae
+#prst:alveolates latest Protists (other Eukaryota) - alveolates
+#prst:cellular slime molds latest Protists (other Eukaryota) - cellular slime molds
+#prst:cercozoans latest Protists (other Eukaryota) - cercozoans
+#prst:choanoflagellates latest Protists (other Eukaryota) - choanoflagellates
+#prst:euglenoids latest Protists (other Eukaryota) - euglenoids
+#prst:monads latest Protists (other Eukaryota) - monads
+#prst:protists latest Protists (other Eukaryota) - protists
+#prst:slime nets latest Protists (other Eukaryota) - slime nets
+#synt:synthetic latest Synthetic - synthetic
+#unkn:unknown latest Unknown / Unclassified
+#virs:eukaryotic viruses latest Virus - eukaryotic viruses
+#virs:prokaryotic viruses latest Virus - prokaryotic viruses
+#virs:viruses latest Virus - viruses
diff -r 000000000000 -r 6be6e6198ac3 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Fri Jan 12 22:11:17 2024 +0000
@@ -0,0 +1,12 @@
+
+
+
+ value, source_manifest, name
+
+
+
+
+
diff -r 000000000000 -r 6be6e6198ac3 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Fri Jan 12 22:11:17 2024 +0000
@@ -0,0 +1,12 @@
+
+
+
+ value, source_manifest, name
+
+
+
+
+