diff data_manager/data_manager_ncbi_taxonomy_sqlite.py @ 0:7a76cd412252 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
author iuc
date Tue, 21 Jul 2020 17:04:41 -0400
parents
children 54a0869d0051
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_ncbi_taxonomy_sqlite.py	Tue Jul 21 17:04:41 2020 -0400
@@ -0,0 +1,79 @@
+from __future__ import division, print_function
+
+import argparse
+import datetime
+import json
+import os
+import os.path
+import shlex
+import subprocess
+
+# Key under "data_tables" in the data manager JSON to which built entries are appended.
+DATA_TABLE_NAME = "ncbi_taxonomy_sqlite"
+
+
+def build_sqlite(taxonomy_dir, output_directory, name=None, description=None):
+    """Build the taxonomy SQLite database and describe it as a data table row.
+
+    Runs the external ``taxonomy_util`` command so that it writes
+    ``tax.ncbitaxonomy.sqlite`` inside ``output_directory``.
+
+    :param taxonomy_dir: directory handed to ``taxonomy_util ... to_sqlite``
+    :param output_directory: directory that receives the database file;
+        created (including missing parents) when it does not exist
+    :param name: ``value`` of the returned entry; when ``None`` or blank,
+        ``ncbitaxonomy_build_<YYYY-MM-DD>`` is used
+    :param description: ``description`` of the returned entry; when ``None``
+        or blank, a default mentioning today's date is used
+    :returns: a one-element list holding a dict with the keys ``value``,
+        ``description`` and ``path``
+    :raises subprocess.CalledProcessError: if ``taxonomy_util`` exits non-zero
+    """
+    # makedirs rather than mkdir so that a nested output path is created fully.
+    if not os.path.isdir(output_directory):
+        os.makedirs(output_directory)
+    output_filename = os.path.join(output_directory, "tax.ncbitaxonomy.sqlite")
+
+    # Hand over argv as a list. Formatting the paths into a string and
+    # re-splitting it would break paths containing whitespace or quotes.
+    cmd = ["taxonomy_util", "-d", output_filename, "to_sqlite", taxonomy_dir]
+    subprocess.check_call(cmd)
+
+    today_str = datetime.date.today().strftime("%Y-%m-%d")
+    if name is None or name.strip() == "":
+        name = "ncbitaxonomy_build_" + today_str
+
+    if description is None or description.strip() == "":
+        description = "NCBI Taxonomy database (built on {})".format(today_str)
+
+    return [dict(value=name, description=description, path=output_filename)]
+
+
+# Command-line entry point: parse the arguments, build the database and
+# record the new entry in the Galaxy data manager JSON file.
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Build SQLite database from NCBI taxonomy"
+    )
+    parser.add_argument(
+        "--output_directory", default="tmp", help="Directory to write output to"
+    )
+    parser.add_argument(
+        "taxonomy_dir",
+        help="Path to directory containing NCBI Taxonomy nodes.dmp and names.dmp file"
+    )
+    parser.add_argument(
+        "name",
+        help="Name to use for the entry in the data table"
+    )
+    parser.add_argument(
+        "description",
+        help="Description to use for the entry in the data table"
+    )
+    parser.add_argument(
+        "galaxy_datamanager_filename",
+        help="Galaxy JSON format file describing data manager inputs",
+    )
+    args = parser.parse_args()
+
+    # Parse the JSON file once and close the handle straight away.
+    with open(args.galaxy_datamanager_filename) as config_file:
+        config = json.load(config_file)
+
+    # Prefer the extra_files_path from the JSON; fall back to the
+    # --output_directory option when it is absent. "or [{}]" also covers an
+    # empty "output_data" list, which would otherwise raise IndexError.
+    output_data = config.get("output_data") or [{}]
+    output_directory = output_data[0].get("extra_files_path")
+    if output_directory is None:
+        output_directory = args.output_directory
+
+    if not os.path.isdir(output_directory):
+        os.makedirs(output_directory)
+
+    # Keep any data tables already present in the file and make sure there is
+    # a list for this data manager's table.
+    data_manager_dict = {"data_tables": config.get("data_tables", {})}
+    table_entries = data_manager_dict["data_tables"].setdefault(DATA_TABLE_NAME, [])
+
+    # Build into the resolved directory. Passing args.output_directory here
+    # would ignore extra_files_path and write the database to the CLI default.
+    data = build_sqlite(
+        args.taxonomy_dir, output_directory, args.name, args.description
+    )
+    table_entries.extend(data)
+
+    print(json.dumps(data_manager_dict))
+    # The context manager guarantees the result is flushed and closed.
+    with open(args.galaxy_datamanager_filename, "w") as result_file:
+        json.dump(data_manager_dict, result_file)