Mercurial > repos > iuc > data_manager_ncbi_taxonomy_sqlite
view data_manager/data_manager_ncbi_taxonomy_sqlite.py @ 0:7a76cd412252 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
author | iuc |
---|---|
date | Tue, 21 Jul 2020 17:04:41 -0400 |
parents | |
children | 54a0869d0051 |
line wrap: on
line source
from __future__ import division, print_function

import argparse
import datetime
import json
import os
import os.path
import shlex  # noqa: F401 -- no longer used here; import retained
import subprocess

# Name of the Galaxy data table that entries produced by this script go into.
DATA_TABLE_NAME = "ncbi_taxonomy_sqlite"


def build_sqlite(taxonomy_dir, output_directory, name=None, description=None):
    """Build the taxonomy SQLite database and describe it as a data table row.

    Runs ``taxonomy_util -d <output_file> to_sqlite <taxonomy_dir>``, where
    ``<output_file>`` is ``tax.ncbitaxonomy.sqlite`` inside
    ``output_directory``.

    :param taxonomy_dir: directory holding the NCBI Taxonomy dump files.
    :param output_directory: directory to write the database into; created
        (including missing parents) when it does not exist yet.
    :param name: value for the data table entry; when ``None`` or blank,
        ``ncbitaxonomy_build_<YYYY-MM-DD>`` is used.
    :param description: description for the data table entry; when ``None``
        or blank, a default mentioning today's date is used.
    :returns: a one-element list holding a dict with the keys ``value``,
        ``description`` and ``path``.
    :raises subprocess.CalledProcessError: if ``taxonomy_util`` exits with a
        non-zero status.
    :raises OSError: if the directory cannot be created or ``taxonomy_util``
        cannot be launched.
    """
    # makedirs (not mkdir) so nested output paths work, matching main().
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)

    output_filename = os.path.join(output_directory, "tax.ncbitaxonomy.sqlite")

    # Pass the argument vector directly instead of formatting a string and
    # re-splitting it, so paths containing spaces or quotes stay intact.
    cmd = ["taxonomy_util", "-d", output_filename, "to_sqlite", taxonomy_dir]
    subprocess.check_call(cmd)

    today_str = datetime.date.today().strftime("%Y-%m-%d")
    if name is None or not name.strip():
        name = "ncbitaxonomy_build_" + today_str
    if description is None or not description.strip():
        description = "NCBI Taxonomy database (built on {})".format(today_str)

    return [dict(value=name, description=description, path=output_filename)]


def _parse_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when ``argv`` is ``None``)."""
    parser = argparse.ArgumentParser(
        description="Build SQLite database from NCBI taxonomy"
    )
    parser.add_argument(
        "--output_directory", default="tmp", help="Directory to write output to"
    )
    parser.add_argument(
        "taxonomy_dir",
        help="Path to directory containing NCBI Taxonomy nodes.dmp and names.dmp file",
    )
    parser.add_argument("name", help="Name to use for the entry in the data table")
    parser.add_argument(
        "description", help="Description to use for the entry in the data table"
    )
    parser.add_argument(
        "galaxy_datamanager_filename",
        help="Galaxy JSON format file describing data manager inputs",
    )
    return parser.parse_args(argv)


def _resolve_output_directory(config, fallback):
    """Return ``extra_files_path`` of the first output dataset, else ``fallback``."""
    # "or [{}]" also covers an empty "output_data" list, which would
    # otherwise raise IndexError on the [0] lookup.
    output_data = config.get("output_data") or [{}]
    extra_files_path = output_data[0].get("extra_files_path", None)
    if extra_files_path is None:
        return fallback
    return extra_files_path


def main(argv=None):
    """Build the database and record it in the Galaxy data manager JSON file.

    Reads the JSON file named on the command line, builds the SQLite database
    in the resolved output directory, appends the resulting entry to the
    ``ncbi_taxonomy_sqlite`` data table, prints the resulting JSON and writes
    it back to the same file.

    :param argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = _parse_args(argv)

    # Read the config once and close the handle promptly.
    with open(args.galaxy_datamanager_filename) as handle:
        config = json.load(handle)

    output_directory = _resolve_output_directory(config, args.output_directory)
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)

    data_manager_dict = {"data_tables": config.get("data_tables", {})}
    entries = data_manager_dict["data_tables"].setdefault(DATA_TABLE_NAME, [])

    # Build into the resolved directory (the one created above), not the raw
    # --output_directory argument.
    entries.extend(
        build_sqlite(args.taxonomy_dir, output_directory, args.name, args.description)
    )

    print(json.dumps(data_manager_dict))
    with open(args.galaxy_datamanager_filename, "w") as handle:
        json.dump(data_manager_dict, handle)


if __name__ == "__main__":
    main()