annotate data_manager/data_manager_ncbi_taxonomy_sqlite.py @ 3:f9650b178bfd draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit 096286097ed5cdf189a1b68c3fc34d10f4142e54
author iuc
date Sun, 16 Apr 2023 08:31:02 +0000
parents 37560eebb5ce
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
1 from __future__ import division, print_function
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
2
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
3 import argparse
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
4 import datetime
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
5 import json
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
6 import os
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
7 import os.path
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
8 import shlex
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
9 import subprocess
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
10
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
11 DATA_TABLE_NAME = "ncbi_taxonomy_sqlite"
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
12
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
13
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
def build_sqlite(taxonomy_dir, output_directory, name=None, description=None):
    """Build the NCBI taxonomy SQLite database and describe it as a data table row.

    Runs ``taxonomy_util -d <output file> to_sqlite <taxonomy_dir>`` so that
    ``tax.ncbitaxonomy.sqlite`` is written inside ``output_directory``.

    Args:
        taxonomy_dir: Directory handed to ``taxonomy_util to_sqlite``.
        output_directory: Directory that receives the database file; it is
            created (including missing parents) when it does not exist.
        name: Value for the data table entry. ``None`` or a blank string
            falls back to ``ncbitaxonomy_build_<YYYY-MM-DD>``.
        description: Description for the data table entry. ``None`` or a
            blank string falls back to a text containing today's date.

    Returns:
        A one-element list holding a dict with the keys ``value``,
        ``description`` and ``path`` (the path of the SQLite file).

    Raises:
        subprocess.CalledProcessError: If ``taxonomy_util`` exits with a
            non-zero status.
    """
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)
    output_filename = os.path.join(output_directory, "tax.ncbitaxonomy.sqlite")

    # Hand the arguments over as a list: formatting the paths into a string
    # and re-splitting it would break paths containing spaces or quotes.
    cmd = ["taxonomy_util", "-d", output_filename, "to_sqlite", taxonomy_dir]
    subprocess.check_call(cmd)

    today_str = datetime.date.today().strftime("%Y-%m-%d")
    if name is None or name.strip() == "":
        name = "ncbitaxonomy_build_" + today_str

    if description is None or description.strip() == "":
        description = "NCBI Taxonomy database (built on {})".format(today_str)

    return [dict(value=name, description=description, path=output_filename)]
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
31
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
32
7a76cd412252 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_ncbi_taxonomy_sqlite/ commit bad9f3d1468b454b1ac073fa796e66b1d0164d38"
iuc
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: parse the command line, read the Galaxy data manager
    # JSON file, build the SQLite database and write the resulting data table
    # entries back to that same JSON file.
    parser = argparse.ArgumentParser(
        description="Build SQLite database from NCBI taxonomy"
    )
    parser.add_argument(
        "--output_directory", default="tmp", help="Directory to write output to"
    )
    parser.add_argument(
        "taxonomy_dir",
        help="Path to directory containing NCBI Taxonomy nodes.dmp and names.dmp file"
    )
    parser.add_argument(
        "name",
        help="Name to use for the entry in the data table"
    )
    parser.add_argument(
        "description",
        help="Description to use for the entry in the data table"
    )
    parser.add_argument(
        "galaxy_datamanager_filename",
        help="Galaxy JSON format file describing data manager inputs",
    )
    args = parser.parse_args()

    with open(args.galaxy_datamanager_filename) as fh:
        config = json.load(fh)

    # Prefer the extra_files_path from the JSON config; a missing or empty
    # "output_data" list falls back to the --output_directory option instead
    # of raising IndexError.
    output_data = config.get("output_data") or [{}]
    output_directory = output_data[0].get("extra_files_path", None)
    if output_directory is None:
        output_directory = args.output_directory

    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)

    # Keep any data tables already present in the config and make sure the
    # list for this data manager's table exists before appending to it.
    data_manager_dict = {}
    data_manager_dict["data_tables"] = config.get("data_tables", {})
    table_entries = data_manager_dict["data_tables"].setdefault(DATA_TABLE_NAME, [])

    data = build_sqlite(args.taxonomy_dir, output_directory, args.name, args.description)
    table_entries.extend(data)

    # The input JSON file is overwritten with the data manager result.
    with open(args.galaxy_datamanager_filename, "w") as fh:
        json.dump(data_manager_dict, fh, sort_keys=True)