annotate data_manager/data_manager_cat.py @ 1:2bec6d7877fc draft default tip

planemo upload commit 68dbaa3df00fe628b7dc0310cd1d19605d0bb307-dirty
author jjohnson
date Tue, 26 Nov 2019 16:11:24 -0500
parents f59e7e242bde
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
1 #!/usr/bin/env python
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
2 from __future__ import print_function
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
3
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
4 import argparse
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
5 import json
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
6 import os.path
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
7 import subprocess
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
8 import tarfile
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
9 import tempfile
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
10 import zipfile
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
11 try:
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
12 # For Python 3.0 and later
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
13 from urllib.request import urlopen
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
14 except ImportError:
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
15 # Fall back to Python 2 imports
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
16 from urllib2 import urlopen
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
17
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
18
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
def _check_tar_members(archive, workdir):
    """Reject tar members whose names would resolve outside *workdir*.

    Only member names are inspected; link targets are not validated.

    Raises:
        ValueError: if any member path escapes *workdir*.
    """
    base = os.path.realpath(workdir)
    # join(base, '') yields base with exactly one trailing separator.
    prefix = os.path.join(base, '')
    for name in archive.getnames():
        target = os.path.realpath(os.path.join(base, name))
        if target != base and not target.startswith(prefix):
            raise ValueError('Refusing to extract %r outside of %s'
                             % (name, workdir))


def url_download(url, workdir):
    """Download *url* into *workdir* and unpack it when it is an archive.

    The payload is first saved as ``download.dat`` inside *workdir*. If that
    file is a tar archive (any compression tarfile can read) or a zip
    archive, its contents are extracted into *workdir* and ``download.dat``
    is removed. Any other payload is left in place as ``download.dat``.

    Args:
        url: location passed unchanged to ``urlopen``.
        workdir: existing directory that receives the download.

    Raises:
        ValueError: if a tar member name would be written outside *workdir*
            (the downloaded file is left in place in that case).
    """
    file_path = os.path.join(workdir, 'download.dat')
    src = None
    try:
        src = urlopen(url)
        with open(file_path, 'wb') as dst:
            while True:
                chunk = src.read(2**16)
                if not chunk:
                    break
                dst.write(chunk)
    finally:
        if src is not None:
            src.close()

    # Context managers make sure the archive handle is closed before the
    # downloaded file is deleted.
    if tarfile.is_tarfile(file_path):
        with tarfile.open(file_path, 'r:*') as archive:
            _check_tar_members(archive, workdir)
            archive.extractall(workdir)
    elif zipfile.is_zipfile(file_path):
        with zipfile.ZipFile(file_path, 'r') as archive:
            archive.extractall(workdir)
    else:
        return
    os.remove(file_path)
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
43
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
44
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
def cat_prepare(install_dir):
    """Run ``CAT prepare --fresh -q`` with *install_dir* as working directory.

    The command's stdout and stderr are captured in temporary files and are
    only surfaced if the command fails.

    Args:
        install_dir: directory used as the working directory of the command.

    Raises:
        OSError: if the ``CAT`` executable cannot be started.
        Exception: if the command exits with a non-zero status; the message
            contains the command line, the return code and the captured
            stdout/stderr.
    """
    # Each argument is its own list item, and no shell is used: with
    # shell=True and a list only the first item would run as the command.
    cmd = ['CAT', 'prepare', '--fresh', '-q']
    # Temporary files instead of pipes so that large output cannot block
    # the child process.
    with tempfile.TemporaryFile() as cmd_stdout, \
            tempfile.TemporaryFile() as cmd_stderr:
        return_code = subprocess.call(cmd, cwd=install_dir,
                                      stdout=cmd_stdout, stderr=cmd_stderr)
        if return_code:
            # The child wrote through the same file descriptions, so the
            # offsets sit at the end of the output; rewind before reading.
            cmd_stdout.seek(0)
            cmd_stderr.seek(0)
            msg = "stdout:\n%s\nstderr:\n%s" % (
                cmd_stdout.read().decode('utf-8', 'replace'),
                cmd_stderr.read().decode('utf-8', 'replace'))
            raise Exception('Error: (%s), returncode=%s %s'
                            % (' '.join(cmd), return_code, msg))
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
58
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
59
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
def _find_cat_dirs(install_path):
    """Find the directory that directly holds both CAT database folders.

    Walks *install_path* top-down and returns ``(root, cat_db, tax_db)`` for
    the first directory that has one sub-directory whose name ends with
    ``CAT_database`` and another whose name ends with ``taxonomy``.
    Returns ``None`` when no such directory exists.
    """
    for root, dirs, _files in os.walk(install_path):
        # Reset per directory so that matches from unrelated directories
        # are never combined.
        cat_db = None
        tax_db = None
        for dname in sorted(dirs):
            if dname.endswith('CAT_database'):
                cat_db = dname
            elif dname.endswith('taxonomy'):
                tax_db = dname
        if cat_db and tax_db:
            return root, cat_db, tax_db
    return None


def main():
    """Populate a CAT database directory and write its data table entry.

    Command-line options:
        --config_file   path of the JSON file that is (over)written with the
                        ``cat_database`` data table entry.
        --install_path  directory that receives the database; created when
                        it does not exist.
        --db_url        optional URL of a database download; when omitted
                        the database is built via ``cat_prepare``.

    Raises:
        Exception: if no directory containing both a ``*CAT_database`` and a
            ``*taxonomy`` folder is found under the install path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_file', required=True)
    parser.add_argument('--install_path', required=True)
    parser.add_argument('--db_url', default=None)
    args = parser.parse_args()

    if not os.path.exists(args.install_path):
        os.makedirs(args.install_path)
    if args.db_url:
        url_download(args.db_url, args.install_path)
    else:
        cat_prepare(args.install_path)

    found = _find_cat_dirs(args.install_path)
    if found is None:
        raise Exception('No directory containing both a *CAT_database and '
                        'a *taxonomy folder was found under %s'
                        % args.install_path)
    cat_path, cat_db, tax_db = found
    cat_dir = os.path.basename(cat_path)

    data_table_entry = dict(value=cat_dir, name=cat_dir,
                            database_folder=os.path.join(cat_dir, cat_db),
                            taxonomy_folder=os.path.join(cat_dir, tax_db))
    dm_dict = {'data_tables': {'cat_database': [data_table_entry]}}
    # Text mode: json produces str, which cannot be written to a file
    # opened in binary mode on Python 3.
    with open(args.config_file, 'w') as config:
        json.dump(dm_dict, config)
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
100
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
101
f59e7e242bde planemo upload commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff changeset
# Run the data manager only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()