Mercurial > repos > iuc > data_manager_gemini_database_downloader
comparison data_manager/data_manager_gemini_download.py @ 9:27a6a256cd23 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_gemini_database_downloader commit 275b7863ff4f8b0dff9cd7ea6c4b635694f0168d
author | iuc |
---|---|
date | Sat, 03 Dec 2022 10:37:24 +0000 |
parents | 52b6a4d98009 |
children |
comparison
equal
deleted
inserted
replaced
8:52b6a4d98009 | 9:27a6a256cd23 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python2 |
2 | |
3 # IMPORTANT: This will run using Python 2 still! | |
2 | 4 |
3 import datetime | 5 import datetime |
4 import json | 6 import json |
5 import os | 7 import os |
6 import subprocess | 8 import subprocess |
12 def write_gemini_config(config, config_file): | 14 def write_gemini_config(config, config_file): |
13 with open(config_file, 'w') as fo: | 15 with open(config_file, 'w') as fo: |
14 yaml.dump(config, fo, allow_unicode=False, default_flow_style=False) | 16 yaml.dump(config, fo, allow_unicode=False, default_flow_style=False) |
15 | 17 |
16 | 18 |
19 def load_gemini_config(config_file): | |
20 with open(config_file) as fi: | |
21 return yaml.load(fi) | |
22 | |
23 | |
17 def main(): | 24 def main(): |
18 today = datetime.date.today() | 25 today = datetime.date.today() |
19 with open(sys.argv[1]) as fh: | 26 with open(sys.argv[1]) as fh: |
20 params = json.load(fh) | 27 params = json.load(fh) |
21 target_directory = params['output_data'][0]['extra_files_path'] | 28 target_directory = params['output_data'][0]['extra_files_path'] |
22 os.mkdir(target_directory) | 29 os.mkdir(target_directory) |
23 | 30 |
24 # Generate a minimal configuration file for GEMINI update | 31 # Prepare the metadata for the new data table record |
25 # to instruct the tool to download the annotation data into a | |
26 # subfolder of the target directory. | |
27 config_file = os.path.join(target_directory, 'gemini-config.yaml') | |
28 anno_dir = os.path.join(target_directory, 'gemini/data') | |
29 gemini_bootstrap_config = {'annotation_dir': anno_dir} | |
30 write_gemini_config(gemini_bootstrap_config, config_file) | |
31 | |
32 # Now gemini update can be called to download the data. | |
33 # The GEMINI_CONFIG environment variable lets the tool discover | |
34 # the configuration file we prepared for it. | |
35 # Note that the tool will rewrite the file turning it into a | |
36 # complete gemini configuration file. | |
37 gemini_env = os.environ.copy() | |
38 gemini_env['GEMINI_CONFIG'] = target_directory | |
39 cmd = "gemini update --dataonly %s %s" % ( | |
40 params['param_dict']['gerp_bp'], | |
41 params['param_dict']['cadd'] | |
42 ) | |
43 subprocess.check_call(cmd, shell=True, env=gemini_env) | |
44 | |
45 # GEMINI tool wrappers that need access to the annotation files | |
46 # are supposed to symlink them into a gemini/data subfolder of | |
47 # the job working directory. To have GEMINI discover them there, | |
48 # we need to set this location as the 'annotation_dir' in the | |
49 # configuration file. | |
50 with open(config_file) as fi: | |
51 config = yaml.load(fi) | |
52 config['annotation_dir'] = 'gemini/data' | |
53 write_gemini_config(config, config_file) | |
54 | 32 |
55 # The name of the database should reflect whether it was built with or | 33 # The name of the database should reflect whether it was built with or |
56 # without the optional GERP-bp data, the CADD scores, or both. | 34 # without the optional GERP-bp data, the CADD scores, or both. |
57 # This builds up the corresponding part of the name: | 35 # This builds up the corresponding part of the name: |
58 anno_extras = [] | 36 anno_extras = [] |
63 if anno_extras: | 41 if anno_extras: |
64 anno_desc = ' w/ ' + ' & '.join(anno_extras) | 42 anno_desc = ' w/ ' + ' & '.join(anno_extras) |
65 else: | 43 else: |
66 anno_desc = '' | 44 anno_desc = '' |
67 | 45 |
68 # Finally, we prepare the metadata for the new data table record ... | |
69 data_manager_dict = { | 46 data_manager_dict = { |
70 'data_tables': { | 47 'data_tables': { |
71 'gemini_versioned_databases': [ | 48 'gemini_versioned_databases': [ |
72 { | 49 { |
73 'value': today.isoformat(), | 50 'value': today.isoformat(), |
81 } | 58 } |
82 ] | 59 ] |
83 } | 60 } |
84 } | 61 } |
85 | 62 |
86 # ... and save it to the json results file | 63 # Save the data table metadata to the json results file |
87 with open(sys.argv[1], 'w') as fh: | 64 with open(sys.argv[1], 'w') as fh: |
88 json.dump(data_manager_dict, fh, sort_keys=True) | 65 json.dump(data_manager_dict, fh, sort_keys=True) |
66 | |
67 # Generate a minimal configuration file for GEMINI update | |
68 # to instruct the tool to download the annotation data into a | |
69 # subfolder of the target directory. | |
70 config_file = os.path.join(target_directory, 'gemini-config.yaml') | |
71 anno_dir = os.path.join(target_directory, 'gemini/data') | |
72 gemini_bootstrap_config = {'annotation_dir': anno_dir} | |
73 write_gemini_config(gemini_bootstrap_config, config_file) | |
74 | |
75 # Verify that we can read the config_file just created as we need to do so | |
76 # after the data download has finished and it is very annoying to have this | |
77 # fail after dozens of GB of data have been downloaded | |
78 config = load_gemini_config(config_file) | |
79 | |
80 # Now gemini update can be called to download the data. | |
81 # The GEMINI_CONFIG environment variable lets the tool discover | |
82 # the configuration file we prepared for it. | |
83 # Note that the tool will rewrite the file turning it into a | |
84 # complete gemini configuration file. | |
85 gemini_env = os.environ.copy() | |
86 gemini_env['GEMINI_CONFIG'] = target_directory | |
87 cmd = ['gemini', 'update', '--dataonly'] | |
88 if params['param_dict']['gerp_bp']: | |
89 cmd += ['--extra', 'gerp_bp'] | |
90 if params['param_dict']['cadd']: | |
91 cmd += ['--extra', 'cadd_score'] | |
92 | |
93 if not params['param_dict']['test_data_manager']: | |
94 # This is not a test => Going to embark on a massive download now | |
95 subprocess.check_call(cmd, env=gemini_env) | |
96 | |
97 # GEMINI tool wrappers that need access to the annotation files | |
98 # are supposed to symlink them into a gemini/data subfolder of | |
99 # the job working directory. To have GEMINI discover them there, | |
100 # we need to set this location as the 'annotation_dir' in the | |
101 # configuration file. | |
102 config = load_gemini_config(config_file) | |
103 config['annotation_dir'] = 'gemini/data' | |
104 write_gemini_config(config, config_file) | |
89 | 105 |
90 | 106 |
91 if __name__ == "__main__": | 107 if __name__ == "__main__": |
92 main() | 108 main() |