comparison data_manager/data_manager_gemini_download.py @ 9:27a6a256cd23 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_gemini_database_downloader commit 275b7863ff4f8b0dff9cd7ea6c4b635694f0168d
author iuc
date Sat, 03 Dec 2022 10:37:24 +0000
parents 52b6a4d98009
children
comparison
equal deleted inserted replaced
8:52b6a4d98009 9:27a6a256cd23
1 #!/usr/bin/env python 1 #!/usr/bin/env python2
2
3 # IMPORTANT: This will run using Python 2 still!
2 4
3 import datetime 5 import datetime
4 import json 6 import json
5 import os 7 import os
6 import subprocess 8 import subprocess
12 def write_gemini_config(config, config_file): 14 def write_gemini_config(config, config_file):
13 with open(config_file, 'w') as fo: 15 with open(config_file, 'w') as fo:
14 yaml.dump(config, fo, allow_unicode=False, default_flow_style=False) 16 yaml.dump(config, fo, allow_unicode=False, default_flow_style=False)
15 17
16 18
19 def load_gemini_config(config_file):
20 with open(config_file) as fi:
21 return yaml.load(fi)
22
23
17 def main(): 24 def main():
18 today = datetime.date.today() 25 today = datetime.date.today()
19 with open(sys.argv[1]) as fh: 26 with open(sys.argv[1]) as fh:
20 params = json.load(fh) 27 params = json.load(fh)
21 target_directory = params['output_data'][0]['extra_files_path'] 28 target_directory = params['output_data'][0]['extra_files_path']
22 os.mkdir(target_directory) 29 os.mkdir(target_directory)
23 30
24 # Generate a minimal configuration file for GEMINI update 31 # Prepare the metadata for the new data table record
25 # to instruct the tool to download the annotation data into a
26 # subfolder of the target directory.
27 config_file = os.path.join(target_directory, 'gemini-config.yaml')
28 anno_dir = os.path.join(target_directory, 'gemini/data')
29 gemini_bootstrap_config = {'annotation_dir': anno_dir}
30 write_gemini_config(gemini_bootstrap_config, config_file)
31
32 # Now gemini update can be called to download the data.
33 # The GEMINI_CONFIG environment variable lets the tool discover
34 # the configuration file we prepared for it.
35 # Note that the tool will rewrite the file turning it into a
36 # complete gemini configuration file.
37 gemini_env = os.environ.copy()
38 gemini_env['GEMINI_CONFIG'] = target_directory
39 cmd = "gemini update --dataonly %s %s" % (
40 params['param_dict']['gerp_bp'],
41 params['param_dict']['cadd']
42 )
43 subprocess.check_call(cmd, shell=True, env=gemini_env)
44
45 # GEMINI tool wrappers that need access to the annotation files
46 # are supposed to symlink them into a gemini/data subfolder of
47 # the job working directory. To have GEMINI discover them there,
48 # we need to set this location as the 'annotation_dir' in the
49 # configuration file.
50 with open(config_file) as fi:
51 config = yaml.load(fi)
52 config['annotation_dir'] = 'gemini/data'
53 write_gemini_config(config, config_file)
54 32
55 # The name of the database should reflect whether it was built with or 33 # The name of the database should reflect whether it was built with or
56 # without the optional GERP-bp data, the CADD scores, or both. 34 # without the optional GERP-bp data, the CADD scores, or both.
57 # This builds up the corresponding part of the name: 35 # This builds up the corresponding part of the name:
58 anno_extras = [] 36 anno_extras = []
63 if anno_extras: 41 if anno_extras:
64 anno_desc = ' w/ ' + ' & '.join(anno_extras) 42 anno_desc = ' w/ ' + ' & '.join(anno_extras)
65 else: 43 else:
66 anno_desc = '' 44 anno_desc = ''
67 45
68 # Finally, we prepare the metadata for the new data table record ...
69 data_manager_dict = { 46 data_manager_dict = {
70 'data_tables': { 47 'data_tables': {
71 'gemini_versioned_databases': [ 48 'gemini_versioned_databases': [
72 { 49 {
73 'value': today.isoformat(), 50 'value': today.isoformat(),
81 } 58 }
82 ] 59 ]
83 } 60 }
84 } 61 }
85 62
86 # ... and save it to the json results file 63 # Save the data table metadata to the json results file
87 with open(sys.argv[1], 'w') as fh: 64 with open(sys.argv[1], 'w') as fh:
88 json.dump(data_manager_dict, fh, sort_keys=True) 65 json.dump(data_manager_dict, fh, sort_keys=True)
66
67 # Generate a minimal configuration file for GEMINI update
68 # to instruct the tool to download the annotation data into a
69 # subfolder of the target directory.
70 config_file = os.path.join(target_directory, 'gemini-config.yaml')
71 anno_dir = os.path.join(target_directory, 'gemini/data')
72 gemini_bootstrap_config = {'annotation_dir': anno_dir}
73 write_gemini_config(gemini_bootstrap_config, config_file)
74
75 # Verify that we can read the config_file just created as we need to do so
76 # after the data download has finished and it is very annoying to have this
77 # fail after dozens of GB of data have been downloaded
78 config = load_gemini_config(config_file)
79
80 # Now gemini update can be called to download the data.
81 # The GEMINI_CONFIG environment variable lets the tool discover
82 # the configuration file we prepared for it.
83 # Note that the tool will rewrite the file turning it into a
84 # complete gemini configuration file.
85 gemini_env = os.environ.copy()
86 gemini_env['GEMINI_CONFIG'] = target_directory
87 cmd = ['gemini', 'update', '--dataonly']
88 if params['param_dict']['gerp_bp']:
89 cmd += ['--extra', 'gerp_bp']
90 if params['param_dict']['cadd']:
91 cmd += ['--extra', 'cadd_score']
92
93 if not params['param_dict']['test_data_manager']:
94 # This is not a test => Going to embark on a massive download now
95 subprocess.check_call(cmd, env=gemini_env)
96
97 # GEMINI tool wrappers that need access to the annotation files
98 # are supposed to symlink them into a gemini/data subfolder of
99 # the job working directory. To have GEMINI discover them there,
100 # we need to set this location as the 'annotation_dir' in the
101 # configuration file.
102 config = load_gemini_config(config_file)
103 config['annotation_dir'] = 'gemini/data'
104 write_gemini_config(config, config_file)
89 105
90 106
91 if __name__ == "__main__": 107 if __name__ == "__main__":
92 main() 108 main()