data_manager_gemini_database_downloader: data_manager/data_manager_gemini

comparison data_manager/data_manager_gemini_download.py @ 9:27a6a256cd23 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_gemini_database_downloader commit 275b7863ff4f8b0dff9cd7ea6c4b635694f0168d

author	iuc
date	Sat, 03 Dec 2022 10:37:24 +0000
parents	52b6a4d98009
children

comparison

equal deleted inserted replaced

-:52b6a4d98009
+:27a6a256cd23
-#!/usr/bin/env python
+#!/usr/bin/env python2
+# IMPORTANT: This script still runs under Python 2!
 import datetime
 import json
 import os
 import subprocess
 def write_gemini_config(config, config_file):
 with open(config_file, 'w') as fo:
 yaml.dump(config, fo, allow_unicode=False, default_flow_style=False)
+def load_gemini_config(config_file):
+with open(config_file) as fi:
+return yaml.load(fi)
 def main():
 today = datetime.date.today()
 with open(sys.argv[1]) as fh:
 params = json.load(fh)
 target_directory = params['output_data'][0]['extra_files_path']
 os.mkdir(target_directory)
-# Generate a minimal configuration file for GEMINI update
+# Prepare the metadata for the new data table record
-# to instruct the tool to download the annotation data into a
-# subfolder of the target directory.
-config_file = os.path.join(target_directory, 'gemini-config.yaml')
-anno_dir = os.path.join(target_directory, 'gemini/data')
-gemini_bootstrap_config = {'annotation_dir': anno_dir}
-write_gemini_config(gemini_bootstrap_config, config_file)
-# Now gemini update can be called to download the data.
-# The GEMINI_CONFIG environment variable lets the tool discover
-# the configuration file we prepared for it.
-# Note that the tool will rewrite the file turning it into a
-# complete gemini configuration file.
-gemini_env = os.environ.copy()
-gemini_env['GEMINI_CONFIG'] = target_directory
-cmd = "gemini update --dataonly %s %s" % (
-params['param_dict']['gerp_bp'],
-params['param_dict']['cadd']
-)
-subprocess.check_call(cmd, shell=True, env=gemini_env)
-# GEMINI tool wrappers that need access to the annotation files
-# are supposed to symlink them into a gemini/data subfolder of
-# the job working directory. To have GEMINI discover them there,
-# we need to set this location as the 'annotation_dir' in the
-# configuration file.
-with open(config_file) as fi:
-config = yaml.load(fi)
-config['annotation_dir'] = 'gemini/data'
-write_gemini_config(config, config_file)
 # The name of the database should reflect whether it was built with or
 # without the optional GERP-bp data, the CADD scores, or both.
 # This builds up the corresponding part of the name:
 anno_extras = []
 if anno_extras:
 anno_desc = ' w/ ' + ' & '.join(anno_extras)
 else:
 anno_desc = ''
-# Finally, we prepare the metadata for the new data table record ...
 data_manager_dict = {
 'data_tables': {
 'gemini_versioned_databases': [
 {
 'value': today.isoformat(),
 }
 ]
 }
 }
-# ... and save it to the json results file
+# Save the data table metadata to the json results file
 with open(sys.argv[1], 'w') as fh:
 json.dump(data_manager_dict, fh, sort_keys=True)
+# Generate a minimal configuration file for GEMINI update
+# to instruct the tool to download the annotation data into a
+# subfolder of the target directory.
+config_file = os.path.join(target_directory, 'gemini-config.yaml')
+anno_dir = os.path.join(target_directory, 'gemini/data')
+gemini_bootstrap_config = {'annotation_dir': anno_dir}
+write_gemini_config(gemini_bootstrap_config, config_file)
+# Verify that we can read the config_file just created, because we need to
+# do so again after the data download has finished, and it is very annoying
+# to have this fail after dozens of GB of data have been downloaded.
+config = load_gemini_config(config_file)
+# Now gemini update can be called to download the data.
+# The GEMINI_CONFIG environment variable lets the tool discover
+# the configuration file we prepared for it.
+# Note that the tool will rewrite the file turning it into a
+# complete gemini configuration file.
+gemini_env = os.environ.copy()
+gemini_env['GEMINI_CONFIG'] = target_directory
+cmd = ['gemini', 'update', '--dataonly']
+if params['param_dict']['gerp_bp']:
+cmd += ['--extra', 'gerp_bp']
+if params['param_dict']['cadd']:
+cmd += ['--extra', 'cadd_score']
+if not params['param_dict']['test_data_manager']:
+# This is not a test => Going to embark on a massive download now
+subprocess.check_call(cmd, env=gemini_env)
+# GEMINI tool wrappers that need access to the annotation files
+# are supposed to symlink them into a gemini/data subfolder of
+# the job working directory. To have GEMINI discover them there,
+# we need to set this location as the 'annotation_dir' in the
+# configuration file.
+config = load_gemini_config(config_file)
+config['annotation_dir'] = 'gemini/data'
+write_gemini_config(config, config_file)
 if __name__ == "__main__":
 main()

Mercurial > repos > iuc > data_manager_gemini_database_downloader

comparison data_manager/data_manager_gemini_download.py @ 9:27a6a256cd23 draft default tip