view data_manager/data_manager_gemini_download.py @ 5:b4b2b284230a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_gemini_database_downloader commit 9981ac1338c91a3ab46557ce6b821da3d5b4bc86
author iuc
date Wed, 19 Dec 2018 12:36:22 -0500
parents fe5a9a7d95b0
children f57426daa04d
line wrap: on
line source

#!/usr/bin/env python

import datetime
import json
import os
import shlex
import subprocess
import sys

import yaml


def write_gemini_config(config, config_file):
    """Serialize *config* as YAML into *config_file*.

    The file is opened for writing, so any existing content is replaced.
    The document is emitted in block style (``default_flow_style=False``)
    and with ``allow_unicode=False``.

    Args:
        config: the configuration object to dump.
        config_file: path of the YAML file to (over)write.
    """
    with open(config_file, 'w') as yaml_out:
        yaml.dump(
            config,
            stream=yaml_out,
            allow_unicode=False,
            default_flow_style=False,
        )


def main():
    """Download the GEMINI annotation data and describe it as a data table entry.

    Steps performed:

    1. Read the JSON parameter file named by ``sys.argv[1]``.
    2. Create the target directory given by the first ``output_data``
       entry's ``extra_files_path``.
    3. Write a bootstrap GEMINI configuration into that directory and run
       ``gemini update --dataonly`` with ``GEMINI_CONFIG`` pointing at it.
    4. Rewrite the configuration so that ``annotation_dir`` is the
       relative path ``gemini/data``.
    5. Overwrite the parameter file with the JSON description of the new
       ``gemini_versioned_databases`` record.

    Raises:
        OSError: if the target directory cannot be created (``os.mkdir``
            fails, e.g. because the directory already exists).
        subprocess.CalledProcessError: if ``gemini update`` exits with a
            non-zero status.
    """
    # The ISO date of this run identifies the snapshot in the data table.
    snapshot = datetime.date.today().isoformat()

    # Use a context manager so the parameter file is closed before it is
    # overwritten with the results at the end of this function.
    with open(sys.argv[1]) as params_in:
        params = json.load(params_in)
    target_directory = params['output_data'][0]['extra_files_path']
    os.mkdir(target_directory)

    # Generate a minimal configuration file for GEMINI update
    # to instruct the tool to download the annotation data into a
    # subfolder of the target directory.
    config_file = os.path.join(target_directory, 'gemini-config.yaml')
    anno_dir = os.path.join(target_directory, 'gemini/data')
    gemini_bootstrap_config = {'annotation_dir': anno_dir}
    write_gemini_config(gemini_bootstrap_config, config_file)

    # Now gemini update can be called to download the data.
    # The GEMINI_CONFIG environment variable lets the tool discover
    # the configuration file we prepared for it.
    # Note that the tool will rewrite the file turning it into a
    # complete gemini configuration file.
    gemini_env = os.environ.copy()
    gemini_env['GEMINI_CONFIG'] = target_directory

    # Build the command as an argument list and run it without a shell,
    # so that shell metacharacters in the parameter values are never
    # interpreted. Each value is split into zero or more arguments, which
    # keeps empty values and multi-word values working.
    # NOTE(review): presumably each value is either empty or a complete
    # option such as "--extra <name>" -- verify against the tool wrapper.
    cmd = ['gemini', 'update', '--dataonly']
    for option in ('gerp_bp', 'cadd'):
        cmd.extend(shlex.split(str(params['param_dict'][option])))
    subprocess.check_call(cmd, env=gemini_env)

    # GEMINI tool wrappers that need access to the annotation files
    # are supposed to symlink them into a gemini/data subfolder of
    # the job working directory. To have GEMINI discover them there,
    # we need to set this location as the 'annotation_dir' in the
    # configuration file.
    with open(config_file) as fi:
        # An explicit Loader is required by PyYAML >= 6 and avoids the
        # deprecation warning of PyYAML 5.x. yaml.Loader keeps the
        # behaviour yaml.load() had when no Loader was given.
        # NOTE(review): yaml.Loader can construct arbitrary Python
        # objects. The file read here was written by this script and by
        # the gemini tool; switch to yaml.safe_load once it is confirmed
        # that the file never contains Python-specific tags.
        config = yaml.load(fi, Loader=yaml.Loader)
    config['annotation_dir'] = 'gemini/data'
    write_gemini_config(config, config_file)

    # Finally, we prepare the metadata for the new data table record ...
    data_manager_dict = {
        'data_tables': {
            'gemini_versioned_databases': [
                {
                    'value': snapshot,
                    'dbkey': 'hg19',
                    'version': params['param_dict']['gemini_db_version'],
                    'name': 'GEMINI annotations (%s snapshot)' % snapshot,
                    'path': './%s' % snapshot,
                }
            ]
        }
    }

    # ... and save it to the json results file.
    # Text mode is required: json produces str, which cannot be written
    # to a file opened in binary mode on Python 3.
    with open(sys.argv[1], 'w') as out:
        json.dump(data_manager_dict, out)


# Run the data manager only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()