changeset 4:fe5a9a7d95b0 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_gemini_database_downloader commit 283362494058ed64143b1f27afb447b8a1cb4313
author iuc
date Fri, 14 Dec 2018 12:40:15 -0500
parents 172815da3d41
children b4b2b284230a
files data_manager/data_manager_gemini_download.py data_manager/data_manager_gemini_download.xml data_manager_conf.xml tool-data/gemini_databases.loc.sample tool-data/gemini_versioned_databases.loc.sample tool_data_table_conf.xml.sample
diffstat 6 files changed, 61 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/data_manager_gemini_download.py	Tue Apr 04 18:09:05 2017 -0400
+++ b/data_manager/data_manager_gemini_download.py	Fri Dec 14 12:40:15 2018 -0500
@@ -6,18 +6,51 @@
 import subprocess
 import sys
 
+import yaml
+
 
 def main():
     today = datetime.date.today()
     params = json.loads( open( sys.argv[1] ).read() )
     target_directory = params[ 'output_data' ][0]['extra_files_path']
     os.mkdir( target_directory )
-    cmd = "gemini --annotation-dir %s update --dataonly %s %s" % (target_directory, params['param_dict']['gerp_bp'], params['param_dict']['cadd'] )
-    subprocess.check_call( cmd, shell=True )
+    # The target_directory needs to be specified twice for the following
+    # invocation of gemini.
+    # In essence, the GEMINI_CONFIG environment variable makes gemini store
+    # its yaml configuration file in that directory, while the
+    # --annotation-dir argument makes it write the same path into the yaml
+    # file, which is then used for determining where the actual annotation
+    # files should be stored.
+    gemini_env = os.environ.copy()
+    gemini_env['GEMINI_CONFIG'] = target_directory
+    cmd = "gemini --annotation-dir %s update --dataonly %s %s" % (
+        target_directory,
+        params['param_dict']['gerp_bp'],
+        params['param_dict']['cadd']
+    )
+    subprocess.check_call( cmd, shell=True, env=gemini_env )
+
+    # Rewrite the newly created gemini config file so that it holds a
+    # relative annotation dir path, which any gemini tool will interpret
+    # as relative to its job working directory at runtime.
+    config_file = os.path.join(target_directory, 'gemini-config.yaml')
+    with open(config_file) as fi:
+        config = yaml.load(fi)
+    config['annotation_dir'] = 'gemini/data'
+    with open(config_file, 'w') as fo:
+        yaml.dump(config, fo, allow_unicode=False, default_flow_style=False)
+
     data_manager_dict = {
         'data_tables': {
-            'gemini_databases': [
-                {'value': today.isoformat(), 'dbkey': 'hg19', 'name': 'GEMINI annotations (%s)' % today.isoformat(), 'path': './%s' % today.isoformat() }
+            'gemini_versioned_databases': [
+                {
+                    'value': today.isoformat(),
+                    'dbkey': 'hg19',
+                    'version': params['param_dict']['gemini_db_version'],
+                    'name':
+                        'GEMINI annotations (%s snapshot)' % today.isoformat(),
+                    'path': './%s' % today.isoformat()
+                }
             ]
         }
     }
--- a/data_manager/data_manager_gemini_download.xml	Tue Apr 04 18:09:05 2017 -0400
+++ b/data_manager/data_manager_gemini_download.xml	Fri Dec 14 12:40:15 2018 -0500
@@ -1,7 +1,11 @@
-<tool id="data_manager_gemini_download" name="GEMINI Download" version="0.18.1" tool_type="manage_data">
-    <description>Download a new database</description>
+<tool id="data_manager_gemini_download" name="GEMINI Download" version="@VERSION@.1" tool_type="manage_data">
+    <description>the annotation files required by the GEMINI suite of tools</description>
+    <macros>
+        <token name="@VERSION@">0.18.1</token>
+        <token name="@DB_VERSION@">181</token>
+    </macros>
     <requirements>
-        <requirement type="package" version="0.18.1">gemini</requirement>
+        <requirement type="package" version="@VERSION@">gemini</requirement>
     </requirements>
     <command detect_errors="exit_code">
         python '$__tool_directory__/data_manager_gemini_download.py' '$out_file'
@@ -11,6 +15,7 @@
             label="Download CADD scores for GEMINI database annotation" help="(--extra cadd_score)"/>
         <param name="gerp_bp" type="boolean" truevalue="--extra gerp_bp" falsevalue="" checked="True"
             label="Download GERP for GEMINI database annotation" help="(--extra gerp_bp)"/>
+        <param name="gemini_db_version" type="hidden" value="@DB_VERSION@"/>
     </inputs>
     <outputs>
         <data name="out_file" format="data_manager_json" label="${tool.name}"/>
@@ -18,9 +23,13 @@
     <tests>
     </tests>
     <help>
-This tool downloads the GEMINI databases.
+This tool downloads the GEMINI annotation files and makes them available to
+corresponding versions of the GEMINI suite of tools. Downloading the (very large) CADD_ scores and GERP_ annotation files is optional.
 
-For details about this tool, please go to http://gemini.readthedocs.org
+Please visit http://gemini.readthedocs.io for details about GEMINI.
+
+.. _GERP: http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html
+.. _CADD: https://cadd.gs.washington.edu/
     </help>
     <citations>
         <citation type="doi">10.1371/journal.pcbi.1003153</citation>
--- a/data_manager_conf.xml	Tue Apr 04 18:09:05 2017 -0400
+++ b/data_manager_conf.xml	Fri Dec 14 12:40:15 2018 -0500
@@ -1,16 +1,17 @@
 <?xml version="1.0"?>
 <data_managers>
     <data_manager tool_file="data_manager/data_manager_gemini_download.xml" id="data_manager_gemini_download" >
-        <data_table name="gemini_databases">  <!-- Defines a Data Table to be modified. -->
+        <data_table name="gemini_versioned_databases">  <!-- Defines a Data Table to be modified. -->
             <output> <!-- Handle the output of the Data Manager Tool -->
                 <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool -->
                 <column name="dbkey" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="version" /> <!-- columns that are going to be specified by the Data Manager Tool -->
                 <column name="name" />  <!-- columns that are going to be specified by the Data Manager Tool -->
                 <column name="path" output_ref="out_file" >
                     <move type="directory" relativize_symlinks="True">
-                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">gemini/data/${dbkey}/${value}</target>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">gemini/${version}/${dbkey}/${value}</target>
                     </move>
-                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/gemini/data/${dbkey}/${value}/gemini/data/</value_translation>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/gemini/${version}/${dbkey}/${value}/</value_translation>
                     <value_translation type="function">abspath</value_translation>
                 </column>
             </output>
--- a/tool-data/gemini_databases.loc.sample	Tue Apr 04 18:09:05 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-## GEMINI databases
-#Version	dbkey	Description
-#08_08_2014	hg19	Database (08-08-2014)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gemini_versioned_databases.loc.sample	Fri Dec 14 12:40:15 2018 -0500
@@ -0,0 +1,3 @@
+## GEMINI versioned databases
+#DownloadDate	dbkey	DBversion	Description	Path
+#2018-07-08	hg19	181	GEMINI annotations (2018-07-08 snapshot)	/path/to/gemini/181/hg19/2018-07-08
--- a/tool_data_table_conf.xml.sample	Tue Apr 04 18:09:05 2017 -0400
+++ b/tool_data_table_conf.xml.sample	Fri Dec 14 12:40:15 2018 -0500
@@ -1,7 +1,7 @@
 <tables>
-    <table name="gemini_databases" comment_char="#" allow_duplicate_entries="False">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/gemini_databases.loc" />
+    <table name="gemini_versioned_databases" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, version, name, path</columns>
+        <file path="tool-data/gemini_versioned_databases.loc" />
     </table>
 </tables>