Repository 'data_manager_rsync_g2'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/data_manager_rsync_g2

Changeset 0:0a3a6f862104 (2015-10-14)
Next changeset 1:8ff92bd7e2a3 (2017-04-04)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_rsync_g2 commit 704060ebdf7399ecce9e0e8bd7262727fe750c27-dirty
added:
README
data_manager/data_manager_rsync.py
data_manager/data_manager_rsync.xml
data_manager_conf.xml
test-data/sacCer2_rsync_all_fasta.data_manager_json
tool_data_table_conf.xml.sample
b
diff -r 000000000000 -r 0a3a6f862104 README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README Wed Oct 14 13:48:12 2015 -0400
b
@@ -0,0 +1,1 @@
+This Data Manager will connect to the Galaxy Project's rsync server to install reference data.
b
diff -r 000000000000 -r 0a3a6f862104 data_manager/data_manager_rsync.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_rsync.py Wed Oct 14 13:48:12 2015 -0400
[
b'@@ -0,0 +1,390 @@\n+#!/usr/bin/env python\n+#Dan Blankenberg\n+\n+import sys\n+import os\n+import tempfile\n+import shutil\n+import optparse\n+import urllib2\n+import subprocess\n+import datetime\n+from os.path import basename\n+from json import loads, dumps\n+from xml.etree.ElementTree import tostring\n+\n+import logging\n+_log_name = __name__\n+if _log_name == \'__builtin__\':\n+    _log_name = \'toolshed.installed.g2.rsync.data.manager\'\n+log = logging.getLogger( _log_name )\n+\n+# Get the Data from the Galaxy Project rsync server\n+RSYNC_CMD = \'rsync\'\n+RSYNC_SERVER = "rsync://datacache.g2.bx.psu.edu/"\n+LOCATION_DIR = "location"\n+INDEX_DIR = "indexes"\n+\n+# Pull the Tool Data Table files from github\n+# FIXME: These files should be accessible from the rsync server directly.\n+TOOL_DATA_TABLE_CONF_XML_URLS = { \'main\':"https://raw.githubusercontent.com/galaxyproject/usegalaxy-playbook/master/files/galaxy/usegalaxy.org/config/tool_data_table_conf.xml",\n+                                  \'test\':"https://raw.githubusercontent.com/galaxyproject/usegalaxy-playbook/master/files/galaxy/test.galaxyproject.org/config/tool_data_table_conf.xml" }\n+\n+# Replace data table source entries with local temporary location\n+GALAXY_DATA_CANONICAL_PATH = "/galaxy/data/"\n+TOOL_DATA_TABLE_CONF_XML_REPLACE_SOURCE = \'<file path="%slocation/\' % ( GALAXY_DATA_CANONICAL_PATH )\n+TOOL_DATA_TABLE_CONF_XML_REPLACE_TARGET = \'<file path="%s/\'\n+\n+# Some basic Caching, so we don\'t have to reload and download everything every time\n+CACHE_TIME = datetime.timedelta( minutes=10 )\n+TOOL_DATA_TABLES_LOADED_BY_URL = {}\n+\n+# Entries will not be selected by default\n+DEFAULT_SELECTED = False\n+\n+# Exclude data managers without \'path\' column or that are in the manual exclude list\n+PATH_COLUMN_NAMES = [\'path\']\n+EXCLUDE_DATA_TABLES = []\n+# TODO: Make additional handler actions available for tables that can\'t fit into the basic\n+# "take the value of path" as a dir and 
copy contents.\n+# e.g. mafs. Although this maf table is goofy and doesn\'t have path defined in <table> def,\n+# but it does exist in the .loc.\n+\n+# --- These methods are called by/within the Galaxy Application\n+\n+def exec_before_job( app, inp_data, out_data, param_dict, tool=None, **kwd ):\n+    # Look for any data tables that haven\'t been defined for this data manager before and dynamically add them to Galaxy\n+    param_dict = dict( **param_dict )\n+    param_dict[\'data_table_entries\'] = param_dict.get( \'data_table_entries\', [] )\n+    if not isinstance( param_dict[\'data_table_entries\'], list ):\n+        param_dict[\'data_table_entries\'] = [param_dict[\'data_table_entries\']]\n+    param_dict[\'data_table_entries\'] = ",".join( param_dict[\'data_table_entries\'] )\n+    if tool:\n+        tool_shed_repository = tool.tool_shed_repository\n+    else:\n+        tool_shed_repository = None\n+    tdtm = None\n+    data_manager = app.data_managers.get_manager( tool.data_manager_id, None )\n+    data_table_entries = get_data_table_entries( param_dict )\n+    data_tables = load_data_tables_from_url( data_table_class=app.tool_data_tables.__class__ ).get( \'data_tables\' )\n+    for data_table_name, entries in data_table_entries.iteritems():\n+        #get data table managed by this data Manager\n+        has_data_table = app.tool_data_tables.get_tables().get( data_table_name )\n+        if has_data_table:\n+            has_data_table = bool( has_data_table.get_filename_for_source( data_manager, None ) )\n+        if not has_data_table:\n+            if tdtm is None:\n+                from tool_shed.tools import data_table_manager\n+                tdtm = data_table_manager.ToolDataTableManager( app )\n+                target_dir, tool_path, relative_target_dir = tdtm.get_target_install_dir( tool_shed_repository )\n+            #Dynamically add this data table\n+            log.debug( "Attempting to dynamically create a missing Tool Data Table named %s." 
% data_table_name )\n+            data_table = data_tables[data_table_name]\n+            repo_info = tdtm.generate_repository_in'..b'A_CANONICAL_PATH ):]\n+    make_path = path\n+    rsync_source = rsync_urljoin( rsync_urljoin( RSYNC_SERVER, INDEX_DIR ), path )\n+    if rsync_source.endswith( \'/\' ):\n+        rsync_source = rsync_source[:-1]\n+    try:\n+        dir_list = rsync_list_dir( rsync_source + "/" )\n+    except Exception, e:\n+        dir_list = None\n+    while not dir_list or \'.\' not in dir_list:\n+        head, tail = os.path.split( make_path )\n+        if not head:\n+            head = tail\n+        make_path = head\n+        rsync_source = rsync_urljoin( rsync_urljoin( RSYNC_SERVER, INDEX_DIR ), head ) #if we error here, likely due to a connection issue\n+        if rsync_source.endswith( \'/\' ):\n+            rsync_source = rsync_source[:-1]\n+        dir_list = rsync_list_dir( rsync_source + "/" )\n+    split_path = split_path_all( make_path )\n+    target_path = data_root_dir\n+    for p in split_path[:-1]:\n+        target_path = os.path.join( target_path, p )\n+        if not os.path.exists( target_path ):\n+            os.mkdir( target_path )\n+    rsync_sync_to_dir( rsync_source, target_path )\n+    return path\n+\n+def get_data_and_munge_path( data_table_name, data_table_entry, data_root_dir ):\n+    path_cols = []\n+    for key, value in data_table_entry.iteritems():\n+        if key in PATH_COLUMN_NAMES:\n+            path_cols.append( ( key, value ) )\n+    found_data = False\n+    if path_cols:\n+        for col_name, value in path_cols:\n+            #GALAXY_DATA_CANONICAL_PATH\n+            if value.startswith( GALAXY_DATA_CANONICAL_PATH ):\n+                data_table_entry[col_name] = get_data_for_path( value, data_root_dir )\n+                found_data = True\n+            else:\n+                print \'unable to determine location of rsync data for\', data_table_name, data_table_entry\n+    return 
data_table_entry\n+\n+def fulfill_data_table_entries( data_table_entries, data_manager_dict, data_root_dir ):\n+    for data_table_name, entries in data_table_entries.iteritems():\n+        for entry in entries:\n+            entry = get_data_and_munge_path( data_table_name, entry, data_root_dir )\n+            _add_data_table_entry( data_manager_dict, data_table_name, entry )\n+    return data_manager_dict\n+\n+def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ):\n+    data_manager_dict[\'data_tables\'] = data_manager_dict.get( \'data_tables\', {} )\n+    data_manager_dict[\'data_tables\'][data_table_name] = data_manager_dict[\'data_tables\'].get( data_table_name, [] )\n+    data_manager_dict[\'data_tables\'][data_table_name].append( data_table_entry )\n+    return data_manager_dict\n+\n+def cleanup_before_exit( tmp_dir ):\n+    if tmp_dir and os.path.exists( tmp_dir ):\n+        shutil.rmtree( tmp_dir )\n+\n+def get_data_table_entries( params ):\n+    rval = {}\n+    data_table_entries = params.get( \'data_table_entries\', None )\n+    if data_table_entries :\n+        for entry_text in data_table_entries.split( \',\' ):\n+            entry_text = entry_text.strip().decode( \'base64\' )\n+            entry_dict = loads( entry_text )\n+            data_table_name = entry_dict[\'name\']\n+            data_table_entry = entry_dict[\'entry\']\n+            rval[ data_table_name ] = rval.get( data_table_name, [] )\n+            rval[ data_table_name ].append( data_table_entry )\n+    return rval\n+\n+def main():\n+    #Parse Command Line\n+    parser = optparse.OptionParser()\n+    (options, args) = parser.parse_args()\n+    \n+    filename = args[0]\n+    \n+    params = loads( open( filename ).read() )\n+    target_directory = params[ \'output_data\' ][0][\'extra_files_path\']\n+    os.mkdir( target_directory )\n+    data_manager_dict = {}\n+    \n+    data_table_entries = get_data_table_entries( params[\'param_dict\'] )\n+    \n+    # 
Populate the data Tables\n+    data_manager_dict = fulfill_data_table_entries( data_table_entries, data_manager_dict, target_directory )\n+    \n+    #save info to json file\n+    open( filename, \'wb\' ).write( dumps( data_manager_dict ) )\n+        \n+if __name__ == "__main__": main()\n'
b
diff -r 000000000000 -r 0a3a6f862104 data_manager/data_manager_rsync.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_rsync.xml Wed Oct 14 13:48:12 2015 -0400
b
@@ -0,0 +1,48 @@
+<tool id="data_manager_rsync_g2" name="Rsync with g2" version="0.0.1" tool_type="manage_data">
+    <options sanitize="False" />
+    <description>fetching</description>
+    <command interpreter="python">data_manager_rsync.py "${out_file}"</command>
+    <stdio>
+        <exit_code range="1:" err_level="fatal" />
+        <exit_code range=":-1" err_level="fatal" />
+    </stdio>
+    <inputs>
+        
+        <param name="dbkey" type="genomebuild" label="dbkey to search for Reference Data" help="Specify ? to show all"/>
+
+        <param name="data_table_names" type="select" display="checkboxes" multiple="True" optional="True" 
+        label="Choose Desired Data Tables" dynamic_options="galaxy_code_get_available_data_tables( __trans__ )"
+        refresh_on_change="dbkey"/>
+        
+        
+        <param name="data_table_entries" type="select" display="checkboxes" multiple="True" optional="False" 
+        label="Choose Desired Data Tables Entries" dynamic_options="galaxy_code_get_available_data_tables_entries( __trans__, dbkey, data_table_names )"
+        refresh_on_change="dbkey"/>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json" dbkey="dbkey"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="dbkey" value="sacCer2"/>
+            <param name="data_table_names" value="all_fasta"/>
+            <param name="data_table_entries" value="eyJlbnRyeSI6IHsicGF0aCI6ICIvZ2FsYXh5L2RhdGEvc2FjQ2VyMi9zZXEvc2FjQ2VyMi5mYSIs ICJkYmtleSI6ICJzYWNDZXIyIiwgInZhbHVlIjogInNhY0NlcjIiLCAibmFtZSI6ICJZZWFzdCAo U2FjY2hhcm9teWNlcyBjZXJldmlzaWFlKTogc2FjQ2VyMiJ9LCAibmFtZSI6ICJhbGxfZmFzdGEi fQ=="/>
+            <output name="out_file" file="sacCer2_rsync_all_fasta.data_manager_json"/>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+This tool connects to the Galaxy Project's rsync reference data repository to download data and populate tool data tables.
+
+------
+
+
+
+.. class:: infomark
+
+**Notice:** If you do not have a particular data table defined, then it will be created and persisted dynamically. 
+
+    </help>
+    <code file="data_manager_rsync.py" />
+</tool>
b
diff -r 000000000000 -r 0a3a6f862104 data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Wed Oct 14 13:48:12 2015 -0400
b
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager/data_manager_rsync.xml" id="rsync_data_manager_g2" undeclared_tables="True">
+       
+    </data_manager>
+</data_managers>
+
b
diff -r 000000000000 -r 0a3a6f862104 test-data/sacCer2_rsync_all_fasta.data_manager_json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sacCer2_rsync_all_fasta.data_manager_json Wed Oct 14 13:48:12 2015 -0400
[
@@ -0,0 +1,1 @@
+{"data_tables": {"all_fasta": [{"path": "sacCer2/seq/sacCer2.fa", "value": "sacCer2", "dbkey": "sacCer2", "name": "Yeast (Saccharomyces cerevisiae): sacCer2"}]}}
\ No newline at end of file
b
diff -r 000000000000 -r 0a3a6f862104 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Oct 14 13:48:12 2015 -0400
b
@@ -0,0 +1,2 @@
+<tables>
+</tables>