changeset 1:8ff92bd7e2a3 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_rsync_g2 commit 8652f36a3a3838dca989426961561e81432acf4f
author iuc
date Tue, 04 Apr 2017 18:13:26 -0400
parents 0a3a6f862104
children e0329ab30f6d
files data_manager/data_manager_rsync.py data_manager/data_manager_rsync.xml
diffstat 2 files changed, 91 insertions(+), 78 deletions(-)
--- a/data_manager/data_manager_rsync.py	Wed Oct 14 13:48:12 2015 -0400
+++ b/data_manager/data_manager_rsync.py	Tue Apr 04 18:13:26 2017 -0400
@@ -1,19 +1,27 @@
 #!/usr/bin/env python
-#Dan Blankenberg
+# Dan Blankenberg
+from __future__ import print_function
 
-import sys
-import os
-import tempfile
-import shutil
+import datetime
+import logging
 import optparse
-import urllib2
+import os
+import shutil
 import subprocess
-import datetime
+import tempfile
+from json import (
+    dumps,
+    loads
+)
 from os.path import basename
-from json import loads, dumps
 from xml.etree.ElementTree import tostring
+try:
+    # For Python 3.0 and later
+    from urllib.request import urlopen
+except ImportError:
+    # Fall back to Python 2 imports
+    from urllib2 import urlopen
 
-import logging
 _log_name = __name__
 if _log_name == '__builtin__':
     _log_name = 'toolshed.installed.g2.rsync.data.manager'
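
The try/except shim above keeps urlopen importable under both Python 2 and 3 without adding a dependency. Had the tool been able to depend on six, the same effect is a one-liner (an assumption for illustration only; this commit deliberately sticks to the standard library):

    # Equivalent compatibility import via six (hypothetical alternative):
    from six.moves.urllib.request import urlopen
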
@@ -27,8 +35,8 @@
 
 # Pull the Tool Data Table files from github
 # FIXME: These files should be accessible from the rsync server directly.
-TOOL_DATA_TABLE_CONF_XML_URLS = { 'main':"https://raw.githubusercontent.com/galaxyproject/usegalaxy-playbook/master/files/galaxy/usegalaxy.org/config/tool_data_table_conf.xml",
-                                  'test':"https://raw.githubusercontent.com/galaxyproject/usegalaxy-playbook/master/files/galaxy/test.galaxyproject.org/config/tool_data_table_conf.xml" }
+TOOL_DATA_TABLE_CONF_XML_URLS = {'main': "https://raw.githubusercontent.com/galaxyproject/usegalaxy-playbook/master/files/galaxy/usegalaxy.org/config/tool_data_table_conf.xml",
+                                 'test': "https://raw.githubusercontent.com/galaxyproject/usegalaxy-playbook/master/files/galaxy/test.galaxyproject.org/config/tool_data_table_conf.xml" }
 
 # Replace data table source entries with local temporary location
 GALAXY_DATA_CANONICAL_PATH = "/galaxy/data/"
@@ -50,8 +58,8 @@
 # e.g. mafs. Although this maf table is goofy and doesn't have a path defined in the <table> def,
 # it does exist in the .loc.
 
+
 # --- These methods are called by/within the Galaxy Application
-
 def exec_before_job( app, inp_data, out_data, param_dict, tool=None, **kwd ):
     # Look for any data tables that haven't been defined for this data manager before and dynamically add them to Galaxy
     param_dict = dict( **param_dict )
@@ -67,8 +75,8 @@
     data_manager = app.data_managers.get_manager( tool.data_manager_id, None )
     data_table_entries = get_data_table_entries( param_dict )
     data_tables = load_data_tables_from_url( data_table_class=app.tool_data_tables.__class__ ).get( 'data_tables' )
-    for data_table_name, entries in data_table_entries.iteritems():
-        #get data table managed by this data Manager
+    for data_table_name, entries in data_table_entries.items():
+        # get data table managed by this data manager
         has_data_table = app.tool_data_tables.get_tables().get( data_table_name )
         if has_data_table:
             has_data_table = bool( has_data_table.get_filename_for_source( data_manager, None ) )
@@ -77,7 +85,7 @@
                 from tool_shed.tools import data_table_manager
                 tdtm = data_table_manager.ToolDataTableManager( app )
                 target_dir, tool_path, relative_target_dir = tdtm.get_target_install_dir( tool_shed_repository )
-            #Dynamically add this data table
+            # Dynamically add this data table
             log.debug( "Attempting to dynamically create a missing Tool Data Table named %s." % data_table_name )
             data_table = data_tables[data_table_name]
             repo_info = tdtm.generate_repository_info_elem_from_repository( tool_shed_repository, parent_elem=None )
@@ -89,21 +97,23 @@
             app.tool_data_tables.add_new_entries_from_config_file( tmp_file.name, None, app.config.shed_tool_data_table_config, persist=True )
             tmp_file.close()
 
+
 def galaxy_code_get_available_data_tables( trans ):
-    #list of data tables
+    # list of data tables
     found_tables = get_available_tables( trans )
-    rval = map( lambda x: ( ( x, x, DEFAULT_SELECTED ) ), found_tables )
+    rval = [ ( x, x, DEFAULT_SELECTED ) for x in found_tables]
     return rval
 
+
 def galaxy_code_get_available_data_tables_entries( trans, dbkey, data_table_names ):
-    #available entries, optionally filtered by dbkey and table names
+    # available entries, optionally filtered by dbkey and table names
     if dbkey in [ None, '', '?' ]:
         dbkey = None
     if data_table_names in [ None, '', '?' ]:
         data_table_names = None
     found_tables = get_available_tables_for_dbkey( trans, dbkey, data_table_names )
     dbkey_text = '(%s) ' % ( dbkey ) if dbkey else ''
-    rval = map( lambda x: ( "%s%s" % ( dbkey_text, x[0] ), dumps( dict( name=x[0].split( ': ' )[0], entry=x[1] ) ).encode( 'base64' ).rstrip(), DEFAULT_SELECTED ), found_tables.items() )
+    rval = [( "%s%s" % ( dbkey_text, x[0] ), dumps( dict( name=x[0].split( ': ' )[0], entry=x[1] ) ).encode( 'base64' ).rstrip(), DEFAULT_SELECTED ) for x in found_tables.items()]
     return rval
 
 # --- End Galaxy called Methods ---
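
One caveat a reviewer may note: the imports are now Python 3 ready, but the .encode( 'base64' ) call in galaxy_code_get_available_data_tables_entries above still uses the Python 2-only base64 codec. A version-agnostic sketch using the standard base64 module (the helper name is illustrative, not part of this commit):

    import base64
    from json import dumps

    def encode_entry(name, entry):
        # base64.b64encode works on bytes under Python 2 and 3 alike,
        # unlike str.encode('base64'), which only exists on Python 2.
        # It also emits no trailing newline, so no rstrip() is needed.
        payload = dumps(dict(name=name, entry=entry)).encode('utf-8')
        return base64.b64encode(payload).decode('ascii')
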
@@ -116,8 +126,9 @@
     url = url.lstrip( '/' )
     return "%s/%s" % ( base, url )
 
+
 def rsync_list_dir( server, dir=None, skip_names=[] ):
-    #drwxr-xr-x          50 2014/05/16 20:58:11 .
+    # drwxr-xr-x          50 2014/05/16 20:58:11 .
     if dir:
         dir = rsync_urljoin( server, dir )
     else:
@@ -153,6 +164,7 @@
     rsync_response.close()
     return rval
 
+
 def rsync_sync_to_dir( source, target ):
     rsync_response = tempfile.NamedTemporaryFile()
     rsync_stderr = tempfile.NamedTemporaryFile()
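
For context on what rsync_list_dir (above) parses: running rsync against a module path with a trailing slash prints one line per entry, in the permissions/size/date/time/name layout shown in the hunk's comment. A minimal standalone sketch of listing and parsing such output (URL handling and error checking are simplified assumptions):

    import subprocess

    def list_rsync_dir(url):
        # 'rsync rsync://server/module/dir/' lists contents; each line
        # looks like: drwxr-xr-x          50 2014/05/16 20:58:11 .
        out = subprocess.check_output(['rsync', url])
        entries = {}
        for line in out.decode('utf-8').splitlines():
            perms, size, date, mtime, name = line.split(None, 4)
            entries[name] = dict(permissions=perms, size=size,
                                 date=date, time=mtime)
        return entries
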
@@ -176,31 +188,31 @@
     if cached_data_table is None:
         return True, {}
     if datetime.datetime.now() - cached_data_table.get( 'time_loaded' ) > CACHE_TIME:
-        data_table_text = urllib2.urlopen( url ).read()
+        data_table_text = urlopen( url ).read()
         if cached_data_table.get( 'data_table_text', None ) != data_table_text:
-            return True, {'data_table_text':data_table_text}
+            return True, {'data_table_text': data_table_text}
         loc_file_attrs = rsync_list_dir( RSYNC_SERVER, LOCATION_DIR )
-        if cached_data_table.get( 'loc_file_attrs', None ) !=  loc_file_attrs:
-            return True, {'loc_file_attrs':loc_file_attrs}
+        if cached_data_table.get( 'loc_file_attrs', None ) != loc_file_attrs:
+            return True, {'loc_file_attrs': loc_file_attrs}
     return False, {}
 
+
 def load_data_tables_from_url( url=None, site='main', data_table_class=None  ):
     if not url:
         url = TOOL_DATA_TABLE_CONF_XML_URLS.get( site, None )
     assert url, ValueError( 'You must provide either a URL or a site=name.' )
-    
+
     cached_data_table = TOOL_DATA_TABLES_LOADED_BY_URL.get( url, None )
     refresh, attribs = data_table_needs_refresh( cached_data_table, url )
     if refresh:
-        data_table_text = attribs.get( 'data_table_text' )or urllib2.urlopen( url ).read()
+        data_table_text = attribs.get( 'data_table_text' ) or urlopen( url ).read()
         loc_file_attrs = attribs.get( 'loc_file_attrs' ) or rsync_list_dir( RSYNC_SERVER, LOCATION_DIR )
-        
+
         tmp_dir = tempfile.mkdtemp( prefix='rsync_g2_' )
         tmp_loc_dir = os.path.join( tmp_dir, 'location' )
         os.mkdir( tmp_loc_dir )
         rsync_sync_to_dir( rsync_urljoin( RSYNC_SERVER, LOCATION_DIR ), os.path.abspath( tmp_loc_dir ) )
-        
-        
+
         new_data_table_text = data_table_text.replace( TOOL_DATA_TABLE_CONF_XML_REPLACE_SOURCE, TOOL_DATA_TABLE_CONF_XML_REPLACE_TARGET % ( tmp_loc_dir ) )
         data_table_fh = tempfile.NamedTemporaryFile( dir=tmp_dir, prefix='rysnc_data_manager_data_table_conf_' )
         data_table_fh.write( new_data_table_text )
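
data_table_needs_refresh above implements a simple time-based cache keyed by URL: once an entry in TOOL_DATA_TABLES_LOADED_BY_URL is older than CACHE_TIME, the remote config and loc-file listing are re-fetched and compared before anything is reloaded. A condensed sketch of that pattern in isolation (the names and the five-minute window are illustrative assumptions):

    import datetime

    CACHE = {}
    MAX_AGE = datetime.timedelta(minutes=5)  # stand-in for CACHE_TIME

    def get_cached(url, fetch):
        entry = CACHE.get(url)
        now = datetime.datetime.now()
        if entry is None or now - entry['time_loaded'] > MAX_AGE:
            # Stale or missing: re-fetch and restart the clock.
            entry = {'value': fetch(url), 'time_loaded': now}
            CACHE[url] = entry
        return entry['value']
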
@@ -208,17 +220,18 @@
         tmp_data_dir = os.path.join( tmp_dir, 'tool-data' )
         os.mkdir( tmp_data_dir )
         data_tables = data_table_class( tmp_data_dir, config_filename=data_table_fh.name )
-        for name, data_table in data_tables.data_tables.items():
+        for name, data_table in list(data_tables.data_tables.items()):
             if name in EXCLUDE_DATA_TABLES or not data_table_has_path_column( data_table ):
                 log.debug( 'Removing data table "%s" because it is excluded by name or does not have a defined "path" column.', name )
                 del data_tables.data_tables[name]
         cached_data_table = { 'data_tables': data_tables, 'tmp_dir': tmp_dir, 'data_table_text': data_table_text, 'tmp_loc_dir': tmp_loc_dir, 'loc_file_attrs': loc_file_attrs, 'time_loaded': datetime.datetime.now() }
         TOOL_DATA_TABLES_LOADED_BY_URL[ url ] = cached_data_table
-        #delete the files
+        # delete the files
         data_table_fh.close()
         cleanup_before_exit( tmp_dir )
     return cached_data_table
 
+
 def data_table_has_path_column( data_table ):
     col_names = data_table.get_column_name_list()
     for name in PATH_COLUMN_NAMES:
@@ -226,10 +239,12 @@
             return True
     return False
 
+
 def get_available_tables( trans ):
-    #list of data tables
+    # list of data tables
     data_tables = load_data_tables_from_url( data_table_class=trans.app.tool_data_tables.__class__ )
-    return data_tables.get( 'data_tables' ).get_tables().keys()
+    return list(data_tables.get( 'data_tables' ).get_tables().keys())
+
 
 def get_new_xml_definition( app, data_table, data_manager, repo_info=None, location_file_dir=None ):
     sub_dict = { 'table_name': data_table.name, 'comment_char': '', 'columns': '', 'file_path': '' }
@@ -242,7 +257,7 @@
     location_file_dir = location_file_dir or app.config.galaxy_data_manager_data_path
     for filename in data_table.filenames.keys():
         sub_dict['file_path'] = basename( filename )
-        sub_dict['file_path'] = os.path.join( location_file_dir, sub_dict['file_path'] ) #os.path.abspath?
+        sub_dict['file_path'] = os.path.join( location_file_dir, sub_dict['file_path'] )  # os.path.abspath?
         if not os.path.exists( sub_dict['file_path'] ):
             # Create empty file
             open( sub_dict['file_path'], 'wb+' ).close()
@@ -256,22 +271,23 @@
             </table></tables>
            """ % sub_dict
 
+
 def get_available_tables_for_dbkey( trans, dbkey, data_table_names ):
-    my_data_tables = trans.app.tool_data_tables.get_tables()
     data_tables = load_data_tables_from_url( data_table_class=trans.app.tool_data_tables.__class__ )
     rval = {}
-    for name, data_table in data_tables.get( 'data_tables' ).get_tables().iteritems():
-        if ( not data_table_names or name in data_table_names ): #name in my_data_tables.keys() and 
-            #TODO: check that columns are similiar
+    for name, data_table in data_tables.get( 'data_tables' ).get_tables().items():
+        if ( not data_table_names or name in data_table_names ):
+            # TODO: check that columns are similar
             if not dbkey:
                 entry_getter = data_table.get_named_fields_list()
             else:
                 entry_getter = data_table.get_entries( 'dbkey', dbkey, None, default=[] )
             for entry in entry_getter:
-                 name = "%s: %s" % ( data_table.name, dumps( entry ) )
-                 rval[name] = entry
+                name = "%s: %s" % ( data_table.name, dumps( entry ) )
+                rval[name] = entry
     return rval
 
+
 def split_path_all( path ):
     rval = []
     path = path.rstrip( '/' )
@@ -286,12 +302,13 @@
         else:
             break
     rval.reverse()
-    return rval 
+    return rval
+
 
 def get_data_for_path( path, data_root_dir ):
     # We list dir with a /, but copy data without
 # listing with / gives a . entry when it's a dir
-    # cloning without the / will copy that whole directory into the target, 
+    # cloning without the / will copy that whole directory into the target,
     # instead of just that target's contents
     if path.startswith( GALAXY_DATA_CANONICAL_PATH ):
         path = path[ len( GALAXY_DATA_CANONICAL_PATH ):]
@@ -301,14 +318,14 @@
         rsync_source = rsync_source[:-1]
     try:
         dir_list = rsync_list_dir( rsync_source + "/" )
-    except Exception, e:
+    except Exception:
         dir_list = None
     while not dir_list or '.' not in dir_list:
         head, tail = os.path.split( make_path )
         if not head:
             head = tail
         make_path = head
-        rsync_source = rsync_urljoin( rsync_urljoin( RSYNC_SERVER, INDEX_DIR ), head ) #if we error here, likely due to a connection issue
+        rsync_source = rsync_urljoin( rsync_urljoin( RSYNC_SERVER, INDEX_DIR ), head )  # if we error here, likely due to a connection issue
         if rsync_source.endswith( '/' ):
             rsync_source = rsync_source[:-1]
         dir_list = rsync_list_dir( rsync_source + "/" )
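
The while loop above walks make_path upward one component at a time until the rsync server returns a listing containing '.', i.e., until it reaches the deepest ancestor directory that actually exists remotely. A simplified sketch of that walk with the remote check abstracted into a predicate (the committed code also rebuilds the rsync URL on every step):

    import os

    def deepest_existing(path, listing_exists):
        # Climb toward the root until the remote side confirms the
        # directory exists; give up at the top to avoid looping forever.
        while not listing_exists(path):
            head, tail = os.path.split(path)
            if not head or head == path:
                return None
            path = head
        return path
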
@@ -321,43 +338,45 @@
     rsync_sync_to_dir( rsync_source, target_path )
     return path
 
+
 def get_data_and_munge_path( data_table_name, data_table_entry, data_root_dir ):
     path_cols = []
-    for key, value in data_table_entry.iteritems():
+    for key, value in data_table_entry.items():
         if key in PATH_COLUMN_NAMES:
             path_cols.append( ( key, value ) )
-    found_data = False
     if path_cols:
         for col_name, value in path_cols:
-            #GALAXY_DATA_CANONICAL_PATH
             if value.startswith( GALAXY_DATA_CANONICAL_PATH ):
                 data_table_entry[col_name] = get_data_for_path( value, data_root_dir )
-                found_data = True
             else:
-                print 'unable to determine location of rsync data for', data_table_name, data_table_entry
+                print('unable to determine location of rsync data for', data_table_name, data_table_entry)
     return data_table_entry
 
+
 def fulfill_data_table_entries( data_table_entries, data_manager_dict, data_root_dir ):
-    for data_table_name, entries in data_table_entries.iteritems():
+    for data_table_name, entries in data_table_entries.items():
         for entry in entries:
             entry = get_data_and_munge_path( data_table_name, entry, data_root_dir )
             _add_data_table_entry( data_manager_dict, data_table_name, entry )
     return data_manager_dict
 
+
 def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ):
     data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
     data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get( data_table_name, [] )
     data_manager_dict['data_tables'][data_table_name].append( data_table_entry )
     return data_manager_dict
 
+
 def cleanup_before_exit( tmp_dir ):
     if tmp_dir and os.path.exists( tmp_dir ):
         shutil.rmtree( tmp_dir )
 
+
 def get_data_table_entries( params ):
     rval = {}
     data_table_entries = params.get( 'data_table_entries', None )
-    if data_table_entries :
+    if data_table_entries:
         for entry_text in data_table_entries.split( ',' ):
             entry_text = entry_text.strip().decode( 'base64' )
             entry_dict = loads( entry_text )
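
The same Python 2/3 caveat flagged earlier applies on the decode side: entry_text.strip().decode( 'base64' ) above is a Python 2-only codec call. A version-agnostic counterpart via the standard library (the helper name is illustrative):

    import base64

    def decode_entry_text(entry_text):
        # b64decode accepts both str and bytes on Python 2 and 3,
        # unlike str.decode('base64'), which Python 3 removed.
        return base64.b64decode(entry_text).decode('utf-8')
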
@@ -367,24 +386,26 @@
             rval[ data_table_name ].append( data_table_entry )
     return rval
 
+
 def main():
-    #Parse Command Line
     parser = optparse.OptionParser()
     (options, args) = parser.parse_args()
-    
+
     filename = args[0]
-    
+
     params = loads( open( filename ).read() )
     target_directory = params[ 'output_data' ][0]['extra_files_path']
     os.mkdir( target_directory )
     data_manager_dict = {}
-    
+
     data_table_entries = get_data_table_entries( params['param_dict'] )
-    
+
     # Populate the data Tables
     data_manager_dict = fulfill_data_table_entries( data_table_entries, data_manager_dict, target_directory )
-    
-    #save info to json file
+
+    # save info to json file
     open( filename, 'wb' ).write( dumps( data_manager_dict ) )
-        
-if __name__ == "__main__": main()
+
+
+if __name__ == "__main__":
+    main()
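
For orientation, main() above follows the usual Galaxy data manager contract: Galaxy hands the script a JSON file describing the job, the script creates extra_files_path and fills it, then overwrites the same JSON file with a data_tables mapping. A sketch of the round trip with illustrative values (the field layout is the data manager convention; the concrete paths and entries here are made up):

    from json import dumps, loads

    # Input, as read by main(); paths are illustrative.
    params = loads('{"output_data": [{"extra_files_path": "/tmp/dm_out"}],'
                   ' "param_dict": {"data_table_entries": ""}}')
    # The script would os.mkdir() this directory and rsync data into it.
    target_directory = params['output_data'][0]['extra_files_path']

    # Output, as written back by main(): one list of entries per table.
    result = {'data_tables': {'all_fasta': [
        {'value': 'hg19', 'dbkey': 'hg19', 'name': 'hg19',
         'path': '/tmp/dm_out/hg19/seq/hg19.fa'}
    ]}}
    print(dumps(result))
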
--- a/data_manager/data_manager_rsync.xml	Wed Oct 14 13:48:12 2015 -0400
+++ b/data_manager/data_manager_rsync.xml	Tue Apr 04 18:13:26 2017 -0400
@@ -1,23 +1,18 @@
 <tool id="data_manager_rsync_g2" name="Rsync with g2" version="0.0.1" tool_type="manage_data">
     <options sanitize="False" />
     <description>fetching</description>
-    <command interpreter="python">data_manager_rsync.py "${out_file}"</command>
-    <stdio>
-        <exit_code range="1:" err_level="fatal" />
-        <exit_code range=":-1" err_level="fatal" />
-    </stdio>
+    <command detect_errors="exit_code"><![CDATA[
+        python '$__tool_directory__/data_manager_rsync.py'
+        '${out_file}'
+    ]]></command>
     <inputs>
-        
         <param name="dbkey" type="genomebuild" label="dbkey to search for Reference Data" help="Specify ? to show all"/>
 
-        <param name="data_table_names" type="select" display="checkboxes" multiple="True" optional="True" 
-        label="Choose Desired Data Tables" dynamic_options="galaxy_code_get_available_data_tables( __trans__ )"
-        refresh_on_change="dbkey"/>
-        
-        
-        <param name="data_table_entries" type="select" display="checkboxes" multiple="True" optional="False" 
-        label="Choose Desired Data Tables Entries" dynamic_options="galaxy_code_get_available_data_tables_entries( __trans__, dbkey, data_table_names )"
-        refresh_on_change="dbkey"/>
+        <param name="data_table_names" type="select" display="checkboxes" multiple="True" optional="True" refresh_on_change="dbkey"
+            label="Choose Desired Data Tables" dynamic_options="galaxy_code_get_available_data_tables( __trans__ )" />
+
+        <param name="data_table_entries" type="select" display="checkboxes" multiple="true" optional="false" refresh_on_change="dbkey"
+            label="Choose Desired Data Tables Entries" dynamic_options="galaxy_code_get_available_data_tables_entries( __trans__, dbkey, data_table_names )" />
     </inputs>
     <outputs>
         <data name="out_file" format="data_manager_json" dbkey="dbkey"/>
@@ -37,12 +32,9 @@
 
 ------
 
-
-
 .. class:: infomark
 
-**Notice:** If you do not have a particular data table defined, then it will be created and persisted dynamically. 
-
+**Notice:** If you do not have a particular data table defined, then it will be created and persisted dynamically.
     </help>
     <code file="data_manager_rsync.py" />
 </tool>