# HG changeset patch
# User iuc
# Date 1574974067 18000
# Node ID e0329ab30f6d0041f1ea2063db43476ceb8e3980
# Parent 8ff92bd7e2a374f993f19393ea29d89049467ede
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_rsync_g2 commit 57f71aa633a43ab02bbf05acd0c6d7f406e01f1e"
diff -r 8ff92bd7e2a3 -r e0329ab30f6d data_manager/data_manager_rsync.py
--- a/data_manager/data_manager_rsync.py Tue Apr 04 18:13:26 2017 -0400
+++ b/data_manager/data_manager_rsync.py Thu Nov 28 15:47:47 2019 -0500
@@ -2,6 +2,7 @@
# Dan Blankenberg
from __future__ import print_function
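+# str.encode('base64') was removed in Python 3; the base64 module is used instead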
+import base64
import datetime
import logging
import optparse
@@ -25,7 +26,7 @@
_log_name = __name__
if _log_name == '__builtin__':
_log_name = 'toolshed.installed.g2.rsync.data.manager'
-log = logging.getLogger( _log_name )
+log = logging.getLogger(_log_name)
# Get the Data from the Galaxy Project rsync server
RSYNC_CMD = 'rsync'
@@ -35,16 +36,16 @@
# Pull the Tool Data Table files from github
# FIXME: These files should be accessible from the rsync server directly.
-TOOL_DATA_TABLE_CONF_XML_URLS = {'main': "https://raw.githubusercontent.com/galaxyproject/usegalaxy-playbook/master/files/galaxy/usegalaxy.org/config/tool_data_table_conf.xml",
- 'test': "https://raw.githubusercontent.com/galaxyproject/usegalaxy-playbook/master/files/galaxy/test.galaxyproject.org/config/tool_data_table_conf.xml" }
+TOOL_DATA_TABLE_CONF_XML_URLS = {'main': "https://raw.githubusercontent.com/galaxyproject/usegalaxy-playbook/master/env/main/files/galaxy/config/tool_data_table_conf.xml",
+ 'test': "https://raw.githubusercontent.com/galaxyproject/usegalaxy-playbook/master/env/test/files/galaxy/config/tool_data_table_conf.xml"}
# Replace data table source entries with local temporary location
GALAXY_DATA_CANONICAL_PATH = "/galaxy/data/"
-TOOL_DATA_TABLE_CONF_XML_REPLACE_SOURCE = '<file path="%slocation/' % ( GALAXY_DATA_CANONICAL_PATH )
+TOOL_DATA_TABLE_CONF_XML_REPLACE_SOURCE = '<file path="%slocation/' % (GALAXY_DATA_CANONICAL_PATH)
TOOL_DATA_TABLE_CONF_XML_REPLACE_TARGET = '<file path="%s/'
# Some basic Caching, so we don't have to reload and download everything every time
-CACHE_TIME = datetime.timedelta( minutes=10 )
+CACHE_TIME = datetime.timedelta(minutes=10)
TOOL_DATA_TABLES_LOADED_BY_URL = {}
# Entries will not be selected by default
@@ -60,179 +61,179 @@
# --- These methods are called by/within the Galaxy Application
-def exec_before_job( app, inp_data, out_data, param_dict, tool=None, **kwd ):
+def exec_before_job(app, inp_data, out_data, param_dict, tool=None, **kwd):
# Look for any data tables that haven't been defined for this data manager before and dynamically add them to Galaxy
- param_dict = dict( **param_dict )
- param_dict['data_table_entries'] = param_dict.get( 'data_table_entries', [] )
- if not isinstance( param_dict['data_table_entries'], list ):
+ param_dict = dict(**param_dict)
+ param_dict['data_table_entries'] = param_dict.get('data_table_entries', [])
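+ # a single selected entry arrives as a bare string; normalize to a list before joining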
+ if not isinstance(param_dict['data_table_entries'], list):
param_dict['data_table_entries'] = [param_dict['data_table_entries']]
- param_dict['data_table_entries'] = ",".join( param_dict['data_table_entries'] )
+ param_dict['data_table_entries'] = ",".join(param_dict['data_table_entries'])
if tool:
tool_shed_repository = tool.tool_shed_repository
else:
tool_shed_repository = None
tdtm = None
- data_manager = app.data_managers.get_manager( tool.data_manager_id, None )
- data_table_entries = get_data_table_entries( param_dict )
- data_tables = load_data_tables_from_url( data_table_class=app.tool_data_tables.__class__ ).get( 'data_tables' )
+ data_manager = app.data_managers.get_manager(tool.data_manager_id, None)
+ data_table_entries = get_data_table_entries(param_dict)
+ data_tables = load_data_tables_from_url(data_table_class=app.tool_data_tables.__class__).get('data_tables')
for data_table_name, entries in data_table_entries.items():
# get data table managed by this data Manager
- has_data_table = app.tool_data_tables.get_tables().get( data_table_name )
+ has_data_table = app.tool_data_tables.get_tables().get(str(data_table_name))
if has_data_table:
- has_data_table = bool( has_data_table.get_filename_for_source( data_manager, None ) )
+ has_data_table = bool(has_data_table.get_filename_for_source(data_manager, None))
if not has_data_table:
if tdtm is None:
from tool_shed.tools import data_table_manager
- tdtm = data_table_manager.ToolDataTableManager( app )
- target_dir, tool_path, relative_target_dir = tdtm.get_target_install_dir( tool_shed_repository )
+ tdtm = data_table_manager.ToolDataTableManager(app)
+ target_dir, tool_path, relative_target_dir = tdtm.get_target_install_dir(tool_shed_repository)
# Dynamically add this data table
- log.debug( "Attempting to dynamically create a missing Tool Data Table named %s." % data_table_name )
+ log.debug("Attempting to dynamically create a missing Tool Data Table named %s." % data_table_name)
data_table = data_tables[data_table_name]
- repo_info = tdtm.generate_repository_info_elem_from_repository( tool_shed_repository, parent_elem=None )
+ repo_info = tdtm.generate_repository_info_elem_from_repository(tool_shed_repository, parent_elem=None)
if repo_info is not None:
- repo_info = tostring( repo_info )
- tmp_file = tempfile.NamedTemporaryFile()
- tmp_file.write( get_new_xml_definition( app, data_table, data_manager, repo_info, target_dir ) )
+ repo_info = tostring(repo_info)
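+ # open in text mode: get_new_xml_definition() returns str, which Python 3 cannot write to a binary handle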
+ tmp_file = tempfile.NamedTemporaryFile(mode="w")
+ tmp_file.write(get_new_xml_definition(app, data_table, data_manager, repo_info, target_dir))
tmp_file.flush()
- app.tool_data_tables.add_new_entries_from_config_file( tmp_file.name, None, app.config.shed_tool_data_table_config, persist=True )
+ app.tool_data_tables.add_new_entries_from_config_file(tmp_file.name, None, app.config.shed_tool_data_table_config, persist=True)
tmp_file.close()
-def galaxy_code_get_available_data_tables( trans ):
+def galaxy_code_get_available_data_tables(trans):
# list of data tables
- found_tables = get_available_tables( trans )
- rval = [ ( x, x, DEFAULT_SELECTED ) for x in found_tables]
+ found_tables = get_available_tables(trans)
+ rval = [(x, x, DEFAULT_SELECTED) for x in found_tables]
return rval
-def galaxy_code_get_available_data_tables_entries( trans, dbkey, data_table_names ):
+def galaxy_code_get_available_data_tables_entries(trans, dbkey, data_table_names):
# available entries, optionally filtered by dbkey and table names
- if dbkey in [ None, '', '?' ]:
+ if dbkey in [None, '', '?']:
dbkey = None
- if data_table_names in [ None, '', '?' ]:
+ if data_table_names in [None, '', '?']:
data_table_names = None
- found_tables = get_available_tables_for_dbkey( trans, dbkey, data_table_names )
- dbkey_text = '(%s) ' % ( dbkey ) if dbkey else ''
- rval = [( "%s%s" % ( dbkey_text, x[0] ), dumps( dict( name=x[0].split( ': ' )[0], entry=x[1] ) ).encode( 'base64' ).rstrip(), DEFAULT_SELECTED ) for x in found_tables.items()]
+ found_tables = get_available_tables_for_dbkey(trans, dbkey, data_table_names)
+ dbkey_text = '(%s) ' % (dbkey) if dbkey else ''
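+ # encode each entry as base64-wrapped JSON so it round-trips safely as a select option value;
+ # get_data_table_entries() performs the matching decode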
+ rval = [("%s%s" % (dbkey_text, x[0]), base64.b64encode(dumps(dict(name=x[0].split(': ')[0], entry=x[1]), sort_keys=True).rstrip().encode('utf-8')).decode('utf-8'), DEFAULT_SELECTED) for x in found_tables.items()]
return rval
# --- End Galaxy called Methods ---
-def rsync_urljoin( base, url ):
+def rsync_urljoin(base, url):
# urlparse.urljoin doesn't work correctly for our use-case
- # probably because it doesn't recognize the rsync scheme
- base = base.rstrip( '/' )
- url = url.lstrip( '/' )
- return "%s/%s" % ( base, url )
+ # probably because it doesn't recognize the rsync scheme
+ base = base.rstrip('/')
+ url = url.lstrip('/')
+ return "%s/%s" % (base, url)
-def rsync_list_dir( server, dir=None, skip_names=[] ):
+def rsync_list_dir(server, dir=None, skip_names=[]):
# drwxr-xr-x 50 2014/05/16 20:58:11 .
if dir:
- dir = rsync_urljoin( server, dir )
+ dir = rsync_urljoin(server, dir)
else:
dir = server
rsync_response = tempfile.NamedTemporaryFile()
rsync_stderr = tempfile.NamedTemporaryFile()
- rsync_cmd = [ RSYNC_CMD, '--list-only', dir ]
- return_code = subprocess.call( rsync_cmd, stdout=rsync_response, stderr=rsync_stderr )
+ rsync_cmd = [RSYNC_CMD, '--list-only', dir]
+ return_code = subprocess.call(rsync_cmd, stdout=rsync_response, stderr=rsync_stderr)
rsync_response.flush()
rsync_response.seek(0)
rsync_stderr.flush()
rsync_stderr.seek(0)
if return_code:
- msg = "stdout:\n%s\nstderr:\n%s" % ( rsync_response.read(), rsync_stderr.read() )
+ msg = "stdout:\n%s\nstderr:\n%s" % (rsync_response.read(), rsync_stderr.read())
rsync_response.close()
rsync_stderr.close()
- raise Exception( 'Failed to execute rsync command (%s), returncode=%s. Rsync_output:\n%s' % ( rsync_cmd, return_code, msg ) )
+ raise Exception('Failed to execute rsync command (%s), returncode=%s. Rsync_output:\n%s' % (rsync_cmd, return_code, msg))
rsync_stderr.close()
rval = {}
for line in rsync_response:
- perms, line = line.split( None, 1 )
+ perms, line = line.split(None, 1)
line = line.strip()
- size, line = line.split( None, 1 )
+ size, line = line.split(None, 1)
line = line.strip()
- date, line = line.split( None, 1 )
+ date, line = line.split(None, 1)
line = line.strip()
- time, line = line.split( None, 1 )
+ time, line = line.split(None, 1)
name = line.strip()
if name in skip_names:
continue
- size = line.strip()
- rval[ name ] = dict( name=name, permissions=perms, bytes=size, date=date, time=time )
+ rval[name] = dict(name=name, permissions=perms, bytes=size, date=date, time=time)
rsync_response.close()
return rval
-def rsync_sync_to_dir( source, target ):
+def rsync_sync_to_dir(source, target):
rsync_response = tempfile.NamedTemporaryFile()
rsync_stderr = tempfile.NamedTemporaryFile()
- rsync_cmd = [ RSYNC_CMD, '-avzP', source, target ]
- return_code = subprocess.call( rsync_cmd, stdout=rsync_response, stderr=rsync_stderr )
+ rsync_cmd = [RSYNC_CMD, '-avzP', source, target]
+ return_code = subprocess.call(rsync_cmd, stdout=rsync_response, stderr=rsync_stderr)
rsync_response.flush()
rsync_response.seek(0)
rsync_stderr.flush()
rsync_stderr.seek(0)
if return_code:
- msg = "stdout:\n%s\nstderr:\n%s" % ( rsync_response.read(), rsync_stderr.read() )
+ msg = "stdout:\n%s\nstderr:\n%s" % (rsync_response.read(), rsync_stderr.read())
rsync_response.close()
rsync_stderr.close()
- raise Exception( 'Failed to execute rsync command (%s), returncode=%s. Rsync_output:\n%s' % ( rsync_cmd, return_code, msg ) )
+ raise Exception('Failed to execute rsync command (%s), returncode=%s. Rsync_output:\n%s' % (rsync_cmd, return_code, msg))
rsync_response.close()
rsync_stderr.close()
return return_code
-def data_table_needs_refresh( cached_data_table, url ):
+def data_table_needs_refresh(cached_data_table, url):
if cached_data_table is None:
return True, {}
- if datetime.datetime.now() - cached_data_table.get( 'time_loaded' ) > CACHE_TIME:
- data_table_text = urlopen( url ).read()
- if cached_data_table.get( 'data_table_text', None ) != data_table_text:
+ if datetime.datetime.now() - cached_data_table.get('time_loaded') > CACHE_TIME:
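+ # urlopen() returns bytes on Python 3; decode before comparing against the cached str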
+ data_table_text = urlopen(url).read().decode('utf-8')
+ if cached_data_table.get('data_table_text', None) != data_table_text:
return True, {'data_table_text': data_table_text}
- loc_file_attrs = rsync_list_dir( RSYNC_SERVER, LOCATION_DIR )
- if cached_data_table.get( 'loc_file_attrs', None ) != loc_file_attrs:
+ loc_file_attrs = rsync_list_dir(RSYNC_SERVER, LOCATION_DIR)
+ if cached_data_table.get('loc_file_attrs', None) != loc_file_attrs:
return True, {'loc_file_attrs': loc_file_attrs}
return False, {}
-def load_data_tables_from_url( url=None, site='main', data_table_class=None ):
+def load_data_tables_from_url(url=None, site='main', data_table_class=None):
if not url:
- url = TOOL_DATA_TABLE_CONF_XML_URLS.get( site, None )
- assert url, ValueError( 'You must provide either a URL or a site=name.' )
+ url = TOOL_DATA_TABLE_CONF_XML_URLS.get(site, None)
+ assert url, ValueError('You must provide either a URL or a site=name.')
- cached_data_table = TOOL_DATA_TABLES_LOADED_BY_URL.get( url, None )
- refresh, attribs = data_table_needs_refresh( cached_data_table, url )
+ cached_data_table = TOOL_DATA_TABLES_LOADED_BY_URL.get(url, None)
+ refresh, attribs = data_table_needs_refresh(cached_data_table, url)
if refresh:
- data_table_text = attribs.get( 'data_table_text' )or urlopen( url ).read()
- loc_file_attrs = attribs.get( 'loc_file_attrs' ) or rsync_list_dir( RSYNC_SERVER, LOCATION_DIR )
+ data_table_text = attribs.get('data_table_text') or urlopen(url).read().decode('utf-8')
+ loc_file_attrs = attribs.get('loc_file_attrs') or rsync_list_dir(RSYNC_SERVER, LOCATION_DIR)
- tmp_dir = tempfile.mkdtemp( prefix='rsync_g2_' )
- tmp_loc_dir = os.path.join( tmp_dir, 'location' )
- os.mkdir( tmp_loc_dir )
- rsync_sync_to_dir( rsync_urljoin( RSYNC_SERVER, LOCATION_DIR ), os.path.abspath( tmp_loc_dir ) )
+ tmp_dir = tempfile.mkdtemp(prefix='rsync_g2_')
+ tmp_loc_dir = os.path.join(tmp_dir, 'location')
+ os.mkdir(tmp_loc_dir)
+ rsync_sync_to_dir(rsync_urljoin(RSYNC_SERVER, LOCATION_DIR), os.path.abspath(tmp_loc_dir))
- new_data_table_text = data_table_text.replace( TOOL_DATA_TABLE_CONF_XML_REPLACE_SOURCE, TOOL_DATA_TABLE_CONF_XML_REPLACE_TARGET % ( tmp_loc_dir ) )
- data_table_fh = tempfile.NamedTemporaryFile( dir=tmp_dir, prefix='rysnc_data_manager_data_table_conf_' )
- data_table_fh.write( new_data_table_text )
+ new_data_table_text = data_table_text.replace(TOOL_DATA_TABLE_CONF_XML_REPLACE_SOURCE, TOOL_DATA_TABLE_CONF_XML_REPLACE_TARGET % (tmp_loc_dir))
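+ # NamedTemporaryFile defaults to binary mode; use text mode since new_data_table_text is a str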
+ data_table_fh = tempfile.NamedTemporaryFile(dir=tmp_dir, prefix='rsync_data_manager_data_table_conf_', mode="w")
+ data_table_fh.write(new_data_table_text)
data_table_fh.flush()
- tmp_data_dir = os.path.join( tmp_dir, 'tool-data' )
- os.mkdir( tmp_data_dir )
- data_tables = data_table_class( tmp_data_dir, config_filename=data_table_fh.name )
+ tmp_data_dir = os.path.join(tmp_dir, 'tool-data')
+ os.mkdir(tmp_data_dir)
+ data_tables = data_table_class(tmp_data_dir, config_filename=data_table_fh.name)
for name, data_table in list(data_tables.data_tables.items()):
- if name in EXCLUDE_DATA_TABLES or not data_table_has_path_column( data_table ):
- log.debug( 'Removing data table "%s" because it is excluded by name or does not have a defined "path" column.', name )
+ if name in EXCLUDE_DATA_TABLES or not data_table_has_path_column(data_table):
+ log.debug('Removing data table "%s" because it is excluded by name or does not have a defined "path" column.', name)
del data_tables.data_tables[name]
- cached_data_table = { 'data_tables': data_tables, 'tmp_dir': tmp_dir, 'data_table_text': data_table_text, 'tmp_loc_dir': tmp_loc_dir, 'loc_file_attrs': loc_file_attrs, 'time_loaded': datetime.datetime.now() }
- TOOL_DATA_TABLES_LOADED_BY_URL[ url ] = cached_data_table
+ cached_data_table = {'data_tables': data_tables, 'tmp_dir': tmp_dir, 'data_table_text': data_table_text, 'tmp_loc_dir': tmp_loc_dir, 'loc_file_attrs': loc_file_attrs, 'time_loaded': datetime.datetime.now()}
+ TOOL_DATA_TABLES_LOADED_BY_URL[url] = cached_data_table
# delete the files
data_table_fh.close()
- cleanup_before_exit( tmp_dir )
+ cleanup_before_exit(tmp_dir)
return cached_data_table
-def data_table_has_path_column( data_table ):
+def data_table_has_path_column(data_table):
col_names = data_table.get_column_name_list()
for name in PATH_COLUMN_NAMES:
if name in col_names:
@@ -240,29 +241,29 @@
return False
-def get_available_tables( trans ):
+def get_available_tables(trans):
# list of data tables
- data_tables = load_data_tables_from_url( data_table_class=trans.app.tool_data_tables.__class__ )
- return list(data_tables.get( 'data_tables' ).get_tables().keys())
+ data_tables = load_data_tables_from_url(data_table_class=trans.app.tool_data_tables.__class__)
+ return data_tables.get('data_tables').get_tables().keys()
-def get_new_xml_definition( app, data_table, data_manager, repo_info=None, location_file_dir=None ):
- sub_dict = { 'table_name': data_table.name, 'comment_char': '', 'columns': '', 'file_path': '' }
- sub_dict.update( data_manager.get_tool_shed_repository_info_dict() )
+def get_new_xml_definition(app, data_table, data_manager, repo_info=None, location_file_dir=None):
+ sub_dict = {'table_name': data_table.name, 'comment_char': '', 'columns': '', 'file_path': ''}
+ sub_dict.update(data_manager.get_tool_shed_repository_info_dict())
if data_table.comment_char:
- sub_dict['comment_char'] = 'comment_char="%s"' % ( data_table.comment_char )
- for i, name in enumerate( data_table.get_column_name_list() ):
+ sub_dict['comment_char'] = 'comment_char="%s"' % (data_table.comment_char)
+ for i, name in enumerate(data_table.get_column_name_list()):
if name is not None:
- sub_dict['columns'] = "%s\n%s" % ( sub_dict['columns'], '<column name="%s" index="%s" />' % ( name, i ) )
+ sub_dict['columns'] = "%s\n%s" % (sub_dict['columns'], '<column name="%s" index="%s" />' % (name, i))
location_file_dir = location_file_dir or app.config.galaxy_data_manager_data_path
for filename in data_table.filenames.keys():
- sub_dict['file_path'] = basename( filename )
- sub_dict['file_path'] = os.path.join( location_file_dir, sub_dict['file_path'] ) # os.path.abspath?
- if not os.path.exists( sub_dict['file_path'] ):
+ sub_dict['file_path'] = basename(filename)
+ sub_dict['file_path'] = os.path.join(location_file_dir, sub_dict['file_path']) # os.path.abspath?
+ if not os.path.exists(sub_dict['file_path']):
# Create empty file
- open( sub_dict['file_path'], 'wb+' ).close()
+ open(sub_dict['file_path'], 'wb+').close()
break
- sub_dict[ 'repo_info' ] = repo_info or ''
+ sub_dict['repo_info'] = repo_info or ''
return """
<tables><table name="%(table_name)s" %(comment_char)s>
%(columns)s
@@ -272,32 +273,32 @@
""" % sub_dict
-def get_available_tables_for_dbkey( trans, dbkey, data_table_names ):
- data_tables = load_data_tables_from_url( data_table_class=trans.app.tool_data_tables.__class__ )
+def get_available_tables_for_dbkey(trans, dbkey, data_table_names):
+ data_tables = load_data_tables_from_url(data_table_class=trans.app.tool_data_tables.__class__)
rval = {}
- for name, data_table in data_tables.get( 'data_tables' ).get_tables().items():
- if ( not data_table_names or name in data_table_names ):
+ for name, data_table in data_tables.get('data_tables').get_tables().items():
+ if (not data_table_names or name in data_table_names):
# TODO: check that columns are similar
if not dbkey:
entry_getter = data_table.get_named_fields_list()
else:
- entry_getter = data_table.get_entries( 'dbkey', dbkey, None, default=[] )
+ entry_getter = data_table.get_entries('dbkey', dbkey, None, default=[])
for entry in entry_getter:
- name = "%s: %s" % ( data_table.name, dumps( entry ) )
+ name = "%s: %s" % (data_table.name, dumps(entry))
rval[name] = entry
return rval
-def split_path_all( path ):
+def split_path_all(path):
rval = []
- path = path.rstrip( '/' )
+ path = path.rstrip('/')
while True:
- head, tail = os.path.split( path )
+ head, tail = os.path.split(path)
if tail:
- rval.append( tail )
+ rval.append(tail)
path = head
elif head:
- rval.append( head )
+ rval.append(head)
break
else:
break
@@ -305,85 +306,85 @@
return rval
-def get_data_for_path( path, data_root_dir ):
+def get_data_for_path(path, data_root_dir):
# We list dir with a /, but copy data without
# listing with / gives a . entry when it's a dir
# cloning without the / will copy that whole directory into the target,
# instead of just that target's contents
- if path.startswith( GALAXY_DATA_CANONICAL_PATH ):
- path = path[ len( GALAXY_DATA_CANONICAL_PATH ):]
+ if path.startswith(GALAXY_DATA_CANONICAL_PATH):
+ path = path[len(GALAXY_DATA_CANONICAL_PATH):]
make_path = path
- rsync_source = rsync_urljoin( rsync_urljoin( RSYNC_SERVER, INDEX_DIR ), path )
- if rsync_source.endswith( '/' ):
+ rsync_source = rsync_urljoin(rsync_urljoin(RSYNC_SERVER, INDEX_DIR), path)
+ if rsync_source.endswith('/'):
rsync_source = rsync_source[:-1]
try:
- dir_list = rsync_list_dir( rsync_source + "/" )
+ dir_list = rsync_list_dir(rsync_source + "/")
except Exception:
dir_list = None
while not dir_list or '.' not in dir_list:
- head, tail = os.path.split( make_path )
+ head, tail = os.path.split(make_path)
if not head:
head = tail
make_path = head
- rsync_source = rsync_urljoin( rsync_urljoin( RSYNC_SERVER, INDEX_DIR ), head ) # if we error here, likely due to a connection issue
- if rsync_source.endswith( '/' ):
+ rsync_source = rsync_urljoin(rsync_urljoin(RSYNC_SERVER, INDEX_DIR), head) # if we error here, likely due to a connection issue
+ if rsync_source.endswith('/'):
rsync_source = rsync_source[:-1]
- dir_list = rsync_list_dir( rsync_source + "/" )
- split_path = split_path_all( make_path )
+ dir_list = rsync_list_dir(rsync_source + "/")
+ split_path = split_path_all(make_path)
target_path = data_root_dir
for p in split_path[:-1]:
- target_path = os.path.join( target_path, p )
- if not os.path.exists( target_path ):
- os.mkdir( target_path )
- rsync_sync_to_dir( rsync_source, target_path )
+ target_path = os.path.join(target_path, p)
+ if not os.path.exists(target_path):
+ os.mkdir(target_path)
+ rsync_sync_to_dir(rsync_source, target_path)
return path
-def get_data_and_munge_path( data_table_name, data_table_entry, data_root_dir ):
+def get_data_and_munge_path(data_table_name, data_table_entry, data_root_dir):
path_cols = []
for key, value in data_table_entry.items():
if key in PATH_COLUMN_NAMES:
- path_cols.append( ( key, value ) )
+ path_cols.append((key, value))
if path_cols:
for col_name, value in path_cols:
- if value.startswith( GALAXY_DATA_CANONICAL_PATH ):
- data_table_entry[col_name] = get_data_for_path( value, data_root_dir )
+ if value.startswith(GALAXY_DATA_CANONICAL_PATH):
+ data_table_entry[col_name] = get_data_for_path(value, data_root_dir)
else:
print('unable to determine location of rsync data for', data_table_name, data_table_entry)
return data_table_entry
-def fulfill_data_table_entries( data_table_entries, data_manager_dict, data_root_dir ):
+def fulfill_data_table_entries(data_table_entries, data_manager_dict, data_root_dir):
for data_table_name, entries in data_table_entries.items():
for entry in entries:
- entry = get_data_and_munge_path( data_table_name, entry, data_root_dir )
- _add_data_table_entry( data_manager_dict, data_table_name, entry )
+ entry = get_data_and_munge_path(data_table_name, entry, data_root_dir)
+ _add_data_table_entry(data_manager_dict, data_table_name, entry)
return data_manager_dict
-def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ):
- data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
- data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get( data_table_name, [] )
- data_manager_dict['data_tables'][data_table_name].append( data_table_entry )
+def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
+ data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
+ data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, [])
+ data_manager_dict['data_tables'][data_table_name].append(data_table_entry)
return data_manager_dict
-def cleanup_before_exit( tmp_dir ):
- if tmp_dir and os.path.exists( tmp_dir ):
- shutil.rmtree( tmp_dir )
+def cleanup_before_exit(tmp_dir):
+ if tmp_dir and os.path.exists(tmp_dir):
+ shutil.rmtree(tmp_dir)
-def get_data_table_entries( params ):
+def get_data_table_entries(params):
rval = {}
- data_table_entries = params.get( 'data_table_entries', None )
+ data_table_entries = params.get('data_table_entries', None)
if data_table_entries:
- for entry_text in data_table_entries.split( ',' ):
- entry_text = entry_text.strip().decode( 'base64' )
- entry_dict = loads( entry_text )
+ for entry_text in data_table_entries.split(','):
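+ # reverse the base64(JSON) encoding applied in galaxy_code_get_available_data_tables_entries()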
+ entry_text = base64.b64decode(entry_text.strip().encode('utf-8'))
+ entry_dict = loads(entry_text)
data_table_name = entry_dict['name']
data_table_entry = entry_dict['entry']
- rval[ data_table_name ] = rval.get( data_table_name, [] )
- rval[ data_table_name ].append( data_table_entry )
+ rval[data_table_name] = rval.get(data_table_name, [])
+ rval[data_table_name].append(data_table_entry)
return rval
@@ -393,18 +394,18 @@
filename = args[0]
- params = loads( open( filename ).read() )
- target_directory = params[ 'output_data' ][0]['extra_files_path']
- os.mkdir( target_directory )
+ params = loads(open(filename).read())
+ target_directory = params['output_data'][0]['extra_files_path']
+ os.mkdir(target_directory)
data_manager_dict = {}
- data_table_entries = get_data_table_entries( params['param_dict'] )
+ data_table_entries = get_data_table_entries(params['param_dict'])
# Populate the data Tables
- data_manager_dict = fulfill_data_table_entries( data_table_entries, data_manager_dict, target_directory )
+ data_manager_dict = fulfill_data_table_entries(data_table_entries, data_manager_dict, target_directory)
# save info to json file
- open( filename, 'wb' ).write( dumps( data_manager_dict ) )
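+ # sort_keys makes the JSON output deterministic, so the functional test can compare the result file verbatim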
+ open(filename, 'w').write(dumps(data_manager_dict, sort_keys=True))
if __name__ == "__main__":
diff -r 8ff92bd7e2a3 -r e0329ab30f6d data_manager/data_manager_rsync.xml
--- a/data_manager/data_manager_rsync.xml Tue Apr 04 18:13:26 2017 -0400
+++ b/data_manager/data_manager_rsync.xml Thu Nov 28 15:47:47 2019 -0500
@@ -1,17 +1,21 @@
-<tool id="data_manager_rsync_g2" name="Rsync with g2" version="0.0.1" tool_type="manage_data">
+<tool id="data_manager_rsync_g2" name="Rsync with g2" version="0.0.3" tool_type="manage_data" profile="19.05">
<options sanitize="False" />
<description>fetching</description>
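+ <!-- resolve python and rsync through the tool's declared requirements instead of whatever is on the host PATH -->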
+ <requirements>
+ <requirement type="package" version="3.7">python</requirement>
+ <requirement type="package" version="3.1.3">rsync</requirement>
+ </requirements>
<command detect_errors="exit_code"><![CDATA[
python '$__tool_directory__/data_manager_rsync.py'
'${out_file}'
]]></command>
<inputs>
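+ <!-- refresh_on_change re-evaluates the form so the dynamic_options below stay in sync with the selected dbkey -->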
- <param name="dbkey" type="genomebuild" label="dbkey to search for Reference Data" help="Specify ? to show all"/>
+ <param name="dbkey" type="genomebuild" label="dbkey to search for Reference Data" help="Specify ? to show all" refresh_on_change="True"/>
- <param name="data_table_names" type="select" display="checkboxes" multiple="True" optional="True" refresh_on_change="dbkey"
+ <param name="data_table_names" type="select" display="checkboxes" multiple="True" optional="True" refresh_on_change="True"
label="Choose Desired Data Tables" dynamic_options="galaxy_code_get_available_data_tables( __trans__ )" />
- <param name="data_table_entries" type="select" display="checkboxes" multiple="true" optional="false" refresh_on_change="dbkey"
+ <param name="data_table_entries" type="select" display="checkboxes" multiple="true" optional="false"
label="Choose Desired Data Tables Entries" dynamic_options="galaxy_code_get_available_data_tables_entries( __trans__, dbkey, data_table_names )" />
</inputs>
<outputs>
@@ -21,7 +25,7 @@
<test>
<param name="dbkey" value="sacCer2"/>
<param name="data_table_names" value="all_fasta"/>
- <param name="data_table_entries" value="eyJlbnRyeSI6IHsicGF0aCI6ICIvZ2FsYXh5L2RhdGEvc2FjQ2VyMi9zZXEvc2FjQ2VyMi5mYSIs ICJkYmtleSI6ICJzYWNDZXIyIiwgInZhbHVlIjogInNhY0NlcjIiLCAibmFtZSI6ICJZZWFzdCAo U2FjY2hhcm9teWNlcyBjZXJldmlzaWFlKTogc2FjQ2VyMiJ9LCAibmFtZSI6ICJhbGxfZmFzdGEi fQ=="/>
+ <param name="data_table_entries" value="eyJlbnRyeSI6IHsiZGJrZXkiOiAic2FjQ2VyMiIsICJuYW1lIjogIlllYXN0IChTYWNjaGFyb215Y2VzIGNlcmV2aXNpYWUpOiBzYWNDZXIyIiwgInBhdGgiOiAiL2N2bWZzL2RhdGEuZ2FsYXh5cHJvamVjdC5vcmcvYnloYW5kL3NhY0NlcjIvc2VxL3NhY0NlcjIuZmEiLCAidmFsdWUiOiAic2FjQ2VyMiJ9LCAibmFtZSI6ICJhbGxfZmFzdGEifQ=="/>
<output name="out_file" file="sacCer2_rsync_all_fasta.data_manager_json"/>
</test>
</tests>
diff -r 8ff92bd7e2a3 -r e0329ab30f6d test-data/all_fasta.loc
diff -r 8ff92bd7e2a3 -r e0329ab30f6d test-data/sacCer2_rsync_all_fasta.data_manager_json
--- a/test-data/sacCer2_rsync_all_fasta.data_manager_json Tue Apr 04 18:13:26 2017 -0400
+++ b/test-data/sacCer2_rsync_all_fasta.data_manager_json Thu Nov 28 15:47:47 2019 -0500
@@ -1,1 +1,1 @@
-{"data_tables": {"all_fasta": [{"path": "sacCer2/seq/sacCer2.fa", "value": "sacCer2", "dbkey": "sacCer2", "name": "Yeast (Saccharomyces cerevisiae): sacCer2"}]}}
\ No newline at end of file
+{"data_tables": {"all_fasta": [{"dbkey": "sacCer2", "name": "Yeast (Saccharomyces cerevisiae): sacCer2", "path": "/cvmfs/data.galaxyproject.org/byhand/sacCer2/seq/sacCer2.fa", "value": "sacCer2"}]}}
\ No newline at end of file
diff -r 8ff92bd7e2a3 -r e0329ab30f6d tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Thu Nov 28 15:47:47 2019 -0500
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+ <!-- Locations of all fasta files under genome directory -->
+ <table name="all_fasta" comment_char="#">
+ <columns>value, dbkey, name, path</columns>
+ <file path="${__HERE__}/test-data/all_fasta.loc" />
+ </table>
+</tables>