| 
1
 | 
     1 #!/usr/bin/python
 | 
| 
 | 
     2 import os
 | 
| 
 | 
     3 import optparse
 | 
| 
 | 
     4 import sys
 | 
| 
 | 
     5 import time
 | 
| 
 | 
     6 import re
 | 
| 
 | 
     7 
 | 
| 
 | 
     8 import vdb_common 
 | 
| 
 | 
     9 import vdb_retrieval
 | 
| 
 | 
    10 
 | 
| 
 | 
    11 class MyParser(optparse.OptionParser):
 | 
| 
 | 
    12 	"""
 | 
| 
 | 
    13 	 From http://stackoverflow.com/questions/1857346/python-optparse-how-to-include-additional-info-in-usage-output
 | 
| 
 | 
    14 	 Provides a better display of formatted help info in epilog() portion of optParse.
 | 
| 
 | 
    15 	"""
 | 
| 
 | 
    16 	def format_epilog(self, formatter):
 | 
| 
 | 
    17 		return self.epilog
 | 
| 
 | 
    18 
 | 
| 
 | 
    19 
 | 
| 
 | 
    20 def stop_err( msg ):
 | 
| 
 | 
    21     sys.stderr.write("%s\n" % msg)
 | 
| 
 | 
    22     sys.exit(1)
 | 
| 
 | 
    23 
 | 
| 
 | 
    24 
 | 
| 
 | 
    25 class ReportEngine(object):
 | 
| 
 | 
    26 
 | 
| 
 | 
    27 	def __init__(self): pass
 | 
| 
 | 
    28 
 | 
| 
 | 
    29 	def __main__(self):
 | 
| 
 | 
    30 
 | 
| 
 | 
    31 		options, args = self.get_command_line()
 | 
| 
 | 
    32 		retrieval_obj = vdb_retrieval.VDBRetrieval()
 | 
| 
 | 
    33 		retrieval_obj.set_api(options.api_info_path)
 | 
| 
 | 
    34 		
 | 
| 
 | 
    35 		retrievals=[]
 | 
| 
 | 
    36 				
 | 
| 
 | 
    37 		for retrieval in options.retrievals.strip().strip('|').split('|'):
 | 
| 
 | 
    38 			# Normally xml form supplies "spec_file_id, [version list], [workflow_list]"
 | 
| 
 | 
    39 			params = retrieval.strip().split(',')
 | 
| 
 | 
    40 			
 | 
| 
 | 
    41 			spec_file_id = params[0]
 | 
| 
 | 
    42 			
 | 
| 
 | 
    43 			if spec_file_id == 'none':
 | 
| 
 | 
    44 				print 'Error: Form was selected without requesting a data store to retrieve!'
 | 
| 
 | 
    45 				sys.exit( 1 )
 | 
| 
 | 
    46 				
 | 
| 
 | 
    47 			# STEP 1:  Determine data store type and location
 | 
| 
 | 
    48 			data_store_spec = retrieval_obj.user_api.libraries.show_folder(retrieval_obj.library_id, spec_file_id)
 | 
| 
 | 
    49 			data_store_type = retrieval_obj.test_data_store_type(data_store_spec['name'])
 | 
| 
 | 
    50 			base_folder_id = data_store_spec['folder_id']			
 | 
| 
 | 
    51 		
 | 
| 
 | 
    52 			if not data_store_type:
 | 
| 
 | 
    53 				print 'Error: unrecognized data store type [' + data_store_type + ']'
 | 
| 
 | 
    54 				sys.exit( 1 )
 | 
| 
 | 
    55 	
 | 
| 
 | 
    56 			ds_obj = retrieval_obj.get_data_store_gateway(data_store_type, spec_file_id)
 | 
| 
 | 
    57 
 | 
| 
 | 
    58 			if len(params) > 1 and len(params[1].strip()) > 0:			
 | 
| 
 | 
    59 				_versionList = params[1].strip()
 | 
| 
 | 
    60 				version_id = _versionList.split()[0] # VersionList SHOULD just have 1 id
 | 
| 
 | 
    61 			else:
 | 
| 
 | 
    62 				# User didn't select version_id via "Add new retrieval"
 | 
| 
 | 
    63 				if options.globalRetrievalDate:
 | 
| 
 | 
    64 					_retrieval_date = vdb_common.parse_date(options.globalRetrievalDate)
 | 
| 
 | 
    65 					version_id = ds_obj.get_version_options(global_retrieval_date=_retrieval_date, selection=True)
 | 
| 
 | 
    66 					
 | 
| 
 | 
    67 				else:
 | 
| 
 | 
    68 					version_id = ''
 | 
| 
 | 
    69 			
 | 
| 
 | 
    70 			# Reestablishes file(s) if they don't exist on disk. Do data library links to it as well.
 | 
| 
 | 
    71 			ds_obj.get_version(version_id)
 | 
| 
 | 
    72 			if ds_obj.version_path == None:
 | 
| 
 | 
    73 			
 | 
| 
 | 
    74 					print "Error: unable to retrieve version [%s] from %s archive [%s].  Archive doesn't contain this version id?" % (version_id, data_store_type, ds_obj.library_version_path)
 | 
| 
 | 
    75 					sys.exit( 1 )
 | 
| 
 | 
    76 		
 | 
| 
 | 
    77 			# Version data file(s) are sitting in [ds_obj.version_path] ready for retrieval.
 | 
| 
 | 
    78 			library_dataset_ids = retrieval_obj.get_library_version_datasets(ds_obj.library_version_path, base_folder_id, ds_obj.version_label, ds_obj.version_path)
 | 
| 
 | 
    79 			
 | 
| 
 | 
    80 			# The only thing that doesn't have cache lookup is "folder" data that isn't linked in.
 | 
| 
 | 
    81 			# In that case try lookup directly.	
 | 
| 
 | 
    82 			if len(library_dataset_ids) == 0 and data_store_type == 'folder':
 | 
| 
 | 
    83 				library_version_datasets = retrieval_obj.get_library_folder_datasets(ds_obj.library_version_path)
 | 
| 
 | 
    84 				library_dataset_ids = [item['id'] for item in library_version_datasets]
 | 
| 
 | 
    85 				
 | 
| 
 | 
    86 			if len(library_dataset_ids) == 0:
 | 
| 
 | 
    87 			
 | 
| 
 | 
    88 					print 'Error: unable to retrieve version [%s] from %s archive [%s] ' % (version_id, data_store_type, ds_obj.library_version_path)
 | 
| 
 | 
    89 					sys.exit( 1 )
 | 
| 
 | 
    90 			
 | 
| 
 | 
    91 			# At this point we have references to the galaxy ids of the requested versioned dataset, after regeneration
 | 
| 
 | 
    92 			versioned_datasets = retrieval_obj.update_history(library_dataset_ids, ds_obj.library_version_path, version_id)
 | 
| 
 | 
    93 
 | 
| 
 | 
    94 			if len(params) > 2:
 | 
| 
 | 
    95 
 | 
| 
 | 
    96 				workflow_list = params[2].strip() 
 | 
| 
 | 
    97 			
 | 
| 
 | 
    98 				if len(workflow_list) > 0:
 | 
| 
 | 
    99 					# We have workflow run via admin_api and admin_api history.	
 | 
| 
 | 
   100 					retrieval_obj.get_workflow_data(workflow_list, versioned_datasets, version_id)
 | 
| 
 | 
   101 				
 | 
| 
 | 
   102 		
 | 
| 
 | 
   103 		result=retrievals
 | 
| 
 | 
   104 		
 | 
| 
 | 
   105 		# Output file needs to exist.  Otherwise Galaxy doesn't generate a placeholder file name for the output, and so we can't do things like check for [placeholder name]_files folder.  Add something to report on?
 | 
| 
 | 
   106 		with open(options.output,'w') as fw:
 | 
| 
 | 
   107 			fw.writelines(result)
 | 
| 
 | 
   108 
 | 
| 
 | 
   109 
 | 
| 
 | 
   110 	def get_command_line(self):
 | 
| 
 | 
   111 		## *************************** Parse Command Line *****************************
 | 
| 
 | 
   112 		parser = MyParser(
 | 
| 
 | 
   113 			description = 'This Galaxy tool retrieves versions of prepared data sources and places them in a galaxy "Versioned Data" library',
 | 
| 
 | 
   114 			usage = 'python versioned_data.py [options]',
 | 
| 
 | 
   115 			epilog="""Details:
 | 
| 
 | 
   116 
 | 
| 
 | 
   117 			This tool retrieves links to current or past versions of fasta (or other key-value text) databases from a cache kept in the data library called "Fasta Databases". It then places them into the current history so that subsequent tools can work with that data.
 | 
| 
 | 
   118 		""")
 | 
| 
 | 
   119 
 | 
| 
 | 
   120 		parser.add_option('-r', '--retrievals', type='string', dest='retrievals',
 | 
| 
 | 
   121 			help='List of datasources and their versions and galaxy workflows to return')
 | 
| 
 | 
   122 			
 | 
| 
 | 
   123 		parser.add_option('-o', '--output', type='string', dest='output', 
 | 
| 
 | 
   124 			help='Path of output log file to create')
 | 
| 
 | 
   125 
 | 
| 
 | 
   126 		parser.add_option('-O', '--output_id', type='string', dest='output_id', 
 | 
| 
 | 
   127 			help='Output identifier')
 | 
| 
 | 
   128 			
 | 
| 
 | 
   129 		parser.add_option('-d', '--date', type='string', dest='globalRetrievalDate', 
 | 
| 
 | 
   130 			help='Provide date/time for data recall.  Defaults to now.')
 | 
| 
 | 
   131 		
 | 
| 
 | 
   132 		parser.add_option('-v', '--version', dest='version', default=False, action='store_true', 
 | 
| 
 | 
   133 			help='Version number of this program.')
 | 
| 
 | 
   134 			
 | 
| 
 | 
   135 		parser.add_option('-s', '--api_info_path', type='string', dest='api_info_path', help='Galaxy user api key/path.')	
 | 
| 
 | 
   136 			
 | 
| 
 | 
   137 		return parser.parse_args()
 | 
| 
 | 
   138 			
 | 
| 
 | 
   139 
 | 
| 
 | 
   140 
 | 
| 
 | 
   141 if __name__ == '__main__':
 | 
| 
 | 
   142 
 | 
| 
 | 
   143 	time_start = time.time()
 | 
| 
 | 
   144 
 | 
| 
 | 
   145 	reportEngine = ReportEngine()
 | 
| 
 | 
   146 	reportEngine.__main__()
 | 
| 
 | 
   147 	
 | 
| 
 | 
   148 	print('Execution time (seconds): ' + str(int(time.time()-time_start)))
 | 
| 
 | 
   149 
 |