1
+ − 1 #!/usr/bin/python
+ − 2 import os
+ − 3 import optparse
+ − 4 import sys
+ − 5 import time
+ − 6 import re
+ − 7
+ − 8 import vdb_common
+ − 9 import vdb_retrieval
+ − 10
class MyParser(optparse.OptionParser):
    """
    From http://stackoverflow.com/questions/1857346/python-optparse-how-to-include-additional-info-in-usage-output
    Provides a better display of formatted help info in epilog() portion of optParse.

    The stock OptionParser re-wraps the epilog text; this subclass emits it
    verbatim so that hand-formatted, multi-line epilogs keep their layout.
    """

    def format_epilog(self, formatter):
        """Return the epilog exactly as supplied, without re-wrapping.

        formatter -- the help formatter passed in by optparse; unused here
                     because the text is deliberately not reformatted.

        Returns an empty string when no epilog was configured:
        format_help() joins the pieces with "".join(), which would raise
        TypeError on None.
        """
        return self.epilog or ""
+ − 18
+ − 19
def stop_err( msg ):
    """Write msg followed by a newline to stderr, then exit with status 1."""
    line = "%s\n" % msg
    sys.stderr.write(line)
    # sys.exit(1) is exactly "raise SystemExit(1)".
    raise SystemExit(1)
+ − 23
+ − 24
class ReportEngine(object):
    """Command-line driver for the versioned data retrieval tool.

    Parses the command line, then for each requested retrieval looks up the
    data store, resolves a version id, asks the data store gateway to make
    that version available, places the resulting datasets in the history via
    the VDBRetrieval object, and optionally hands them to workflows.
    """

    def __init__(self):
        pass

    @staticmethod
    def _fail(message):
        """Print message to stdout and terminate the process with status 1."""
        print(message)
        sys.exit(1)

    @staticmethod
    def _select_version_id(ds_obj, params, global_retrieval_date):
        """Pick the version id for one retrieval request.

        ds_obj -- data store gateway object for the requested data store.
        params -- comma-split retrieval fields; params[1], when present and
                  non-blank, holds the version list.
        global_retrieval_date -- value of the --date option, or None.

        Returns the first id of the explicit version list if one was given;
        otherwise whatever ds_obj.get_version_options() selects for the
        parsed global retrieval date; otherwise an empty string.
        """
        if len(params) > 1:
            version_list = params[1].strip()
            if version_list:
                # Version list SHOULD just have 1 id; take the first one.
                return version_list.split()[0]

        # User didn't select a version id via "Add new retrieval".
        if global_retrieval_date:
            retrieval_date = vdb_common.parse_date(global_retrieval_date)
            return ds_obj.get_version_options(global_retrieval_date=retrieval_date, selection=True)

        return ''

    def __main__(self):
        """Run every retrieval requested on the command line.

        The --retrievals option is a '|' separated list of requests, each of
        the form "spec_file_id, [version list], [workflow_list]".  Any
        failure prints an error message and exits with status 1.  On success
        the file named by --output is created (currently with no content).
        """
        options, args = self.get_command_line()
        retrieval_obj = vdb_retrieval.VDBRetrieval()
        retrieval_obj.set_api(options.api_info_path)

        # Nothing is appended to this list at present, so the output file
        # written at the end is empty; it is still created on purpose.
        retrievals = []

        # optparse leaves the option as None when -r is not supplied.
        requested = (options.retrievals or '').strip().strip('|')

        for retrieval in requested.split('|'):
            # Normally xml form supplies "spec_file_id, [version list], [workflow_list]"
            params = retrieval.strip().split(',')

            spec_file_id = params[0].strip()

            # An empty id (no -r value at all) is the same situation as the
            # form's explicit 'none' choice.
            if not spec_file_id or spec_file_id == 'none':
                self._fail('Error: Form was selected without requesting a data store to retrieve!')

            # STEP 1: Determine data store type and location
            data_store_spec = retrieval_obj.user_api.libraries.show_folder(retrieval_obj.library_id, spec_file_id)
            data_store_type = retrieval_obj.test_data_store_type(data_store_spec['name'])
            base_folder_id = data_store_spec['folder_id']

            if not data_store_type:
                # data_store_type is falsy here, so report the folder name
                # that could not be classified instead of concatenating it.
                self._fail('Error: unrecognized data store type [%s]' % data_store_spec['name'])

            ds_obj = retrieval_obj.get_data_store_gateway(data_store_type, spec_file_id)

            version_id = self._select_version_id(ds_obj, params, options.globalRetrievalDate)

            # Reestablishes file(s) if they don't exist on disk. Do data library links to it as well.
            ds_obj.get_version(version_id)
            if ds_obj.version_path is None:
                self._fail("Error: unable to retrieve version [%s] from %s archive [%s]. Archive doesn't contain this version id?" % (version_id, data_store_type, ds_obj.library_version_path))

            # Version data file(s) are sitting in [ds_obj.version_path] ready for retrieval.
            library_dataset_ids = retrieval_obj.get_library_version_datasets(ds_obj.library_version_path, base_folder_id, ds_obj.version_label, ds_obj.version_path)

            # The only thing that doesn't have cache lookup is "folder" data that isn't linked in.
            # In that case try lookup directly.
            if not library_dataset_ids and data_store_type == 'folder':
                library_version_datasets = retrieval_obj.get_library_folder_datasets(ds_obj.library_version_path)
                library_dataset_ids = [item['id'] for item in library_version_datasets]

            if not library_dataset_ids:
                self._fail('Error: unable to retrieve version [%s] from %s archive [%s] ' % (version_id, data_store_type, ds_obj.library_version_path))

            # At this point we have references to the galaxy ids of the requested versioned dataset, after regeneration
            versioned_datasets = retrieval_obj.update_history(library_dataset_ids, ds_obj.library_version_path, version_id)

            if len(params) > 2:
                workflow_list = params[2].strip()
                if workflow_list:
                    # We have workflow run via admin_api and admin_api history.
                    retrieval_obj.get_workflow_data(workflow_list, versioned_datasets, version_id)

        result = retrievals

        # Output file needs to exist. Otherwise Galaxy doesn't generate a placeholder file name for the output, and so we can't do things like check for [placeholder name]_files folder. Add something to report on?
        with open(options.output, 'w') as fw:
            fw.writelines(result)

    def get_command_line(self):
        """Build the option parser and parse sys.argv.

        Returns the (options, args) pair produced by optparse's parse_args().
        """
        ## *************************** Parse Command Line *****************************
        parser = MyParser(
            description = 'This Galaxy tool retrieves versions of prepared data sources and places them in a galaxy "Versioned Data" library',
            usage = 'python versioned_data.py [options]',
            epilog="""Details:

This tool retrieves links to current or past versions of fasta (or other key-value text) databases from a cache kept in the data library called "Fasta Databases". It then places them into the current history so that subsequent tools can work with that data.
""")

        parser.add_option('-r', '--retrievals', type='string', dest='retrievals',
            help='List of datasources and their versions and galaxy workflows to return')

        parser.add_option('-o', '--output', type='string', dest='output',
            help='Path of output log file to create')

        parser.add_option('-O', '--output_id', type='string', dest='output_id',
            help='Output identifier')

        parser.add_option('-d', '--date', type='string', dest='globalRetrievalDate',
            help='Provide date/time for data recall. Defaults to now.')

        parser.add_option('-v', '--version', dest='version', default=False, action='store_true',
            help='Version number of this program.')

        parser.add_option('-s', '--api_info_path', type='string', dest='api_info_path', help='Galaxy user api key/path.')

        return parser.parse_args()
+ − 138
+ − 139
+ − 140
if __name__ == '__main__':

    # Wall-clock timing of the whole run, reported in whole seconds.
    started_at = time.time()

    ReportEngine().__main__()

    elapsed_seconds = int(time.time() - started_at)
    print('Execution time (seconds): ' + str(elapsed_seconds))
+ − 149