1
|
1 #!/usr/bin/python
|
|
2 import os
|
|
3 import optparse
|
|
4 import sys
|
|
5 import time
|
|
6 import re
|
|
7
|
|
8 import vdb_common
|
|
9 import vdb_retrieval
|
|
10
|
|
class MyParser(optparse.OptionParser):
    """
    OptionParser whose epilog is emitted exactly as written.

    From http://stackoverflow.com/questions/1857346/python-optparse-how-to-include-additional-info-in-usage-output
    Provides a better display of formatted help info in epilog() portion of optParse.
    """

    def format_epilog(self, formatter):
        """Return the epilog text unchanged; *formatter* is ignored.

        An empty string is returned when no epilog was supplied, because
        optparse joins the pieces of the help text as strings and a None
        entry would make building the help output fail.
        """
        return self.epilog or ""
|
|
18
|
|
19
|
|
def stop_err(msg):
    """Write *msg* followed by a newline to stderr, then exit with status 1."""
    line = "%s\n" % msg
    sys.stderr.write(line)
    raise SystemExit(1)
|
|
23
|
|
24
|
|
class ReportEngine(object):
    """Command-line driver for the versioned data retrieval tool.

    Parses the command line and, for each requested retrieval, drives a
    ``vdb_retrieval.VDBRetrieval`` object through: data store lookup,
    version selection, dataset lookup, ``update_history`` and (optionally)
    ``get_workflow_data``.
    """

    def __init__(self):
        pass

    def __main__(self):
        """Run every retrieval requested via ``-r/--retrievals``.

        Any retrieval that cannot be satisfied prints an ``Error: ...`` line
        to stdout and exits the process with status 1. After all retrievals
        succeed, an empty file is created at the ``-o/--output`` path.
        """
        options, args = self.get_command_line()

        if options.retrievals is None:
            # Without this guard the .strip() below dies with an
            # AttributeError traceback instead of a readable message.
            self._fail('Error: no retrievals were requested (missing -r/--retrievals option)!')

        retrieval_obj = vdb_retrieval.VDBRetrieval()
        retrieval_obj.set_api(options.api_info_path)

        # Entries are separated by '|'; leading/trailing separators are ignored.
        for retrieval in options.retrievals.strip().strip('|').split('|'):
            self._process_retrieval(retrieval_obj, options, retrieval)

        # Output file needs to exist. Otherwise Galaxy doesn't generate a
        # placeholder file name for the output, and so we can't do things like
        # check for [placeholder name]_files folder. Nothing is reported in it
        # yet, so it is simply created empty.
        with open(options.output, 'w'):
            pass

    @staticmethod
    def _fail(message):
        """Print *message* to stdout and exit the process with status 1."""
        # A single-argument print(...) behaves the same whether print is a
        # statement (Python 2) or a function (Python 3).
        print(message)
        sys.exit(1)

    @staticmethod
    def _parse_retrieval(retrieval):
        """Split one "spec_file_id, [version list], [workflow_list]" entry.

        Returns a ``(spec_file_id, version_id, workflow_list)`` tuple.
        ``version_id`` is the first whitespace-separated token of the version
        field, or None when that field is absent or blank. ``workflow_list``
        is the stripped third field, or '' when it is absent.
        """
        params = retrieval.strip().split(',')
        spec_file_id = params[0]

        version_id = None
        if len(params) > 1:
            version_tokens = params[1].split()
            if version_tokens:
                # VersionList SHOULD just have 1 id
                version_id = version_tokens[0]

        workflow_list = params[2].strip() if len(params) > 2 else ''
        return spec_file_id, version_id, workflow_list

    def _process_retrieval(self, retrieval_obj, options, retrieval):
        """Carry out a single retrieval entry; exit with status 1 on failure.

        retrieval_obj -- the configured ``vdb_retrieval.VDBRetrieval`` instance.
        options       -- parsed command-line options (``globalRetrievalDate`` is read).
        retrieval     -- one raw entry from the ``--retrievals`` option.
        """
        spec_file_id, version_id, workflow_list = self._parse_retrieval(retrieval)

        if spec_file_id == 'none':
            self._fail('Error: Form was selected without requesting a data store to retrieve!')

        # STEP 1: Determine data store type and location
        data_store_spec = retrieval_obj.user_api.libraries.show_folder(retrieval_obj.library_id, spec_file_id)
        data_store_name = data_store_spec['name']
        data_store_type = retrieval_obj.test_data_store_type(data_store_name)
        base_folder_id = data_store_spec['folder_id']

        if not data_store_type:
            # data_store_type is falsy here (possibly None), so it cannot be
            # put in the message; report the name it was derived from instead.
            self._fail('Error: unrecognized data store type for [%s]' % (data_store_name,))

        ds_obj = retrieval_obj.get_data_store_gateway(data_store_type, spec_file_id)

        if version_id is None:
            # User didn't select version_id via "Add new retrieval"
            if options.globalRetrievalDate:
                retrieval_date = vdb_common.parse_date(options.globalRetrievalDate)
                version_id = ds_obj.get_version_options(global_retrieval_date=retrieval_date, selection=True)
            else:
                version_id = ''

        # Reestablishes file(s) if they don't exist on disk. Do data library links to it as well.
        ds_obj.get_version(version_id)
        if ds_obj.version_path is None:
            self._fail(
                "Error: unable to retrieve version [%s] from %s archive [%s]. Archive doesn't contain this version id?"
                % (version_id, data_store_type, ds_obj.library_version_path)
            )

        # Version data file(s) are sitting in [ds_obj.version_path] ready for retrieval.
        library_dataset_ids = retrieval_obj.get_library_version_datasets(
            ds_obj.library_version_path, base_folder_id, ds_obj.version_label, ds_obj.version_path
        )

        # The only thing that doesn't have cache lookup is "folder" data that isn't linked in.
        # In that case try lookup directly.
        if not library_dataset_ids and data_store_type == 'folder':
            library_version_datasets = retrieval_obj.get_library_folder_datasets(ds_obj.library_version_path)
            library_dataset_ids = [item['id'] for item in library_version_datasets]

        if not library_dataset_ids:
            self._fail(
                'Error: unable to retrieve version [%s] from %s archive [%s] '
                % (version_id, data_store_type, ds_obj.library_version_path)
            )

        # At this point we have references to the galaxy ids of the requested versioned dataset, after regeneration
        versioned_datasets = retrieval_obj.update_history(library_dataset_ids, ds_obj.library_version_path, version_id)

        if workflow_list:
            # We have workflow run via admin_api and admin_api history.
            retrieval_obj.get_workflow_data(workflow_list, versioned_datasets, version_id)

    def get_command_line(self):
        """Build the option parser and return ``(options, args)`` for sys.argv."""
        ## *************************** Parse Command Line *****************************
        parser = MyParser(
            description='This Galaxy tool retrieves versions of prepared data sources and places them in a galaxy "Versioned Data" library',
            usage='python versioned_data.py [options]',
            epilog=(
                'Details:\n'
                '\n'
                'This tool retrieves links to current or past versions of fasta (or other key-value text) databases from a cache kept in the data library called "Fasta Databases". It then places them into the current history so that subsequent tools can work with that data.\n'
            ),
        )

        parser.add_option('-r', '--retrievals', type='string', dest='retrievals',
            help='List of datasources and their versions and galaxy workflows to return')

        parser.add_option('-o', '--output', type='string', dest='output',
            help='Path of output log file to create')

        parser.add_option('-O', '--output_id', type='string', dest='output_id',
            help='Output identifier')

        parser.add_option('-d', '--date', type='string', dest='globalRetrievalDate',
            help='Provide date/time for data recall. Defaults to now.')

        # NOTE(review): this flag is parsed but nothing in this class reads
        # options.version -- confirm whether a version printout was intended.
        parser.add_option('-v', '--version', dest='version', default=False, action='store_true',
            help='Version number of this program.')

        parser.add_option('-s', '--api_info_path', type='string', dest='api_info_path', help='Galaxy user api key/path.')

        return parser.parse_args()
|
|
138
|
|
139
|
|
140
|
|
if __name__ == '__main__':
    # Timestamp taken before any work starts, used for the elapsed-time line.
    started_at = time.time()

    ReportEngine().__main__()

    elapsed_seconds = int(time.time() - started_at)
    print('Execution time (seconds): ' + str(elapsed_seconds))
|
|
149
|