comparison versioned_data.py @ 1:5c5027485f7d draft

Uploaded correct file
author damion
date Sun, 09 Aug 2015 16:07:50 -0400
parents
children
comparison
equal deleted inserted replaced
0:d31a1bd74e63 1:5c5027485f7d
1 #!/usr/bin/python
2 import os
3 import optparse
4 import sys
5 import time
6 import re
7
8 import vdb_common
9 import vdb_retrieval
10
class MyParser(optparse.OptionParser):
    """
    OptionParser variant that emits its epilog exactly as written.

    From http://stackoverflow.com/questions/1857346/python-optparse-how-to-include-additional-info-in-usage-output
    Provides a better display of formatted help info in the epilog portion
    of optparse output: the epilog text is returned untouched instead of
    being passed through the help formatter.
    """

    def format_epilog(self, formatter):
        """Return the epilog verbatim; ``formatter`` is intentionally ignored.

        Returns an empty string when no epilog was configured, because
        format_help() joins this value with the other help sections and a
        None entry would make that join fail.
        """
        return self.epilog or ""
18
19
def stop_err(msg):
    """Write ``msg`` followed by a newline to stderr, then exit with status 1."""
    error_line = "%s\n" % msg
    sys.stderr.write(error_line)
    # sys.exit(1) is implemented by raising SystemExit(1); raise it directly.
    raise SystemExit(1)
23
24
class ReportEngine(object):
    """
    Command-line driver for the versioned data retrieval tool.

    Parses the tool's options, then for every requested retrieval asks the
    vdb_retrieval layer to regenerate the selected version, update the
    history with its datasets, and optionally run workflows on them.
    """

    def __init__(self):
        pass

    def _fail(self, message):
        """Print ``message`` on stdout and terminate the process with exit status 1."""
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(message)
        sys.exit(1)

    def __main__(self):
        """
        Run every retrieval listed in the --retrievals option.

        The option value is a '|' separated list; each entry is a comma
        separated "spec_file_id, [version list], [workflow_list]" triple.
        Any failure prints an "Error: ..." line and exits with status 1.
        On success the output file named by --output is (re)created.
        """
        options, _ = self.get_command_line()

        # Without this guard a missing -r option fails with an AttributeError
        # on None, and an empty one sends an empty folder id to the API.
        if not options.retrievals or not options.retrievals.strip().strip('|'):
            self._fail('Error: no retrievals were requested (see the -r/--retrievals option).')

        retrieval_obj = vdb_retrieval.VDBRetrieval()
        retrieval_obj.set_api(options.api_info_path)

        # Lines destined for the output log; nothing is reported yet, so the
        # file is currently created empty.
        retrievals = []

        for retrieval in options.retrievals.strip().strip('|').split('|'):
            # Normally xml form supplies "spec_file_id, [version list], [workflow_list]"
            params = retrieval.strip().split(',')

            spec_file_id = params[0]

            if spec_file_id == 'none':
                self._fail('Error: Form was selected without requesting a data store to retrieve!')

            # STEP 1: Determine data store type and location
            data_store_spec = retrieval_obj.user_api.libraries.show_folder(retrieval_obj.library_id, spec_file_id)
            data_store_type = retrieval_obj.test_data_store_type(data_store_spec['name'])
            base_folder_id = data_store_spec['folder_id']

            if not data_store_type:
                # data_store_type is falsy here (possibly None), so it cannot be
                # concatenated into the message; report the name it was derived from.
                self._fail('Error: unrecognized data store type [%s]' % data_store_spec['name'])

            ds_obj = retrieval_obj.get_data_store_gateway(data_store_type, spec_file_id)

            version_selection = params[1].strip() if len(params) > 1 else ''
            if version_selection:
                # VersionList SHOULD just have 1 id
                version_id = version_selection.split()[0]
            elif options.globalRetrievalDate:
                # User didn't select version_id via "Add new retrieval"
                _retrieval_date = vdb_common.parse_date(options.globalRetrievalDate)
                version_id = ds_obj.get_version_options(global_retrieval_date=_retrieval_date, selection=True)
            else:
                version_id = ''

            # Reestablishes file(s) if they don't exist on disk. Do data library links to it as well.
            ds_obj.get_version(version_id)
            if ds_obj.version_path is None:
                self._fail(
                    "Error: unable to retrieve version [%s] from %s archive [%s]. Archive doesn't contain this version id?"
                    % (version_id, data_store_type, ds_obj.library_version_path)
                )

            # Version data file(s) are sitting in [ds_obj.version_path] ready for retrieval.
            library_dataset_ids = retrieval_obj.get_library_version_datasets(
                ds_obj.library_version_path, base_folder_id, ds_obj.version_label, ds_obj.version_path)

            # The only thing that doesn't have cache lookup is "folder" data that isn't linked in.
            # In that case try lookup directly.
            if not library_dataset_ids and data_store_type == 'folder':
                library_version_datasets = retrieval_obj.get_library_folder_datasets(ds_obj.library_version_path)
                library_dataset_ids = [item['id'] for item in library_version_datasets]

            if not library_dataset_ids:
                self._fail(
                    'Error: unable to retrieve version [%s] from %s archive [%s] '
                    % (version_id, data_store_type, ds_obj.library_version_path)
                )

            # At this point we have references to the galaxy ids of the requested versioned dataset, after regeneration
            versioned_datasets = retrieval_obj.update_history(library_dataset_ids, ds_obj.library_version_path, version_id)

            workflow_list = params[2].strip() if len(params) > 2 else ''
            if workflow_list:
                # We have workflow run via admin_api and admin_api history.
                retrieval_obj.get_workflow_data(workflow_list, versioned_datasets, version_id)

        # Output file needs to exist. Otherwise Galaxy doesn't generate a placeholder file name
        # for the output, and so we can't do things like check for [placeholder name]_files folder.
        # Add something to report on?
        with open(options.output, 'w') as fw:
            fw.writelines(retrievals)

    def get_command_line(self):
        """Build the option parser and return ``(options, args)`` for the current command line."""
        ## *************************** Parse Command Line *****************************
        parser = MyParser(
            description = 'This Galaxy tool retrieves versions of prepared data sources and places them in a galaxy "Versioned Data" library',
            usage = 'python versioned_data.py [options]',
            epilog="""Details:

    This tool retrieves links to current or past versions of fasta (or other key-value text) databases from a cache kept in the data library called "Fasta Databases". It then places them into the current history so that subsequent tools can work with that data.
""")

        parser.add_option('-r', '--retrievals', type='string', dest='retrievals',
            help='List of datasources and their versions and galaxy workflows to return')

        parser.add_option('-o', '--output', type='string', dest='output',
            help='Path of output log file to create')

        parser.add_option('-O', '--output_id', type='string', dest='output_id',
            help='Output identifier')

        parser.add_option('-d', '--date', type='string', dest='globalRetrievalDate',
            help='Provide date/time for data recall. Defaults to now.')

        # NOTE(review): this flag is parsed but nothing visible here acts on it.
        parser.add_option('-v', '--version', dest='version', default=False, action='store_true',
            help='Version number of this program.')

        parser.add_option('-s', '--api_info_path', type='string', dest='api_info_path', help='Galaxy user api key/path.')

        return parser.parse_args()
138
139
140
if __name__ == '__main__':
    # Script entry point: run the retrieval engine, then report how long it took.
    time_start = time.time()

    reportEngine = ReportEngine()
    reportEngine.__main__()

    # Whole seconds of wall-clock time, truncated rather than rounded.
    elapsed_seconds = int(time.time() - time_start)
    print('Execution time (seconds): ' + str(elapsed_seconds))
149