annotate versioned_data.py @ 1:5c5027485f7d draft

Uploaded correct file
author damion
date Sun, 09 Aug 2015 16:07:50 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
1 #!/usr/bin/python
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
2 import os
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
3 import optparse
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
4 import sys
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
5 import time
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
6 import re
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
7
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
8 import vdb_common
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
9 import vdb_retrieval
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
10
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
class MyParser(optparse.OptionParser):
    """
    OptionParser variant that emits its epilog verbatim.

    From http://stackoverflow.com/questions/1857346/python-optparse-how-to-include-additional-info-in-usage-output
    The stock OptionParser re-wraps the epilog text; returning it untouched
    gives a better display of pre-formatted help info in the epilog portion
    of the help output.
    """

    def format_epilog(self, formatter):
        """
        Return the epilog exactly as it was supplied to the constructor.

        formatter -- the help formatter handed in by optparse; unused here
                     because the epilog is deliberately not re-flowed.

        Returns an empty string when no epilog was configured, because
        OptionParser.format_help() joins this value with the other help
        sections and would fail on None.
        """
        return self.epilog or ''
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
18
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
19
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
def stop_err(msg):
    """
    Write msg (followed by a newline) to stderr and exit with status 1.

    msg -- any object; it is rendered with %s formatting. It is wrapped in a
           one-element tuple first so that a tuple message is printed as a
           tuple instead of being unpacked as formatting arguments.

    Never returns: sys.exit(1) raises SystemExit.
    """
    sys.stderr.write("%s\n" % (msg,))
    sys.exit(1)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
23
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
24
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
class ReportEngine(object):
    """
    Command-line driver for the versioned data retrieval tool.

    Parses the command line, then for each requested retrieval asks a
    vdb_retrieval.VDBRetrieval object to locate the data store, regenerate
    the requested version, add the resulting datasets to the history and
    optionally run workflows on them.
    """

    def __init__(self):
        pass

    @staticmethod
    def _abort(message):
        """Print message to stdout and terminate the process with exit status 1."""
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(message)
        sys.exit(1)

    @staticmethod
    def _choose_version_id(params, options, ds_obj):
        """
        Decide which version id to retrieve for one retrieval request.

        params  -- the comma-separated fields of the request; params[1], when
                   present and non-blank, holds the version list.
        options -- parsed command-line options (globalRetrievalDate is read).
        ds_obj  -- data store gateway; consulted only when a global retrieval
                   date has to be turned into a version id.

        Returns the first id of the version list if one was given, otherwise
        whatever ds_obj.get_version_options() selects for the global
        retrieval date, otherwise an empty string.
        """
        if len(params) > 1:
            version_list = params[1].strip()
            if version_list:
                # VersionList SHOULD just have 1 id
                return version_list.split()[0]

        # User didn't select version_id via "Add new retrieval"
        if options.globalRetrievalDate:
            retrieval_date = vdb_common.parse_date(options.globalRetrievalDate)
            return ds_obj.get_version_options(global_retrieval_date=retrieval_date, selection=True)

        return ''

    def __main__(self):
        """
        Run every retrieval named by --retrievals and create the output file.

        --retrievals is a '|'-separated list of requests; each request is
        "spec_file_id, [version list], [workflow_list]". Any failure prints
        an "Error: ..." line to stdout and exits with status 1.
        """
        options, args = self.get_command_line()
        retrieval_obj = vdb_retrieval.VDBRetrieval()
        retrieval_obj.set_api(options.api_info_path)

        # Lines for the output file; nothing is appended at present, so the
        # output file is created empty.
        retrievals = []

        # options.retrievals is None when -r was not supplied; treat that the
        # same as an empty request so it reaches the error message below
        # instead of raising AttributeError.
        requested = (options.retrievals or '').strip().strip('|')

        for retrieval in requested.split('|'):
            # Normally xml form supplies "spec_file_id, [version list], [workflow_list]"
            params = retrieval.strip().split(',')
            spec_file_id = params[0].strip()

            if not spec_file_id or spec_file_id == 'none':
                self._abort('Error: Form was selected without requesting a data store to retrieve!')

            # STEP 1: Determine data store type and location
            data_store_spec = retrieval_obj.user_api.libraries.show_folder(retrieval_obj.library_id, spec_file_id)
            data_store_type = retrieval_obj.test_data_store_type(data_store_spec['name'])
            base_folder_id = data_store_spec['folder_id']

            if not data_store_type:
                # data_store_type is falsy here (possibly None), so report the
                # folder name that could not be classified rather than
                # concatenating the type itself, which raised TypeError.
                self._abort('Error: unrecognized data store type [%s]' % data_store_spec['name'])

            ds_obj = retrieval_obj.get_data_store_gateway(data_store_type, spec_file_id)

            version_id = self._choose_version_id(params, options, ds_obj)

            # Reestablishes file(s) if they don't exist on disk. Do data library links to it as well.
            ds_obj.get_version(version_id)
            if ds_obj.version_path is None:
                self._abort(
                    "Error: unable to retrieve version [%s] from %s archive [%s]. Archive doesn't contain this version id?"
                    % (version_id, data_store_type, ds_obj.library_version_path)
                )

            # Version data file(s) are sitting in [ds_obj.version_path] ready for retrieval.
            library_dataset_ids = retrieval_obj.get_library_version_datasets(
                ds_obj.library_version_path, base_folder_id, ds_obj.version_label, ds_obj.version_path
            )

            # The only thing that doesn't have cache lookup is "folder" data that isn't linked in.
            # In that case try lookup directly.
            if not library_dataset_ids and data_store_type == 'folder':
                library_version_datasets = retrieval_obj.get_library_folder_datasets(ds_obj.library_version_path)
                library_dataset_ids = [item['id'] for item in library_version_datasets]

            if not library_dataset_ids:
                self._abort(
                    'Error: unable to retrieve version [%s] from %s archive [%s] '
                    % (version_id, data_store_type, ds_obj.library_version_path)
                )

            # At this point we have references to the galaxy ids of the requested versioned dataset, after regeneration
            versioned_datasets = retrieval_obj.update_history(library_dataset_ids, ds_obj.library_version_path, version_id)

            if len(params) > 2:
                workflow_list = params[2].strip()
                if workflow_list:
                    # We have workflow run via admin_api and admin_api history.
                    retrieval_obj.get_workflow_data(workflow_list, versioned_datasets, version_id)

        # Output file needs to exist. Otherwise Galaxy doesn't generate a
        # placeholder file name for the output, and so we can't do things like
        # check for [placeholder name]_files folder. Add something to report on?
        with open(options.output, 'w') as fw:
            fw.writelines(retrievals)

    def get_command_line(self):
        """
        Define the tool's command-line options and parse sys.argv.

        Returns the (options, args) pair produced by optparse's parse_args().
        """
        ## *************************** Parse Command Line *****************************
        parser = MyParser(
            description='This Galaxy tool retrieves versions of prepared data sources and places them in a galaxy "Versioned Data" library',
            usage='python versioned_data.py [options]',
            epilog="""Details:

  This tool retrieves links to current or past versions of fasta (or other key-value text) databases from a cache kept in the data library called "Fasta Databases". It then places them into the current history so that subsequent tools can work with that data.
""")

        parser.add_option('-r', '--retrievals', type='string', dest='retrievals',
            help='List of datasources and their versions and galaxy workflows to return')

        parser.add_option('-o', '--output', type='string', dest='output',
            help='Path of output log file to create')

        parser.add_option('-O', '--output_id', type='string', dest='output_id',
            help='Output identifier')

        parser.add_option('-d', '--date', type='string', dest='globalRetrievalDate',
            help='Provide date/time for data recall. Defaults to now.')

        parser.add_option('-v', '--version', dest='version', default=False, action='store_true',
            help='Version number of this program.')

        parser.add_option('-s', '--api_info_path', type='string', dest='api_info_path',
            help='Galaxy user api key/path.')

        return parser.parse_args()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
138
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
139
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
140
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
if __name__ == '__main__':

    # Wall-clock start; only used for the elapsed-time line printed at the end.
    time_start = time.time()

    # Build the driver and run every requested retrieval.
    reportEngine = ReportEngine()
    reportEngine.__main__()

    # Report whole seconds elapsed since time_start.
    elapsed_seconds = int(time.time() - time_start)
    print('Execution time (seconds): ' + str(elapsed_seconds))
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
149