Mercurial > repos > damion > versioned_data
comparison versioned_data.py @ 1:5c5027485f7d draft
Uploaded correct file
author | damion |
---|---|
date | Sun, 09 Aug 2015 16:07:50 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d31a1bd74e63 | 1:5c5027485f7d |
---|---|
1 #!/usr/bin/python | |
2 import os | |
3 import optparse | |
4 import sys | |
5 import time | |
6 import re | |
7 | |
8 import vdb_common | |
9 import vdb_retrieval | |
10 | |
class MyParser(optparse.OptionParser):
    """
    OptionParser variant that emits its epilog exactly as written.

    From http://stackoverflow.com/questions/1857346/python-optparse-how-to-include-additional-info-in-usage-output
    Provides a better display of formatted help info in the epilog portion
    of optparse: the text is returned untouched instead of being passed
    through the help formatter.
    """

    def format_epilog(self, formatter):
        """Return the raw epilog text, or '' when no epilog was supplied.

        ``formatter`` is accepted for interface compatibility with
        ``OptionParser.format_epilog`` but is deliberately not used.
        """
        # format_help() joins its parts with "".join(...), so a None epilog
        # must be turned into an empty string to avoid a TypeError.
        return self.epilog or ""
18 | |
19 | |
def stop_err(msg):
    """Write ``msg`` followed by a newline to stderr, then exit with status 1."""
    line = "%s\n" % msg
    sys.stderr.write(line)
    # Raising SystemExit(1) is exactly what sys.exit(1) does.
    raise SystemExit(1)
23 | |
24 | |
class ReportEngine(object):
    """Command-line driver for the versioned data retrieval tool.

    Parses the tool's options, then for every requested retrieval asks a
    ``vdb_retrieval.VDBRetrieval`` object to locate the data store, fetch
    the requested version, update the history with the resulting datasets
    and optionally run workflows on them.
    """

    def __init__(self):
        pass

    @staticmethod
    def _fail(message):
        """Print ``message`` on stdout and terminate with exit status 1."""
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(message)
        sys.exit(1)

    def __main__(self):
        """Run every retrieval listed in the ``--retrievals`` option.

        The option value is a '|'-separated list of entries; each entry is a
        comma-separated "spec_file_id, [version list], [workflow_list]".
        Any failure prints an "Error: ..." line and exits with status 1.
        On success an (empty) output file is created at ``options.output``.
        """
        options, _args = self.get_command_line()

        # Without this guard a missing -r option crashes on None.strip().
        if options.retrievals is None:
            self._fail('Error: no retrievals were requested (see -r/--retrievals).')

        retrieval_obj = vdb_retrieval.VDBRetrieval()
        retrieval_obj.set_api(options.api_info_path)

        for retrieval in options.retrievals.strip().strip('|').split('|'):
            # Normally xml form supplies "spec_file_id, [version list], [workflow_list]"
            params = retrieval.strip().split(',')

            spec_file_id = params[0]

            if spec_file_id == 'none':
                self._fail('Error: Form was selected without requesting a data store to retrieve!')

            # STEP 1: Determine data store type and location
            data_store_spec = retrieval_obj.user_api.libraries.show_folder(retrieval_obj.library_id, spec_file_id)
            data_store_type = retrieval_obj.test_data_store_type(data_store_spec['name'])
            base_folder_id = data_store_spec['folder_id']

            if not data_store_type:
                # data_store_type is falsy here, so report the name that
                # could not be classified rather than the empty type itself.
                self._fail('Error: unrecognized data store type [%s]' % (data_store_spec['name'],))

            ds_obj = retrieval_obj.get_data_store_gateway(data_store_type, spec_file_id)

            if len(params) > 1 and params[1].strip():
                # VersionList SHOULD just have 1 id
                version_id = params[1].strip().split()[0]
            elif options.globalRetrievalDate:
                # User didn't select version_id via "Add new retrieval";
                # fall back to the version selected for the global date.
                retrieval_date = vdb_common.parse_date(options.globalRetrievalDate)
                version_id = ds_obj.get_version_options(global_retrieval_date=retrieval_date, selection=True)
            else:
                version_id = ''

            # Reestablishes file(s) if they don't exist on disk. Do data library links to it as well.
            ds_obj.get_version(version_id)
            if ds_obj.version_path is None:
                self._fail(
                    "Error: unable to retrieve version [%s] from %s archive [%s]. Archive doesn't contain this version id?"
                    % (version_id, data_store_type, ds_obj.library_version_path)
                )

            # Version data file(s) are sitting in [ds_obj.version_path] ready for retrieval.
            library_dataset_ids = retrieval_obj.get_library_version_datasets(
                ds_obj.library_version_path, base_folder_id, ds_obj.version_label, ds_obj.version_path
            )

            # The only thing that doesn't have cache lookup is "folder" data that isn't linked in.
            # In that case try lookup directly.
            if not library_dataset_ids and data_store_type == 'folder':
                library_version_datasets = retrieval_obj.get_library_folder_datasets(ds_obj.library_version_path)
                library_dataset_ids = [item['id'] for item in library_version_datasets]

            if not library_dataset_ids:
                self._fail(
                    'Error: unable to retrieve version [%s] from %s archive [%s] '
                    % (version_id, data_store_type, ds_obj.library_version_path)
                )

            # At this point we have references to the galaxy ids of the requested versioned dataset, after regeneration
            versioned_datasets = retrieval_obj.update_history(library_dataset_ids, ds_obj.library_version_path, version_id)

            if len(params) > 2:
                workflow_list = params[2].strip()
                if workflow_list:
                    # We have workflow run via admin_api and admin_api history.
                    retrieval_obj.get_workflow_data(workflow_list, versioned_datasets, version_id)

        # Output file needs to exist. Otherwise Galaxy doesn't generate a placeholder file name
        # for the output, and so we can't do things like check for [placeholder name]_files folder.
        # Nothing is reported yet, so the file is simply created empty.
        with open(options.output, 'w'):
            pass

    def get_command_line(self):
        """Build the option parser and return ``(options, args)`` from ``parse_args()``."""
        ## *************************** Parse Command Line *****************************
        parser = MyParser(
            description='This Galaxy tool retrieves versions of prepared data sources and places them in a galaxy "Versioned Data" library',
            usage='python versioned_data.py [options]',
            epilog=(
                'Details:\n'
                '\n'
                'This tool retrieves links to current or past versions of fasta (or other key-value text) '
                'databases from a cache kept in the data library called "Fasta Databases". It then places '
                'them into the current history so that subsequent tools can work with that data.\n'
            ),
        )

        parser.add_option('-r', '--retrievals', type='string', dest='retrievals',
                          help='List of datasources and their versions and galaxy workflows to return')

        parser.add_option('-o', '--output', type='string', dest='output',
                          help='Path of output log file to create')

        parser.add_option('-O', '--output_id', type='string', dest='output_id',
                          help='Output identifier')

        parser.add_option('-d', '--date', type='string', dest='globalRetrievalDate',
                          help='Provide date/time for data recall. Defaults to now.')

        parser.add_option('-v', '--version', dest='version', default=False, action='store_true',
                          help='Version number of this program.')

        parser.add_option('-s', '--api_info_path', type='string', dest='api_info_path',
                          help='Galaxy user api key/path.')

        return parser.parse_args()
138 | |
139 | |
140 | |
if __name__ == '__main__':
    # Time the whole run so the elapsed seconds can be reported at the end.
    started_at = time.time()

    ReportEngine().__main__()

    elapsed_seconds = int(time.time() - started_at)
    print('Execution time (seconds): ' + str(elapsed_seconds))
149 |