annotate vdb_retrieval.py @ 1:5c5027485f7d draft

Uploaded correct file
author damion
date Sun, 09 Aug 2015 16:07:50 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
1 #!/usr/bin/python
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
2
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
3 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
4 ****************************** vdb_retrieval.py ******************************
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
5 VDBRetrieval() instance called in two stages:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
6 1) by tool's versioned_data.xml form (dynamic_option field)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
7 2) by its executable versioned_data.py script.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
8
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
9 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
10
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
11 import os, sys, glob, time
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
12 import string
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
13 from random import choice
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
14
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
15 from bioblend.galaxy import GalaxyInstance
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
16 from requests.exceptions import ChunkedEncodingError
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
17 from requests.exceptions import ConnectionError
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
18
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
19 import urllib2
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
20 import json
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
21 import vdb_common
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
22
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
# Store these values in python/galaxy environment variables?
VDB_DATA_LIBRARY = 'Versioned Data'
VDB_WORKFLOW_CACHE_FOLDER_NAME = 'Workflow cache'
VDB_CACHED_DATA_LABEL = 'Cached data'

# Don't forget to add "versionedata@localhost.com" to galaxy config admin_users list.
VDB_ADMIN_API_USER = 'versioneddata'
VDB_ADMIN_API_EMAIL = 'versioneddata@localhost.com'
# Admin api key is cached in a text file beside this module.
VDB_ADMIN_API_KEY_PATH = os.path.join(os.path.dirname(sys._getframe().f_code.co_filename), 'versioneddata_api_key.txt')

# kipper, git, folder and other registered handlers (space-separated list).
VDB_STORAGE_OPTIONS = 'kipper git folder biomaj'

# Messages used in versioned_data_form.py
VDB_DATASET_NOT_AVAILABLE = 'This database is not currently available (no items).'
VDB_DATA_LIBRARY_FOLDER_ERROR = 'Error: this data library folder is not configured correctly.'
VDB_DATA_LIBRARY_CONFIG_ERROR = 'Error: Check folder config file: '
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
41
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
42
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
43 class VDBRetrieval(object):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
44
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
45 def __init__(self, api_key=None, api_url=None):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
46 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
47 This gets either trans.x.y from <code file="..."> call in versioned_data.xml,
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
48 or it gets a call with api_key and api_url from versioned_data.py
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
49
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
50 @param api_key_path string File path to temporary file containing user's galaxy api_key
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
51 @param api_url string contains http://[ip]:[port] for handling galaxy api calls.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
52
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
53 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
54 # Initialized constants during the life of a request:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
55 self.global_retrieval_date = None
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
56 self.library_id = None
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
57 self.history_id = None
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
58 self.data_stores = []
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
59
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
60 # Entire json library structure. item.url, type=file|folder, id, name (library path)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
61 # Note: changes to library during a request aren't reflected here.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
62 self.library = None
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
63
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
64 self.user_api_key = None
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
65 self.user_api = None
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
66 self.admin_api_key = None
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
67 self.admin_api = None
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
68 self.api_url = None
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
69
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
70
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
71 def set_trans(self, api_url, history_id, user_api_key=None): #master_api_key=None,
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
72 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
73 Used only on initial presentation of versioned_data.xml form. Doesn't need admin_api
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
74 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
75 self.history_id = history_id
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
76 self.api_url = api_url
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
77 self.user_api_key = user_api_key
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
78 #self.master_api_key = master_api_key
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
79
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
80 self.set_user_api()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
81 self.set_admin_api()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
82 self.set_datastores()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
83
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
84
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
85 def set_api(self, api_info_path):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
86 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
87 "api_info_path" is provided only when user submits tool via versioned_data.py call.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
88 It encodes both the api_url and the history_id of current session
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
89 Only at this point will we need the admin_api, so it is looked up below.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
90
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
91 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
92
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
93 with open(api_info_path, 'r') as access:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
94
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
95 self.user_api_key = access.readline().strip()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
96 #self.master_api_key = access.readline().strip()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
97 api_info = access.readline().strip() #[api_url]-[history_id]
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
98 self.api_url, self.history_id = api_info.split('-')
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
99
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
100 self.set_user_api()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
101 self.set_admin_api()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
102 self.set_datastores()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
103
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
104
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
105 def set_user_api(self):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
106 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
107 Note: error message tacked on to self.data_stores for display back to user.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
108 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
109 self.user_api = GalaxyInstance(url=self.api_url, key=self.user_api_key)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
110
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
111 if not self.user_api:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
112 self.data_stores.append({'name':'Error: user Galaxy API connection was not set up correctly. Try getting another user API key.', 'id':'none'})
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
113 return
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
114
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
115
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
116 def set_datastores(self):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
117 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
118 Provides the list of data stores that users can select versions from.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
119 Note: error message tacked on to self.data_stores for display back to user.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
120 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
121 # Look for library called "Versioned Data"
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
122 try:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
123 libs = self.user_api.libraries.get_libraries(name=VDB_DATA_LIBRARY, deleted=False)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
124 except Exception as err:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
125 # This is the first call to api so api url or authentication erro can happen here.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
126 self.data_stores.append({'name':'Error: Unable to make API connection: ' + err.message, 'id':'none'})
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
127 return
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
128
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
129 found = False
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
130 for lib in libs:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
131 if lib['deleted'] == False:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
132 found = True
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
133 self.library_id = lib['id']
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
134 break;
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
135
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
136 if not found:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
137 self.data_stores.append({'name':'Error: Data Library [%s] needs to be set up by a galaxy administrator.' % VDB_DATA_LIBRARY, 'id':'none'})
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
138 return
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
139
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
140 try:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
141
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
142 if self.admin_api:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
143 self.library = self.admin_api.libraries.show_library(self.library_id, contents=True)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
144 else:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
145 self.library = self.user_api.libraries.show_library(self.library_id, contents=True)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
146
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
147 except Exception as err:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
148 # If data within a library is somehow messed up (maybe user has no permissions?), this can generate a bioblend errorapi.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
149 if err.message[-21:] == 'HTTP status code: 403':
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
150 self.data_stores.append({'name':'Error: [%s] library needs permissions adjusted so users can view it.' % VDB_DATA_LIBRARY , 'id':'none'})
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
151 else:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
152 self.data_stores.append({'name':'Error: Unable to get [%s] library contents: %s' % (VDB_DATA_LIBRARY, err.message) , 'id':'none'})
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
153 return
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
154
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
155 # Need to ensure it is sorted folder/file wise such that folders listed by date/id descending (name leads with version date/id) files will follow).
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
156 self.library = sorted(self.library, key=lambda x: x['name'], reverse=False)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
157
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
158 # Gets list of data stores
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
159 # For given library_id (usually called "Versioned Data"), retrieves folder/name
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
160 # for any folder containing a data source specification file. A folder should
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
161 # have at most one of these. It indicates the storage method used for the folder.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
162
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
163 for item in self.library:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
164 if item['type'] == "file" and self.test_data_store_type(item['name']):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
165 # Returns id of specification file that points to data source.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
166 self.data_stores.append({
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
167 'name':os.path.dirname(item['name']),
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
168 'id':item['id']
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
169 })
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
170
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
171
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
172
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
173 def set_admin_api(self):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
174
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
175 # Now fetch admin_api_key from disk, or regenerate user account and api from scratch.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
176 if os.path.isfile(VDB_ADMIN_API_KEY_PATH):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
177
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
178 with open(VDB_ADMIN_API_KEY_PATH, 'r') as access:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
179 self.admin_api_key = access.readline().strip()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
180 self.api_url = access.readline().strip()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
181
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
182 else:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
183 # VERIFY THAT USER IS AN ADMIN
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
184 user = self.user_api.users.get_current_user()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
185 if user['is_admin'] == False:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
186 print "Unable to establish the admin api: you need to be in the admin_user=... list in galaxy config."
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
187 sys.exit(1)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
188 """ Future: will master API be able to do...
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
189 #if not self.master_api_key:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
190 # print "Unable to establish the admin api: no existing path to config file, and no master_api_key." + self.master_api_key
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
191 # sys.exit(1)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
192 # Generate from scratch:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
193 #master_api = GalaxyInstance(url=self.api_url, key=self.master_api_key)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
194 #users = master_api.users.get_users(deleted=False)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
195 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
196 users = self.user_api.users.get_users(deleted=False)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
197 for user in users:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
198
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
199 if user['email'] == VDB_ADMIN_API_EMAIL:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
200 self.admin_api_key = self.user_api.users.create_user_apikey(user['id'])
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
201
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
202 if not self.admin_api_key:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
203 #Create admin api access account with dummy email address and reliable but secure password:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
204 # NOTE: this will only be considered an admin account if it is listed in galaxy config file as one.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
205 random_password = ''.join([choice(string.letters + string.digits) for i in range(15)])
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
206 api_admin_user = self.user_api.users.create_local_user(VDB_ADMIN_API_USER, VDB_ADMIN_API_EMAIL, random_password)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
207 self.admin_api_key = self.user_api.users.create_user_apikey(api_admin_user['id'])
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
208
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
209 with open(VDB_ADMIN_API_KEY_PATH, 'w') as access:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
210 access.write(self.admin_api_key + '\n' + self.api_url)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
211
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
212 self.admin_api = GalaxyInstance(url=self.api_url, key=self.admin_api_key)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
213
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
214 if not self.admin_api:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
215 print 'Error: admin Galaxy API connection was not set up correctly. Admin user should be ' + VDB_ADMIN_API_EMAIL
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
216 print "Unexpected error:", sys.exc_info()[0]
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
217 sys.exit(1)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
218
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
219
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
220 def get_data_store_gateway(self, type, spec_file_id):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
221 # NOTE THAT PYTHON NEVER TIMES OUT FOR THESE CALLS - BUT IT WILL TIME OUT FOR API CALLS.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
222 # FUTURE: Adapt this so that any modules in data_stores/ folder are usable
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
223 # e.g. https://bbs.archlinux.org/viewtopic.php?id=109561
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
224 # http://stackoverflow.com/questions/301134/dynamic-module-import-in-python
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
225
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
226 # ****************** GIT ARCHIVE ****************
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
227 if type == "git":
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
228 import data_stores.vdb_git
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
229 return data_stores.vdb_git.VDBGitDataStore(self, spec_file_id)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
230
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
231 # ****************** Kipper ARCHIVE ****************
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
232 elif type == "kipper":
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
233 import data_stores.vdb_kipper
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
234 return data_stores.vdb_kipper.VDBKipperDataStore(self, spec_file_id)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
235
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
236 # ****************** FILE FOLDER ******************
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
237 elif type == "folder":
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
238 import data_stores.vdb_folder
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
239 return data_stores.vdb_folder.VDBFolderDataStore(self, spec_file_id)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
240
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
241 # ****************** BIOMAJ FOLDER ******************
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
242 elif type == "biomaj":
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
243 import data_stores.vdb_biomaj
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
244 return data_stores.vdb_biomaj.VDBBiomajDataStore(self, spec_file_id)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
245
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
246 else:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
247 print 'Error: %s not recognized as a valid data store type.' % type
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
248 sys.exit( 1 )
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
249
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
250
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
251 #For a given path leading to pointer.[git|kipper|folder|biomaj] returns suffix
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
252 def test_data_store_type(self, file_name, file_path=None):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
253 if file_path and not os.path.isfile(file_path):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
254 return False
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
255
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
256 suffix = file_name.rsplit('.',1)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
257 if len(suffix) > 1 and suffix[1] in VDB_STORAGE_OPTIONS:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
258 return suffix[1]
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
259
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
260 return False
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
261
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
262
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
263
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
264
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
265
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
266 def get_library_data_store_list(self):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
267 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
268 For display on tool form, returns names, ids of specification files that point to data sources.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
269
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
270 @return dirs array of [[folder label], [folder_id, selected]...]
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
271 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
272 dirs = []
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
273 # Gets recursive contents of library - files and folders
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
274 for item in self.data_stores:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
275 dirs.append([item['name'], item['id'], False])
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
276
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
277 return dirs
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
278
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
279
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
280 def get_library_label_path(self, spec_file_id):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
281 for item in self.data_stores:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
282 if item['id'] == spec_file_id:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
283 return item['name']
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
284
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
285 return None
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
286
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
287
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
288 def get_library_folder_datasets(self, library_version_path, admin=False):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
289 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
290 Gets set of ALL dataset FILES within folder - INCLUDING SUBFOLDERS - by searching
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
291 through a library, examining each item's full hierarchic label
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
292 BUT CURRENTLY: If any file has state='error' the whole list is rejected (and regenerated).
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
293
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
294 WISHLIST: HAVE AN API FUNCTION TO GET ONLY A GIVEN FOLDER'S (BY ID) CONTENTS!
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
295
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
296 @param library_version_path string Full hierarchic label of a library file or folder.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
297
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
298 @return array of ldda_id library dataset data association ids.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
299 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
300
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
301 if admin:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
302 api_handle = self.admin_api
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
303 else:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
304 api_handle = self.user_api
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
305
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
306 count = 0
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
307 while count < 4:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
308 try:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
309 items = api_handle.libraries.show_library(self.library_id, True)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
310 break
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
311 except ChunkedEncodingError:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
312 print "Error: Trying to fetch Versioned Data library listing. Try [" + str(count) + "]"
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
313 time.sleep (2)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
314 pass
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
315
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
316 count +=1
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
317
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
318 datasets = []
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
319 libvpath_len = len(library_version_path) + 1
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
320 for item in items:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
321 if item['type'] == "file":
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
322 name = item['name']
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
323 # need slash or else will match to similar prefixes.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
324 if name[0:libvpath_len] == library_version_path + '/':
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
325
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
326 # ISSUE seems to be that input library datasets can be queued / running, and this MUST wait till they are finished or it will plow ahead prematurely.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
327
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
328 count = 0
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
329
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
330 while count < 10:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
331
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
332 try:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
333 lib_dataset = api_handle.libraries.show_dataset(self.library_id, item['id'])
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
334 break
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
335
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
336 except:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
337 print "Unexpected error:", sys.exc_info()[0]
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
338 sys.exit(1)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
339
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
340 if lib_dataset['state'] == 'running':
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
341 time.sleep(10)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
342 count +=1
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
343 continue
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
344
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
345 elif lib_dataset['state'] == 'queued':
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
346
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
347 # FUTURE: Check date. If it is really stale it should be killed?
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
348 print 'Note: library folder dataset item "%s" is [%s]. Please wait until it is finished processing, or have a galaxy administrator delete the dataset if its creation has failed.' % (name, lib_dataset['state'])
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
349 sys.exit(1)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
350
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
351 elif lib_dataset['state'] != 'ok' or not os.path.isfile(lib_dataset['file_name']):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
352 print 'Note: library folder dataset "%s" had an error during job. Its state was [%s]. Regenerating.' % (name, lib_dataset['state'])
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
353 self.admin_api.libraries.delete_library_dataset(self.library_id, item['id'], purged=True)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
354 return []
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
355
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
356 else:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
357 break
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
358
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
359 datasets.append(item['id'])
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
360
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
361
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
362 return datasets
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
363
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
364
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
365 def get_library_version_datasets(self, library_version_path, base_folder_id='', version_label='', version_path=''):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
366 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
367 Check if given library has a folder for given version_path. If so:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
368 - and it has content, return its datasets.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
369 - otherwise refetch content for verison folder
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
370 If no folder, populate the version folder with data from the archive and return those datasets.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
371 Version exists in external cache (or in case of unlinked folder, in EXISTING galaxy library folder).
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
372 Don't call unless version_path contents have been established.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
373
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
374 @param library_version_path string Full hierarchic label of a library file or folder with version id.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
375
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
376 For creation:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
377 @param base_folder_id string a library folder id under which version files should exist
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
378 @param version_label string Label to give newly created galaxy library version folder
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
379 @param version_path string Data source folder to retrieve versioned data files from
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
380
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
381 @return array of dataset
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
382 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
383
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
384
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
385 # Pick the first folder of any that match given 'Versioned Data/.../.../[version id]' path.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
386 # This case will always match 'folder' data store:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
387
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
388 folder_matches = self.get_folders(name=library_version_path)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
389
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
390 if len(folder_matches):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
391
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
392 folder_id = folder_matches[0]['id']
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
393 dataset_ids = self.get_library_folder_datasets(library_version_path)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
394
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
395 if len(dataset_ids) > 0:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
396
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
397 return dataset_ids
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
398
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
399 if os.listdir(version_path) == []:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
400 # version_path doesn't exist for 'folder' data store versions that are datasets directly in library (i.e. not linked)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
401 print "Error: the data store didn't return any content for given version id. Looked in: " + version_path
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
402 sys.exit(1)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
403
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
404 # NOTE ONE 3rd party COMMENT THAT ONE SHOULD PUT IN file_type='fasta' FOR LARGE FILES. Problem with that is that then galaxy can't recognize other data types.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
405 library_folder_datasets = self.admin_api.libraries.upload_from_galaxy_filesystem(self.library_id, version_path, folder_id, link_data_only=True, roles=None)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
406
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
407
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
408 else:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
409 if base_folder_id == '': #Normally shouldn't happen
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
410
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
411 print "Error: no match to given version folder for [" + library_version_path + "] but unable to create one - missing parent folder identifier"
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
412 return []
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
413
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
414 # Provide archive folder with datestamped name and version (folderNew has url, id, name):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
415 folderNew = self.admin_api.libraries.create_folder(self.library_id, version_label, description=VDB_CACHED_DATA_LABEL, base_folder_id=base_folder_id)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
416 folder_id = str(folderNew[0]['id'])
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
417
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
418 # Now link results to suitably named galaxy library dataset
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
419 # Note, this command links to EVERY file/folder in version_folder source.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
420 # Also, Galaxy will strip off .gz suffixes - WITHOUT UNCOMPRESSING FILES!
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
421 # So, best to prevent data store from showing .gz files in first place
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
422 try:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
423 library_folder_datasets = self.admin_api.libraries.upload_from_galaxy_filesystem(self.library_id, version_path, folder_id, link_data_only=True, roles=None)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
424
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
425 except:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
426 # Will return error if version_path folder is empty or kipper unable to create folder or db due to permissions etc.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
427 print "Error: a permission or other error was encountered when trying to retrieve version data for version folder [" + version_path + "]: Is the [%s] listed in galaxy config admin_users list?" % VDB_ADMIN_API_EMAIL, sys.exc_info()[0]
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
428 sys.exit(1)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
429
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
430
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
431 library_dataset_ids = [dataset['id'] for dataset in library_folder_datasets]
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
432
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
433 # LOOP WAITS UNTIL THESE DATASETS ARE UPLOADED.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
434 # They still take time even for linked big data probably because they are read for metadata.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
435 # Not nice that user doesn't see process as soon as it starts, but timeout possibilities
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
436 # later on down the line are more difficult to manage.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
437 for dataset_id in library_dataset_ids:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
438 # ten seconds x 60 = 6 minutes; should be longer?
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
439 for count in range(60):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
440 try:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
441 lib_dataset = self.admin_api.libraries.show_dataset(self.library_id, dataset_id)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
442 break
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
443
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
444 except:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
445 print "Unexpected error:", sys.exc_info()[0]
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
446 continue
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
447
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
448 if lib_dataset['state'] in 'running queued':
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
449 time.sleep(10)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
450 count +=1
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
451 continue
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
452 else:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
453 # Possibly in a nice "ok" or not nice state here.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
454 break
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
455
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
456
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
457 return library_dataset_ids
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
458
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
459
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
def get_folders(self, name):
    """
    Retrieve metadata for library folders matching the given full hierarchic
    label, retrying a few times before giving up.

    The retry wrapper exists because a number of bioblend api calls sporadically
    fail with e.g.:
        File "/usr/lib/python2.6/site-packages/requests/models.py", line 656, in generate
            raise ChunkedEncodingError(e)
        requests.exceptions.ChunkedEncodingError: ('Connection broken: IncompleteRead(475 bytes read)', IncompleteRead(475 bytes read))

    @param name string full hierarchic label of a library folder
    @return list of folder description dicts, or None if all attempts failed
    """
    attempts = 3
    for count in range(attempts):
        try:
            # BUG FIX: original had an unreachable `break` after this return.
            return self.user_api.libraries.get_folders(self.library_id, name=name)
        except Exception:
            # Log the failed attempt and back off briefly before retrying.
            print('Try (%s) to fetch library folders for "%s"' % (str(count), name))
            print(sys.exc_info()[0])
            time.sleep(5)

    # BUG FIX: original reported `count` (last 0-based loop index, i.e. 2)
    # rather than the actual number of attempts made.
    print("Failed after (%s) tries!" % (str(attempts)))
    return None
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
479
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
480
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
def get_library_folder(self, library_path, relative_path, relative_labels):
    """
    Return the id of the library folder at library_path + relative_path,
    creating any missing intermediate folders along the way. Used for cache
    creation. Ignores bad library_path (returns None).

    @param library_path string Full hierarchic label of a library folder. NOTE: Library_path must have leading forward slash for a match, i.e. /derivative_path
    @param relative_path string branch of folder tree stemming from library_path
    @param relative_labels string label for each relative_path item

    @return folder_id string of the deepest folder, or None if library_path doesn't match an existing folder
    """
    # FIX: dropped unused `created = False` local from original.
    root_match = self.get_folders(name=library_path)

    if len(root_match):
        base_folder_id = root_match[0]['id']

        relative_path_array = relative_path.split('/')
        relative_labels_array = relative_labels.split('/')

        # Walk down the relative path one level at a time, reusing existing
        # folders and creating each missing level under the previous one.
        for ptr in range(len(relative_path_array)):

            _library_path = os.path.join(library_path, '/'.join(relative_path_array[0:ptr+1]))
            folder_matches = self.get_folders(name=_library_path)

            if len(folder_matches):
                folder_id = folder_matches[0]['id']
            else:
                dataset_key = relative_path_array[ptr]
                label = relative_labels_array[ptr]
                folder_new = self.admin_api.libraries.create_folder(self.library_id, dataset_key, description=label, base_folder_id=base_folder_id)
                folder_id = str(folder_new[0]['id'])

            base_folder_id = folder_id

        return folder_id

    return None
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
520
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
521
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
def get_library_folders(self, library_label_path):
    """
    Gets set of ALL folders within given library path; within each folder,
    lists its files as well.

    NOT quite recursive: nested folders don't have parent info. Relies on the
    library listing being sorted ascending so that each file follows the folder
    that contains it. Folder ordering (by version date/id) is whatever
    self.library provides.

    @param library_label_path string Full hierarchic label of a library folder. Inside it are version subfolders, their datasets, and the pointer file.

    @return array of {'id', 'name', 'files'} dicts for the subfolders, each 'files' an array of {'id', 'name'} dataset entries
    """
    folders = []
    libvpath_len = len(library_label_path)
    for item in self.library:

        name = item['name']
        if name[0:libvpath_len] == library_label_path:

            # Skip any file that is immediately under library_label_path
            # (e.g. the data store pointer file) - only nested files belong
            # to a version subfolder.
            if item['type'] == 'file':
                file_key_val = item['name'].rsplit('/', 1)
                if file_key_val[0] == library_label_path:
                    continue

            if item['type'] == 'folder':
                folders.append({'id': item['id'], 'name': item['name'], 'files': []})

            elif folders:
                # Items should be sorted ascending such that each item is
                # contained in the previous folder. FIX: guard against an
                # IndexError if a nested file ever precedes its folder.
                folders[-1]['files'].append({'id': item['id'], 'name': item['name']})

    return folders
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
558
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
559
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
560 def get_workflow_data(self, workflow_list, datasets, version_id):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
561 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
562 Run each workflow in turn, given datasets generated above.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
563 See if each workflow's output has been cached.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
564 If not, run workflow and reestablish output data
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
565 Complexity is that cache could be:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
566 1) in user's history.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
567 2) in library data folder called "derivative_cache" under data source folder (as created by this galaxy install)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
568 3) in external data folder ..."/derivative_cache" (as created by this galaxy install)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
569 BUT other galaxy installs can't really use this unless they know metadata on workflow that generated it
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
570 In future we'll design a system for different galaxies to be able to read metadata to determine if they can use the cached workflow data here.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
571
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
572 ISSUE Below: Unless it is a really short workflow, run_workflow() returns before work is actually complete. DO WE NEED TO DELAY UNTIL EVERY SINGLE OUTPUT DATASET IS "ok", not just "queued" or "running"? OR IS SERVER TO LIBRARY UPLOAD PAUSE ABOVE ENOUGH?
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
573
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
574 Note, workflow_list contains only ids for items beginning with "versioning: "
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
575 FUTURE IMPROVEMENT: LOCK WORKFLOW: VULNERABILITY: IF WORKFLOW CHANGES, THAT AFFECTS REPRODUCABILITY.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
576
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
577 FUTURE: NEED TO ENSURE EACH dataset id not found in history is retrieved from cache.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
578 FUTURE: Check to see that EVERY SINGLE workflow output
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
579 has a corresponding dataset_id in history or library,
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
580 i.e. len(workflow['outputs']) == len(history_dataset_ids)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
581 But do we know before execution how many outputs (given conditional output?)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
582
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
583 @param workflow_list
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
584 @param datasets: an array of correct data source versioned datasets that are inputs to tools and workflows
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
585 @param version_id
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
586
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
587 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
588 for workflow_id in workflow_list.split():
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
589
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
590 workflows = self.admin_api.workflows.get_workflows(workflow_id, published=True)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
591
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
592 if not len(workflows):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
593 # Error occurs if admin_api user doesn't have permissions on this workflow???
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
594 # Currently all workflows have to be shared with VDB_ADMIN_API_EMAIL.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
595 # Future: could get around this by using publicly shared workflows via "import_shared_workflow(workflow_id)"
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
596 print 'Error: unable to run workflow - has it been shared with the Versioned Data tool user email address "%s" ?' % VDB_ADMIN_API_EMAIL
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
597 sys.exit(1)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
598
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
599 for workflow_summary in workflows:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
600
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
601 workflow = self.admin_api.workflows.show_workflow(workflow_id)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
602 print 'Doing workflow: "' + workflow_summary['name'] + '"'
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
603
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
604 if len(workflow['inputs']) == 0:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
605 print "ERROR: This workflow is not configured correctly - it needs at least 1 input dataset step."
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
606
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
607 # FUTURE: Bring greater intelligence to assigning inputs to workflow?!!!
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
608 if len(datasets) < len(workflow['inputs']):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
609
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
610 print 'Error: workflow requires more inputs (%s) than are available in retrieved datasets (%s) for this version of retrieved data.' % (len(workflow['inputs']), len(datasets))
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
611 sys.exit(1)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
612
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
613 codings = self.get_codings(workflow, datasets)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
614 (workflow_input_key, workflow_input_label, annotation_key, dataset_map) = codings
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
615
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
616 history_dataset_ids = self.get_history_workflow_results(annotation_key)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
617
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
618 if not history_dataset_ids:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
619
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
620 library_cache_path = os.path.join("/", VDB_WORKFLOW_CACHE_FOLDER_NAME, workflow_id, workflow_input_key)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
621
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
622 # This has to be privileged api admin fetch.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
623 library_dataset_ids = self.get_library_folder_datasets(library_cache_path, admin=True)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
624
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
625 if not len(library_dataset_ids):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
626 # No cache in library so run workflow
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
627
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
628 # Create admin_api history
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
629 admin_history = self.admin_api.histories.create_history()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
630 admin_history_id = admin_history['id']
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
631
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
632 # If you try to run a workflow that hasn't been shared with you, it seems to go a bit brezerk.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
633 work_result = self.admin_api.workflows.run_workflow(workflow_id, dataset_map=dataset_map, history_id=admin_history_id)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
634
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
635 # Then copy (link) results back to library so can match in future
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
636 self.cache_datasets(library_cache_path, work_result, workflow_summary, codings, version_id, admin_history_id)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
637
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
638 # Now return the new cached library dataset ids:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
639 library_dataset_ids = self.get_library_folder_datasets(library_cache_path, admin=True)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
640 """ If a dataset is purged, its purged everywhere... so don't purge! Let caching system do that.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
641 THIS APPEARS TO HAPPEN TOO QUICKLY FOR LARGE DATABASES; LEAVE IT TO CACHING MECHANISM TO CLEAR. OR ABOVE FIX TO WAIT UNTIL DS IS OK.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
642 self.admin_api.histories.delete_history(admin_history_id, purge=False)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
643 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
644
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
645 # Now link library cache workflow results to history and add key there for future match.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
646 self.update_history(library_dataset_ids, annotation_key, version_id)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
647
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
648
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
649
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
650
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
651 def update_history(self, library_dataset_ids, annotation, version_id):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
652 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
653 Copy datasets from library over to current history if they aren't already there.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
654 Must cycle through history datasets, looking for "copied_from_ldda_id" value. This is available only with details view.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
655
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
656 @param library_dataset_ids array List of dataset Ids to copy from library folder
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
657 @param annotation string annotation to add (e.g. Path of original version folder added as annotation)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
658 @param version_id string Label to add to copied dataset in user's history
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
659 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
660 history_datasets = self.user_api.histories.show_history(self.history_id, contents=True, deleted=False, visible=True, details='all' , types=None) # ,
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
661
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
662 datasets = []
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
663 for dataset_id in library_dataset_ids:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
664 # USING ADMIN_API because that's only way to get workflow items back... user_api doesn't nec. have view rights on newly created workflow items. Only versioneddata@localhost.com has perms.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
665 ld_dataset = self.admin_api.libraries.show_dataset(self.library_id, dataset_id)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
666
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
667 if not ld_dataset['state'] in 'ok running queued':
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
668
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
669 print "Error when linking to library dataset cache [" + ld_dataset['name'] + ", " + ld_dataset['id'] + "] - it isn't in a good state: " + ld_dataset['state']
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
670 sys.exit(1)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
671
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
672 if not os.path.isfile(ld_dataset['file_name']):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
673 pass
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
674 #FUTURE: SHOULD TRIGGER LIBRARY REGENERATION OF ITEM?
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
675
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
676 library_ldda_id = ld_dataset['ldda_id']
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
677
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
678 # Find out if library dataset item is already in history, and if so, just return that item.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
679 dataset = None
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
680 for dataset2 in history_datasets:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
681
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
682 if 'copied_from_ldda_id' in dataset2 \
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
683 and dataset2['copied_from_ldda_id'] == library_ldda_id \
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
684 and dataset2['state'] in 'ok running' \
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
685 and dataset2['accessible'] == True:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
686 dataset = dataset2
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
687 break
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
688
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
689 if not dataset: # link in given dataset from library
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
690
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
691 dataset = self.user_api.histories.upload_dataset_from_library(self.history_id, dataset_id)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
692
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
693 # Update dataset's label - not necessary, just hinting at its creation.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
694 new_name = dataset['name']
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
695 if dataset['name'][-len(version_id):] != version_id:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
696 new_name += ' ' + version_id
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
697
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
698 self.user_api.histories.update_dataset(self.history_id, dataset['id'], name=new_name, annotation = annotation)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
699
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
700 datasets.append({
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
701 'id': dataset['id'],
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
702 'ld_id': ld_dataset['id'],
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
703 'name': dataset['name'],
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
704 'ldda_id': library_ldda_id,
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
705 'library_dataset_name': ld_dataset['name'],
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
706 'state': ld_dataset['state']
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
707 })
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
708
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
709 return datasets
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
710
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
711
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
712 def get_codings(self, workflow, datasets):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
713 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
714 Returns a number of coded lists or arrays for use in caching or displaying workflow results.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
715 Note: workflow['inputs'] = {u'23': {u'label': u'Input Dataset', u'value': u''}},
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
716 Note: step_id is not incremental.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
717 Note: VERY COMPLICATED because of hda/ldda/ld ids
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
718
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
719 FUTURE: IS METADATA AVAILABLE TO BETTER MATCH WORKFLOW INPUTS TO DATA SOURCE RECALL VERSIONS?
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
720 ISSUE: IT IS ASSUMED ALL INPUTS TO WORKFLOW ARE AVAILABLE AS DATASETS BY ID IN LIBRARY. I.e.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
721 one can't have a workflow that also makes reference to another just-generated file in user's
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
722 history.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
723 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
724 db_ptr = 0
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
725 dataset_map = {}
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
726 workflow_input_key = []
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
727 workflow_input_labels = []
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
728
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
729 for step_id, ds_in in workflow['inputs'].iteritems():
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
730 input_dataset_id = datasets[db_ptr]['ld_id']
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
731 ldda_id = datasets[db_ptr]['ldda_id']
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
732 dataset_map[step_id] = {'src': 'ld', 'id': input_dataset_id}
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
733 workflow_input_key.append(ldda_id) #like dataset_index but from workflow input perspective
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
734 workflow_input_labels.append(datasets[db_ptr]['name'])
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
735 db_ptr += 1
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
736
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
737 workflow_input_key = '_'.join(workflow_input_key)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
738 workflow_input_labels = ', '.join(workflow_input_labels)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
739 annotation_key = workflow['id'] + ":" + workflow_input_key
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
740
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
741 return (workflow_input_key, workflow_input_labels, annotation_key, dataset_map)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
742
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
743
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
744 def get_history_workflow_results(self, annotation):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
745 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
746 See if workflow-generated dataset exists in user's history. The only way to spot this
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
747 is to find some dataset in user's history that has workflow_id in its "annotation" field.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
748 We added the specific dataset id's that were used as input to the workflow as well as the
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
749 workflow key since same workflow could have been run on different inputs.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
750
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
751 @param annotation_key string Contains workflow id and input dataset ids..
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
752 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
753 history_datasets = self.user_api.histories.show_history(self.history_id, contents=True, deleted=False, visible=True, details='all') # , types=None
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
754 dataset_ids = []
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
755
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
756 for dataset in history_datasets:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
757 if dataset['annotation'] == annotation:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
758 if dataset['accessible'] == True and dataset['state'] == 'ok':
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
759 dataset_ids.append(dataset['id'])
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
760 else:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
761 print "Warning: dataset " + dataset['name'] + " is in an error state [ " + dataset['state'] + "] so skipped!"
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
762
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
763 return dataset_ids
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
764
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
765
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
766 def cache_datasets(self, library_cache_path, work_result, workflow_summary, codings, version_id, history_id):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
767 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
768 Use the Galaxy API to LINK versioned data api admin user's history workflow-created item(s) into the appropriate Versioned Data Workflow Cache folder. Doing this via API call so that metadata is preserved, e.g. preserving that it is a product of makeblastdb/formatdb and all that entails. Only then does Galaxy remain knowledgeable about datatype/data collection.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
769
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
770 Then user gets link to workflow dataset in their history. (If a galaxy user deletes a workflow dataset in their history they actually only deletes their history link to that dataset. True of api admin user?)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
771
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
772 FUTURE: have the galaxy-created data shared from a server location?
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
773 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
774
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
775 (workflow_input_key, workflow_input_label, annotation_key, dataset_map) = codings
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
776
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
777 # This will create folder if it doesn't exist:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
778 _library_cache_labels = os.path.join("/", VDB_WORKFLOW_CACHE_FOLDER_NAME, workflow_summary['name'], 'On ' + workflow_input_label)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
779 folder_id = self.get_library_folder("/", library_cache_path, _library_cache_labels)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
780 if not folder_id: # Case should never happen
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
781 print 'Error: unable to determine library folder to place cache in:' + library_cache_path
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
782 sys.exit(1)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
783
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
784
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
785 for dataset_id in work_result['outputs']:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
786 # We have to mark each dataset entry with the Workflow ID and input datasets it was generated by.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
787 # No other way to know they are associated. ADD VERSION ID TO END OF workflowinput_label?
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
788 label = workflow_summary['name'] +' on ' + workflow_input_label
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
789
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
790 # THIS WILL BE IN ADMIN API HISTORY
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
791 self.admin_api.histories.update_dataset(history_id, dataset_id, annotation = annotation_key, name=label)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
792
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
793 # Upload dataset_id and give it description 'cached data'
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
794 if 'copy_from_dataset' in dir(self.admin_api.libraries):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
795 # IN BIOBLEND LATEST:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
796 self.admin_api.libraries.copy_from_dataset(self.library_id, dataset_id, folder_id, VDB_CACHED_DATA_LABEL + ": version " + version_id)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
797 else:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
798 self.library_cache_setup_privileged(folder_id, dataset_id, VDB_CACHED_DATA_LABEL + ": version " + version_id)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
799
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
800
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
801
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
802 def library_cache_setup_privileged(self, folder_id, dataset_id, message):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
803 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
804 Copy a history HDA into a library LDDA (that the current admin api user has add permissions on)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
805 in the given library and library folder. Requires that dataset_id has been created by admin_api_key user. Nicola Soranzo [nicola.soranzo@gmail.com will be adding to BIOBLEND eventually.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
806
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
807 We tried linking a Versioned Data library Workflow Cache folder to the dataset(s) a non-admin api user has just generated. It turns out API user that connects the two must be both a Library admin AND the owner of the history dataset being uploaded, or an error occurs. So system can't do action on behalf of non-library-privileged user. Second complication with that approach is that there is no Bioblend API call - one must do this directly in galaxy API via direct URL fetc.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
808
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
809 NOTE: This will raise "HTTPError(req.get_full_url(), code, msg, hdrs, fp)" if given empty folder_id for example
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
810
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
811 @see def copy_hda_to_ldda( library_id, library_folder_id, hda_id, message='' ):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
812 @see https://wiki.galaxyproject.org/Events/GCC2013/TrainingDay/API?action=AttachFile&do=view&target=lddas_1.py
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
813
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
814 @uses library_id: the id of the library which we want to query.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
815
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
816 @param dataset_id: the id of the user's history dataset we want to copy into the library folder.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
817 @param folder_id: the id of the library folder to copy into.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
818 @param message: an optional message to add to the new LDDA.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
819 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
820
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
821
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
822
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
823 full_url = self.api_url + '/libraries' + '/' + self.library_id + '/contents'
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
824 url = self.make_url( self.admin_api_key, full_url )
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
825
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
826 post_data = {
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
827 'folder_id' : folder_id,
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
828 'create_type' : 'file',
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
829 'from_hda_id' : dataset_id,
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
830 'ldda_message' : message
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
831 }
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
832
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
833 req = urllib2.Request( url, headers = { 'Content-Type': 'application/json' }, data = json.dumps( post_data ) )
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
834 #try:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
835
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
836 results = json.loads( urllib2.urlopen( req ).read() )
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
837 return
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
838
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
839
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
840 #Expecting to phase this out with bioblend api call for library_cache_setup()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
841 def make_url(self, api_key, url, args=None ):
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
842 # Adds the API Key to the URL if it's not already there.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
843 if args is None:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
844 args = []
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
845 argsep = '&'
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
846 if '?' not in url:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
847 argsep = '?'
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
848 if '?key=' not in url and '&key=' not in url:
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
849 args.insert( 0, ( 'key', api_key ) )
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
850 return url + argsep + '&'.join( [ '='.join( t ) for t in args ] )
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
851
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
852
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
853