comparison tripal.py @ 0:62cfb06bd887 draft

planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/tripal commit f745b23c84a615bf434d717c8c0e553a012f0268
author gga
date Mon, 11 Sep 2017 05:52:46 -0400
parents
children c1fd973dd914
comparison
equal deleted inserted replaced
-1:000000000000 0:62cfb06bd887
1 import collections
2 import os
3 import time
4
5 from abc import abstractmethod
6
7 import tripal
8
9
10 #############################################
11 # BEGIN IMPORT OF CACHING LIBRARY #
12 #############################################
13 # This code is licensed under the MIT #
14 # License and is a copy of code publicly #
15 # available in rev. #
16 # e27332bc82f4e327aedaec17c9b656ae719322ed #
17 # of https://github.com/tkem/cachetools/ #
18 #############################################
try:
    # Python 3.3+: the container ABCs live in ``collections.abc``; the old
    # ``collections.MutableMapping`` alias was removed in Python 3.10.
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:  # Python 2
    from collections import MutableMapping as _MutableMapping


class DefaultMapping(_MutableMapping):
    """Abstract mutable mapping whose helpers are written against ``in``.

    ``get``, ``pop`` and ``setdefault`` are re-implemented here so that they
    test membership with ``key in self`` before indexing, instead of relying
    on ``__getitem__`` raising ``KeyError``.  Subclasses must provide
    ``__contains__`` and ``__getitem__`` (plus the usual abstract methods of
    ``MutableMapping``).
    """

    __slots__ = ()

    @abstractmethod
    def __contains__(self, key):  # pragma: nocover
        """Return whether *key* is present; the base answer is ``False``."""
        return False

    @abstractmethod
    def __getitem__(self, key):  # pragma: nocover
        """Fall back to the class's ``__missing__`` hook, else ``KeyError``."""
        if hasattr(self.__class__, '__missing__'):
            return self.__class__.__missing__(self, key)
        else:
            raise KeyError(key)

    def get(self, key, default=None):
        """Return ``self[key]`` if *key* is present, otherwise *default*."""
        if key in self:
            return self[key]
        else:
            return default

    # Private sentinel so that ``None`` remains a usable default for pop().
    __marker = object()

    def pop(self, key, default=__marker):
        """Remove *key* and return its value.

        Returns *default* when the key is absent; raises ``KeyError`` when the
        key is absent and no default was supplied.
        """
        if key in self:
            value = self[key]
            del self[key]
        elif default is self.__marker:
            raise KeyError(key)
        else:
            value = default
        return value

    def setdefault(self, key, default=None):
        """Return ``self[key]``, storing *default* first if *key* is absent."""
        if key in self:
            value = self[key]
        else:
            self[key] = value = default
        return value


# Make plain dicts pass isinstance/issubclass checks against DefaultMapping.
DefaultMapping.register(dict)
61
62
class _DefaultSize(object):
    """Stateless stand-in for a size table in which every entry weighs 1.

    It supports only the three operations used on a size table
    (indexing, item assignment and ``pop``) and stores nothing.
    """

    def __getitem__(self, _):
        """Report size 1 for any key."""
        return 1

    def __setitem__(self, _, value):
        """Accept only size 1; nothing is stored."""
        # Internal invariant check, not input validation: a size other than 1
        # cannot be represented by this table.
        assert value == 1

    def pop(self, _):
        """Report size 1 for any key; there is nothing to remove."""
        return 1
72
73
class Cache(DefaultMapping):
    """Mutable mapping to serve as a simple cache or cache base class.

    Items are kept in a plain dict.  Each value has a size (1 unless a
    ``getsizeof`` callable is given) and the sum of sizes is kept at or below
    ``maxsize`` by calling ``self.popitem()`` until a new value fits.

    :param maxsize: upper bound for the summed item sizes.
    :param missing: optional callable ``missing(key)`` used to produce a value
        when a key is not cached; when omitted a miss raises ``KeyError``.
    :param getsizeof: optional callable ``getsizeof(value)`` returning the
        size of a value.
    """

    # Class-level default size table; replaced by a real dict in __init__ only
    # when a custom ``getsizeof`` is supplied.
    __size = _DefaultSize()

    def __init__(self, maxsize, missing=None, getsizeof=None):
        if missing:
            self.__missing = missing
        if getsizeof:
            self.__getsizeof = getsizeof
            self.__size = dict()
        self.__data = dict()
        self.__currsize = 0
        self.__maxsize = maxsize

    def __repr__(self):
        return '%s(%r, maxsize=%r, currsize=%r)' % (
            self.__class__.__name__,
            list(self.__data.items()),
            self.__maxsize,
            self.__currsize,
        )

    def __getitem__(self, key):
        """Return the cached value, delegating misses to ``__missing__``."""
        try:
            return self.__data[key]
        except KeyError:
            return self.__missing__(key)

    def __setitem__(self, key, value):
        """Store *value*, evicting items first if it would not fit.

        Raises ``ValueError('value too large')`` when the value alone exceeds
        ``maxsize``.
        """
        maxsize = self.__maxsize
        size = self.getsizeof(value)
        if size > maxsize:
            raise ValueError('value too large')
        # Only a new key, or a replacement that grows, can require eviction.
        if key not in self.__data or self.__size[key] < size:
            while self.__currsize + size > maxsize:
                self.popitem()
        # Re-check membership: the eviction loop may have removed ``key``.
        if key in self.__data:
            diffsize = size - self.__size[key]
        else:
            diffsize = size
        self.__data[key] = value
        self.__size[key] = size
        self.__currsize += diffsize

    def __delitem__(self, key):
        size = self.__size.pop(key)
        del self.__data[key]
        self.__currsize -= size

    def __contains__(self, key):
        return key in self.__data

    def __missing__(self, key):
        """Produce a value for an uncached key and try to cache it."""
        value = self.__missing(key)
        try:
            self.__setitem__(key, value)
        except ValueError:
            pass  # value too large
        return value

    def __iter__(self):
        return iter(self.__data)

    def __len__(self):
        return len(self.__data)

    @staticmethod
    def __getsizeof(value):
        # Default sizing: every value counts as 1.
        return 1

    @staticmethod
    def __missing(key):
        # Default miss handler: behave like a plain mapping.
        raise KeyError(key)

    @property
    def maxsize(self):
        """The maximum size of the cache."""
        return self.__maxsize

    @property
    def currsize(self):
        """The current size of the cache."""
        return self.__currsize

    def getsizeof(self, value):
        """Return the size of a cache element's value."""
        return self.__getsizeof(value)
162
163
class _Link(object):
    """Node of a circular doubly linked list, carrying a key and an expiry.

    ``next`` and ``prev`` are not set by the constructor; the owner of the
    list is expected to assign them.
    """

    __slots__ = ('key', 'expire', 'next', 'prev')

    def __init__(self, key=None, expire=None):
        self.key = key
        self.expire = expire

    def __reduce__(self):
        # Pickle only key and expiry; the neighbour pointers are not saved.
        return _Link, (self.key, self.expire)

    def unlink(self):
        """Detach this node by joining its two neighbours to each other."""
        successor = self.next
        predecessor = self.prev
        predecessor.next = successor
        successor.prev = predecessor
180
181
class _Timer(object):
    """Wrap a clock callable so that its value can be frozen temporarily.

    Calling the instance returns the wrapped clock's current value.  While the
    instance is entered as a context manager (possibly nested), calls return
    the value sampled when the outermost ``with`` block was entered.
    """

    def __init__(self, timer):
        self.__timer = timer
        self.__nesting = 0

    def __call__(self):
        if self.__nesting == 0:
            return self.__timer()
        else:
            return self.__time

    def __enter__(self):
        # Sample the clock only on the outermost entry; nested entries reuse
        # the frozen value.
        if self.__nesting == 0:
            self.__time = now = self.__timer()
        else:
            now = self.__time
        self.__nesting += 1
        return now

    def __exit__(self, *exc):
        self.__nesting -= 1

    def __reduce__(self):
        # Pickle only the wrapped clock; nesting state is not preserved.
        return _Timer, (self.__timer,)

    def __getattr__(self, name):
        # Any attribute not found on the wrapper is looked up on the clock.
        return getattr(self.__timer, name)
210
211
class TTLCache(Cache):
    """LRU Cache implementation with per-item time-to-live (TTL) value.

    Two structures are kept next to the base cache's data:

    * a circular doubly linked list of ``_Link`` nodes rooted at a sentinel;
      a link is (re)appended at the tail whenever its key is written, so the
      head carries the earliest expiry time (given a non-decreasing timer);
    * an ``OrderedDict`` mapping keys to their links, reordered on access so
      that its first key is the least recently used one.

    :param maxsize: upper bound for the summed item sizes.
    :param ttl: lifetime added to the timer value when an item is written.
    :param timer: zero-argument callable returning the current time.
    :param missing: optional callable producing values for uncached keys.
    :param getsizeof: optional callable returning the size of a value.
    """

    def __init__(self, maxsize, ttl, timer=time.time, missing=None,
                 getsizeof=None):
        Cache.__init__(self, maxsize, missing, getsizeof)
        # Sentinel node: an empty list is the root linked to itself.
        self.__root = root = _Link()
        root.prev = root.next = root
        self.__links = collections.OrderedDict()
        self.__timer = _Timer(timer)
        self.__ttl = ttl

    def __contains__(self, key):
        """Return True only for keys that are present and not expired."""
        try:
            link = self.__links[key]  # no reordering
        except KeyError:
            return False
        else:
            return not (link.expire < self.__timer())

    def __getitem__(self, key, cache_getitem=Cache.__getitem__):
        """Return the value for *key*; expired keys are treated as missing."""
        try:
            link = self.__getlink(key)
        except KeyError:
            # Unknown key: let the base class handle the miss.
            expired = False
        else:
            expired = link.expire < self.__timer()
        if expired:
            return self.__missing__(key)
        else:
            return cache_getitem(self, key)

    def __setitem__(self, key, value, cache_setitem=Cache.__setitem__):
        """Store *value* and (re)start its time-to-live."""
        with self.__timer as now:
            # Drop expired items first so they do not count against maxsize.
            self.expire(now)
            cache_setitem(self, key, value)
        try:
            link = self.__getlink(key)
        except KeyError:
            self.__links[key] = link = _Link(key)
        else:
            link.unlink()
        link.expire = now + self.__ttl
        # Append the link at the tail, just before the sentinel.
        link.next = root = self.__root
        link.prev = prev = root.prev
        prev.next = root.prev = link

    def __delitem__(self, key, cache_delitem=Cache.__delitem__):
        """Remove *key*; raise ``KeyError`` if it had already expired.

        The entry is removed in either case; the error is raised afterwards.
        """
        cache_delitem(self, key)
        link = self.__links.pop(key)
        link.unlink()
        if link.expire < self.__timer():
            raise KeyError(key)

    def __iter__(self):
        """Yield the keys of unexpired items in linked-list order."""
        root = self.__root
        curr = root.next
        while curr is not root:
            # "freeze" time for iterator access
            with self.__timer as now:
                if not (curr.expire < now):
                    yield curr.key
            curr = curr.next

    def __len__(self):
        """Return the number of links minus the expired run at the head."""
        root = self.__root
        curr = root.next
        now = self.__timer()
        count = len(self.__links)
        while curr is not root and curr.expire < now:
            count -= 1
            curr = curr.next
        return count

    def __setstate__(self, state):
        """Restore pickled state and rebuild the linked list by expiry time."""
        self.__dict__.update(state)
        root = self.__root
        root.prev = root.next = root
        for link in sorted(self.__links.values(), key=lambda obj: obj.expire):
            link.next = root
            link.prev = prev = root.prev
            prev.next = root.prev = link
        self.expire(self.__timer())

    def __repr__(self, cache_repr=Cache.__repr__):
        with self.__timer as now:
            self.expire(now)
            return cache_repr(self)

    @property
    def currsize(self):
        """The current size of the cache, after dropping expired items."""
        with self.__timer as now:
            self.expire(now)
            return super(TTLCache, self).currsize

    @property
    def timer(self):
        """The timer function used by the cache."""
        return self.__timer

    @property
    def ttl(self):
        """The time-to-live value of the cache's items."""
        return self.__ttl

    def expire(self, time=None):
        """Remove expired items from the cache."""
        if time is None:
            time = self.__timer()
        root = self.__root
        curr = root.next
        links = self.__links
        cache_delitem = Cache.__delitem__
        # Stop at the first link that has not expired yet.
        while curr is not root and curr.expire < time:
            cache_delitem(self, curr.key)
            del links[curr.key]
            following = curr.next
            curr.unlink()
            curr = following

    def clear(self):
        with self.__timer as now:
            self.expire(now)
            Cache.clear(self)

    # The three helpers below run with the timer frozen, so the membership
    # test and the following access see the same point in time.
    def get(self, *args, **kwargs):
        with self.__timer:
            return Cache.get(self, *args, **kwargs)

    def pop(self, *args, **kwargs):
        with self.__timer:
            return Cache.pop(self, *args, **kwargs)

    def setdefault(self, *args, **kwargs):
        with self.__timer:
            return Cache.setdefault(self, *args, **kwargs)

    def popitem(self):
        """Remove and return the `(key, value)` pair least recently used that
        has not already expired.

        """
        with self.__timer as now:
            self.expire(now)
            try:
                key = next(iter(self.__links))
            except StopIteration:
                raise KeyError('%s is empty' % self.__class__.__name__)
            else:
                return (key, self.pop(key))

    # Look up a link and mark its key as most recently used.  The second
    # variant serves OrderedDict implementations without move_to_end().
    if hasattr(collections.OrderedDict, 'move_to_end'):
        def __getlink(self, key):
            value = self.__links[key]
            self.__links.move_to_end(key)
            return value
    else:
        def __getlink(self, key):
            value = self.__links.pop(key)
            self.__links[key] = value
            return value
373
374
375 #############################################
376 # END IMPORT OF CACHING LIBRARY #
377 #############################################
378
# Module-level cache shared by the list_* functions below.
cache = TTLCache(
    100,  # Up to 100 items
    1 * 60  # 1 minute cache life (TTL in seconds, as the timer is time.time)
)
383
384
def _get_instance():
    """Create a ``tripal.TripalInstance`` from environment variables.

    Reads ``GALAXY_TRIPAL_URL``, ``GALAXY_TRIPAL_USER`` and
    ``GALAXY_TRIPAL_PASSWORD`` and passes them positionally, in that order.

    :raises KeyError: if any of the three variables is not set.
    """
    env = os.environ
    return tripal.TripalInstance(
        env['GALAXY_TRIPAL_URL'],
        env['GALAXY_TRIPAL_USER'],
        env['GALAXY_TRIPAL_PASSWORD']
    )
391
392
def list_organisms(*args, **kwargs):
    """Return the organism option tuples, served from the module cache.

    The result of ``_list_organisms`` is stored under the key ``'orgs'`` and
    reused until the cache drops it.  Extra arguments are forwarded to
    ``_list_organisms`` unchanged.
    """
    ti = _get_instance()

    # Key for cached data
    cache_key = 'orgs'
    try:
        # A single lookup covers every case: the cache raises KeyError both
        # for a key that was never stored and for one that has expired or
        # been evicted, so no separate "in" test is needed beforehand.
        return cache[cache_key]
    except KeyError:
        pass

    # Miss: fetch fresh data and store it for subsequent calls.
    data = _list_organisms(ti, *args, **kwargs)
    cache[cache_key] = data
    return data
420
421
def _list_organisms(ti, *args, **kwargs):
    """Fetch organisms and convert them to ``(label, id, False)`` tuples.

    :param ti: object exposing ``organism.get_organisms()``; each returned
        record is indexed by ``'genus'``, ``'species'``,
        ``'infraspecific_name'`` and ``'organism_id'``.
    :returns: list of ``(display name, organism_id, False)`` tuples, where the
        display name is ``"<genus> <species>"`` followed by
        ``" (<infraspecific_name>)"`` when that field is truthy.

    Extra positional and keyword arguments are accepted and ignored.
    NOTE(review): the constant ``False`` is presumably the "selected" flag of
    a Galaxy select option -- confirm against the tool XML.
    """
    # Fetch the orgs.
    orgs_data = []
    for org in ti.organism.get_organisms():
        clean_name = '%s %s' % (org['genus'], org['species'])
        infraspecific = org['infraspecific_name']
        if infraspecific:
            clean_name += ' (%s)' % (infraspecific)
        orgs_data.append((clean_name, org['organism_id'], False))
    return orgs_data
431
432
def list_analyses(*args, **kwargs):
    """Return the analysis option tuples, served from the module cache.

    The result of ``_list_analyses`` is stored under the key ``'analyses'``
    and reused until the cache drops it.  Extra arguments are forwarded to
    ``_list_analyses`` unchanged.
    """
    ti = _get_instance()

    # Key for cached data
    cache_key = 'analyses'
    try:
        # A single lookup covers every case: the cache raises KeyError both
        # for a key that was never stored and for one that has expired or
        # been evicted, so no separate "in" test is needed beforehand.
        return cache[cache_key]
    except KeyError:
        pass

    # Miss: fetch fresh data and store it for subsequent calls.
    data = _list_analyses(ti, *args, **kwargs)
    cache[cache_key] = data
    return data
461
462
def _list_analyses(ti, *args, **kwargs):
    """Fetch analyses and convert them to ``(name, analysis_id, False)``.

    :param ti: object exposing ``analysis.get_analyses()``; each returned
        record is indexed by ``'name'`` and ``'analysis_id'``.
    :returns: list of ``(name, analysis_id, False)`` tuples, in the order the
        records were returned.

    Extra positional and keyword arguments are accepted and ignored.
    """
    return [
        (an['name'], an['analysis_id'], False)
        for an in ti.analysis.get_analyses()
    ]
468
469
def list_blastdbs(*args, **kwargs):
    """Return the database option tuples, served from the module cache.

    The result of ``_list_blastdbs`` is stored under the key ``'blastdbs'``
    and reused until the cache drops it.  Extra arguments are forwarded to
    ``_list_blastdbs`` unchanged.
    """
    ti = _get_instance()

    # Key for cached data
    cache_key = 'blastdbs'
    try:
        # A single lookup covers every case: the cache raises KeyError both
        # for a key that was never stored and for one that has expired or
        # been evicted, so no separate "in" test is needed beforehand.
        return cache[cache_key]
    except KeyError:
        pass

    # Miss: fetch fresh data and store it for subsequent calls.
    data = _list_blastdbs(ti, *args, **kwargs)
    cache[cache_key] = data
    return data
497
498
def _list_blastdbs(ti, *args, **kwargs):
    """Fetch databases and convert them to ``(name, db_id, False)`` tuples.

    :param ti: object exposing ``db.get_dbs()``; each returned record is
        indexed by ``'name'`` and ``'db_id'``.
    :returns: list of ``(name, db_id, False)`` tuples, in the order the
        records were returned.

    Extra positional and keyword arguments are accepted and ignored.
    """
    return [
        (db['name'], db['db_id'], False)
        for db in ti.db.get_dbs()
    ]