comparison corebio/utils/__init__.py @ 4:4d47ab2b7bcc

Uploaded
author davidmurphy
date Fri, 13 Jan 2012 07:18:19 -0500
parents c55bdc2fb9fa
children
comparison
equal deleted inserted replaced
3:09d2dac9ef73 4:4d47ab2b7bcc
1
2
3 # Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com>
4 #
5 # This software is distributed under the MIT Open Source License.
6 # <http://www.opensource.org/licenses/mit-license.html>
7 #
8 # Permission is hereby granted, free of charge, to any person obtaining a
9 # copy of this software and associated documentation files (the "Software"),
10 # to deal in the Software without restriction, including without limitation
11 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 # and/or sell copies of the Software, and to permit persons to whom the
13 # Software is furnished to do so, subject to the following conditions:
14 #
15 # The above copyright notice and this permission notice shall be included
16 # in all copies or substantial portions of the Software.
17 #
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 # THE SOFTWARE.
25 #
26
27
28 """Extra utilities and core classes not in standard python.
29 """
30
31
# Public names exported by ``from corebio.utils import *``.
__all__ = (
    'isblank',
    'isfloat',
    'isint',
    'fcmp',
    'remove_whitespace',
    'invert_dict',
    'update',
    'stdrepr',
    'Token',
    'Struct',
    'Reiterate',
    'deoptparse',
    'crc32',
    'crc64',
    'FileIndex',
    'find_command',
    'ArgumentError',
    'frozendict',
)
36
37 import os.path
38 import math
39
def isblank(string):
    """Return True if `string` is empty or contains only whitespace."""
    # str.isspace() is False for the empty string, hence the explicit test.
    return string == '' or string.isspace()
44
def isfloat(s):
    """Report whether `s` can be converted to a floating point number.

    Returns True when float(s) succeeds and False when the conversion
    raises ValueError or TypeError.
    """
    try:
        float(s)
    except (ValueError, TypeError):
        return False
    else:
        return True
52
def isint(s):
    """Report whether `s` can be converted to an integer.

    Returns True when int(s) succeeds and False when the conversion
    raises ValueError or TypeError.
    """
    try:
        int(s)
    except (ValueError, TypeError):
        return False
    else:
        return True
60
def fcmp(x, y, precision):
    """Three-way floating point comparison with a tolerance.

    Returns 0 when x and y differ by less than `precision`, otherwise -1
    if x is smaller than y and 1 if it is not.
    """
    # TODO: default precision, tests.
    if math.fabs(x - y) < precision:
        return 0
    return -1 if x < y else 1
69
def remove_whitespace(astring):
    """Return a copy of `astring` with all whitespace removed."""
    # TODO: check whether this is too slow for large inputs.
    # split() without a separator drops every run of whitespace, so gluing
    # the fragments back together leaves only the non-blank characters.
    fragments = astring.split()
    return "".join(fragments)
74
75
def invert_dict(dictionary):
    """Construct a new dictionary with inverted mappings, so that keys
    become values and vice versa.

    If the values of the original dictionary are not unique then only one
    of the original keys will be included in the new dictionary.
    """
    # items() (rather than the Python-2-only iteritems()) works on every
    # Python version and on any mapping that follows the dict protocol.
    return dict((value, key) for key, value in dictionary.items())
83
84
85
def update(obj, **entries):
    """Update an instance with new values and return it.

    If `obj` has an update() method (e.g. a dict) the entries are passed
    to it as a dictionary; otherwise each entry is set as an attribute.

    >>> update({'a': 1}, a=10, b=20) == {'a': 10, 'b': 20}
    True
    """
    if hasattr(obj, 'update'):
        obj.update(entries)
    else:
        # items() instead of the Python-2-only iteritems().
        for attr, value in entries.items():
            setattr(obj, attr, value)
    return obj
98
99
100
def stdrepr(obj, attributes=None, name=None):
    """Create a standard representation of an object.

    Args:
    - obj -- the object to describe.
    - attributes -- names of the attributes to show. Defaults to the
        __slots__ of obj's class.
    - name -- the label printed before the attribute list. Defaults to
        the name of obj's class.

    Returns: a multi-line string of the form name(attr=repr, ...), with
    one attribute per line.
    """
    # Identity tests: an argument that merely compares equal to None (via a
    # custom __eq__) must not be mistaken for an omitted argument.
    if name is None:
        name = obj.__class__.__name__
    if attributes is None:
        attributes = obj.__class__.__slots__
    args = ['%s=%s' % (a, repr(getattr(obj, a))) for a in attributes]
    # Indent every continuation line, including newlines inside nested reprs.
    # NOTE(review): this text was recovered from a view that may collapse
    # runs of spaces -- confirm the indent width in the two literals below
    # against the canonical file.
    body = ',\n'.join(args).replace('\n', '\n ')
    return '%s(\n %s\n)' % (name, body)
110
111
class Token(object):
    """Represents the items returned by a file scanner, normally processed
    by a parser.

    Attributes :
    o typeof -- a string describing the kind of token
    o data -- the value of the token
    o lineno -- the line of the file on which the data was found (if known)
    o offset -- the offset of the data within the line (if known)
    """
    __slots__ = ['typeof', 'data', 'lineno', 'offset']

    def __init__(self, typeof, data=None, lineno=-1, offset=-1):
        # -1 marks an unknown line number / offset.
        self.typeof, self.data = typeof, data
        self.lineno, self.offset = lineno, offset

    def __repr__(self):
        return stdrepr(self)

    def __str__(self):
        """Return a one-line 'lineno[:offset] typeof : data' summary."""
        location = str(self.lineno)
        if self.offset != -1:
            location = location + ':' + str(self.offset)
        # NOTE(review): this text was recovered from a view that may collapse
        # runs of spaces -- confirm the separator literals against the
        # canonical file.
        prefix = location.ljust(7) + ' ' + self.typeof + ' : '
        # Falsy data (None, 0, '') is rendered as an empty string.
        return prefix.ljust(32) + str(self.data or '')
137
138
139
def Struct(**kwargs):
    """Create a new instance of an anonymous class with the supplied
    attributes and values.

    The class is named 'Struct', declares exactly the supplied names as
    its __slots__, and builds its repr with stdrepr().

    >>> s = Struct(a=3, b=4)
    >>> s.a
    3
    >>> s.b
    4
    """
    name = 'Struct'

    def _init(obj, **attrs):
        # items() instead of the Python-2-only iteritems().
        for attr, value in attrs.items():
            setattr(obj, attr, value)

    def _repr(obj):
        return stdrepr(obj, obj.__slots__, name)

    namespace = {
        # A real list: on Python 3 kwargs.keys() is a live view that would
        # keep the whole kwargs dict alive as the class's __slots__.
        '__slots__': list(kwargs),
        '__init__': _init,
        '__repr__': _repr,
    }
    return type(name, (object,), namespace)(**kwargs)
169
170
class Reiterate(object):
    """A flexible wrapper around a simple iterator.

    Adds push-back (push), one-item lookahead (peek, has_item), a count of
    the items handed out (index) and a predicate search (filter) on top of
    the wrapped iterator. Wrapping an object that is already a Reiterate
    returns that same object.
    """

    def __new__(cls, iterator):
        if isinstance(iterator, cls):
            return iterator
        new = object.__new__(cls)
        new._iterator = iter(iterator)
        new._stack = []      # pushed-back items, most recent last
        new._index = 0       # net number of items handed out
        return new

    def __init__(self, *args, **kw):
        # All state is created in __new__. This no-op only absorbs the
        # constructor arguments, and leaves an existing instance returned
        # by __new__ untouched.
        pass

    def __iter__(self):
        return self

    def next(self):
        """Return the next item in the iteration.

        Raises StopIteration when no pushed-back items remain and the
        wrapped iterator is exhausted.
        """
        if self._stack:
            item = self._stack.pop()
        else:
            source = self._iterator
            # Python 3 iterators expose __next__, Python 2 iterators next.
            advance = getattr(source, '__next__', None) or source.next
            item = advance()
        # Count the item only once it has really been obtained, so that a
        # StopIteration (e.g. from peek() or has_item() at the end of the
        # data) leaves the index unchanged.
        self._index += 1
        return item

    # Python 3 spelling of the iterator protocol.
    __next__ = next

    def index(self):
        """The number of items returned. Incremented by next(), decremented
        by push(), unchanged by peek()."""
        return self._index

    def push(self, item):
        """Push an item back onto the top of the iterator."""
        self._index -= 1
        self._stack.append(item)

    def peek(self):
        """Return the next item, but do not advance the iteration.

        Returns None if there are no more items. (But None may also be a
        genuine next item; use has_item() to tell the cases apart.)
        """
        try:
            item = self.next()
        except StopIteration:
            return None
        self.push(item)
        return item

    def has_item(self):
        """More items to return?"""
        try:
            item = self.next()
        except StopIteration:
            return False
        self.push(item)
        return True

    def filter(self, predicate):
        """Return the next item in the iteration that satisfies the
        predicate, consuming every item before it.

        Raises StopIteration if no remaining item satisfies the predicate.
        """
        item = self.next()
        while not predicate(item):
            item = self.next()
        return item
233 # End class Reiterate
234
235
236
237
238
def crc32(string):
    """Return the standard CRC32 checksum as a hexadecimal string.

    The result is always eight upper-case hex digits. `string` is handed
    straight to binascii.crc32, which on Python 3 requires a bytes-like
    object.
    """
    import binascii
    # binascii.crc32 can return a signed (negative) value on Python 2, which
    # would format as e.g. "-34BC6DA". Masking to 32 bits gives the same
    # unsigned checksum on every version and platform.
    return "%08X" % (binascii.crc32(string) & 0xFFFFFFFF)
243
# Lookup table for crc64(), built lazily on the first call.
_crc64_table = None


def crc64(string):
    """Calculate ISO 3309 standard cyclic redundancy checksum.
    Used, for example, by SWISS-PROT.

    Args:
    - string -- the data to checksum: a string, or (on Python 3) a
        bytes object.

    Returns : The CRC as a hexadecimal string (16 upper-case digits).

    Reference:
    o W. H. Press, S. A. Teukolsky, W. T. Vetterling, and B. P. Flannery,
      "Numerical recipes in C", 2nd ed., Cambridge University Press.
      Pages 896ff.
    """
    # Adapted from biopython, which was adapted from bioperl
    global _crc64_table
    if _crc64_table is None:
        # Initialisation of CRC64 table. Plain int literals are used: Python
        # promotes to arbitrary precision on its own, and the old "L" suffix
        # is a syntax error on Python 3.
        table = []
        for i in range(256):
            part_l = i
            part_h = 0
            for _ in range(8):
                rflag = part_l & 1
                part_l >>= 1
                if part_h & 1:
                    part_l |= (1 << 31)
                part_h >>= 1
                if rflag:
                    part_h ^= 0xd8000000
            table.append(part_h)
        _crc64_table = tuple(table)

    # The 64-bit CRC is kept as two 32-bit halves: crch (high), crcl (low).
    crcl = 0
    crch = 0
    for c in string:
        # Iterating bytes yields ints on Python 3; strings yield characters.
        code = c if isinstance(c, int) else ord(c)
        shr = (crch & 0xFF) << 24
        temp1h = crch >> 8
        temp1l = (crcl >> 8) | shr
        idx = (crcl ^ code) & 0xFF
        crch = temp1h ^ _crc64_table[idx]
        crcl = temp1l

    return "%08X%08X" % (crch, crcl)
284 # End crc64
285
286
class FileIndex(object):
    """Line based random access to a file. Quickly turn a file into a
    read-only database.

    Items can be looked up by integer position (the n'th indexed line) or,
    when a linekey function was supplied, by string key.

    Attr:
    - indexedfile -- The file to be indexed. Can be set to None and later
        replaced with a new file handle, for example, if you need to
        close and later reopen the file.

    Bugs:
    User must set the indexedfile to None before pickling this class.
    """
    __slots__ = ['indexedfile', '_parser', '_positions', '_keys', '_key_dict']

    def __init__(self, indexedfile, linekey=None, parser=None):
        """
        Args:
        - indexedfile -- The file to index. It is rewound and read to the
            end, one line at a time.
        - linekey -- An optional function. linekey() will be passed each
            line of the file in turn and should return a string to index
            the line, or None. If linekey() is supplied, then only lines
            that generate keys are indexed. If several lines produce the
            same key, lookup by that key finds the last of them.
        - parser -- An optional parser. A function that reads from a file
            handle positioned at the start of a record and returns an
            object. The default parser returns the next line.
        """

        def default_parser(seekedfile):
            return seekedfile.readline()

        if parser is None:
            parser = default_parser
        self._parser = parser

        indexedfile.seek(0)
        positions = []
        keys = []

        while True:
            position = indexedfile.tell()
            line = indexedfile.readline()
            # Truth test rather than == '': a file opened in binary mode
            # signals end of file with b'', which never equals ''.
            if not line:
                break

            if linekey:
                k = linekey(line)
                if k is None:
                    continue
                keys.append(k)

            positions.append(position)

        self.indexedfile = indexedfile
        self._positions = tuple(positions)
        # Always initialise the key slots (empty when no linekey is given)
        # so that a string lookup raises KeyError, not AttributeError.
        self._keys = tuple(keys)
        self._key_dict = dict(zip(keys, positions))

    def tell(self, item):
        """Return the file position of item.

        A str item is looked up as a key (KeyError if unknown); anything
        else is used as an index into the indexed lines (IndexError if out
        of range).
        """
        if isinstance(item, str):
            return self._key_dict[item]
        return self._positions[item]

    def seek(self, item):
        """Seek the indexedfile to the position of item."""
        self.indexedfile.seek(self.tell(item))

    def __iter__(self):
        for i in range(0, len(self)):
            yield self[i]

    def __len__(self):
        return len(self._positions)

    def __getitem__(self, item):
        self.indexedfile.seek(self.tell(item))
        return self._parser(self.indexedfile)

    def __contains__(self, item):
        try:
            self.tell(item)
        except (KeyError, IndexError):
            return False
        return True
375
376 # End class FileIndex
377
378
def find_command(command, path=None):
    """Return the full path to the first match of the given command on
    the path.

    Arguments:
    - command -- is the name of the executable to search for.
    - path -- is an optional alternate path list to search. The default is
        to use the COREBIOPATH environment variable, if it exists and is
        not empty; otherwise None is handed to the _which module
        (presumably meaning the PATH environment variable -- confirm
        against _which).

    Raises:
    - EnvironmentError -- If no match is found for the command.

    By default the COREBIOPATH or PATH environment variable is searched (as
    well as, on Windows, the AppPaths key in the registry), but a specific
    'path' list to search may be specified as well.

    Author: Adapted from code by Trent Mick (TrentM@ActiveState.com)
    See: http://trentm.com/projects/which/
    """
    import _which
    if path is None:
        path = os.environ.get("COREBIOPATH", "").split(os.pathsep)
        if path == ['']:
            path = None

    # The old "except StopIteration, _which.WhichError:" caught only
    # StopIteration and rebound _which.WhichError to the exception. Taking
    # the first item with a for loop handles the empty case, so only
    # WhichError needs to be caught.
    try:
        for match in _which.whichgen(command, path):
            return match
    except _which.WhichError:
        pass
    raise EnvironmentError("Could not find '%s' on the path." % command)
409
410
411
class ArgumentError(ValueError):
    """A subclass of ValueError raised when a function receives an argument
    that has the right type but an inappropriate value, and the situation is
    not described by a more precise exception such as IndexError. The name of
    the argument or component at fault and (optionally) the value are also
    stored.
    """

    def __init__(self, message, key, value=None):
        """Args:
        - message -- An error message.
        - key -- The name of the argument or component at fault.
        - value -- Optional value of the argument.
        """
        # Record what was at fault, then let ValueError store the message.
        self.key, self.value = key, value
        ValueError.__init__(self, message)
428 # end class ArgumentError
429
430
class frozendict(dict):
    """A frozendict is a dictionary that cannot be modified after being
    created - but it is hashable and may serve as a member of a set or a key
    in a dictionary.

    The hash is computed from the keys only and cached on first use.

    # Author: Adapted from code by Oren Tirosh
    """
    # See: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/414283

    def _blocked_attribute(obj):
        raise AttributeError("A frozendict cannot be modified.")
    # As a property, even looking up a mutating method raises the error.
    _blocked_attribute = property(_blocked_attribute)

    __delitem__ = _blocked_attribute
    __setitem__ = _blocked_attribute
    clear = _blocked_attribute
    pop = _blocked_attribute
    popitem = _blocked_attribute
    setdefault = _blocked_attribute
    update = _blocked_attribute

    def __new__(cls, *args, **kw):
        # Fill the dict here, bypassing the blocked mutators; __init__ is a
        # no-op so an existing instance cannot be re-initialised.
        new = dict.__new__(cls)
        dict.__init__(new, *args, **kw)
        return new

    def __init__(self, *args, **kw):
        pass

    def __hash__(self):
        try:
            return self._cached_hash
        except AttributeError:
            # Hash keys, not items, since items can be mutable and
            # unhashable. A frozenset is order-independent and, unlike
            # sorting, works when the keys cannot be compared with one
            # another (e.g. ints mixed with strings on Python 3).
            h = self._cached_hash = hash(frozenset(self))
            return h

    def __repr__(self):
        return "frozendict(%s)" % dict.__repr__(self)
469 # end class frozendict
470