0
|
1
|
|
2
|
|
3 # Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com>
|
|
4 #
|
|
5 # This software is distributed under the MIT Open Source License.
|
|
6 # <http://www.opensource.org/licenses/mit-license.html>
|
|
7 #
|
|
8 # Permission is hereby granted, free of charge, to any person obtaining a
|
|
9 # copy of this software and associated documentation files (the "Software"),
|
|
10 # to deal in the Software without restriction, including without limitation
|
|
11 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
12 # and/or sell copies of the Software, and to permit persons to whom the
|
|
13 # Software is furnished to do so, subject to the following conditions:
|
|
14 #
|
|
15 # The above copyright notice and this permission notice shall be included
|
|
16 # in all copies or substantial portions of the Software.
|
|
17 #
|
|
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
24 # THE SOFTWARE.
|
|
25 #
|
|
26
|
|
27
|
|
28 """Extra utilities and core classes not in standard python.
|
|
29 """
|
|
30
|
|
31
|
|
# Public names exported by ``from <module> import *``.
__all__ = (
    'isblank',
    'isfloat',
    'isint',
    'fcmp',
    'remove_whitespace',
    'invert_dict',
    'update',
    'stdrepr',
    'Token',
    'Struct',
    'Reiterate',
    'deoptparse',
    'crc32',
    'crc64',
    'FileIndex',
    'find_command',
    'ArgumentError',
    'frozendict',
)
|
|
36
|
|
37 import os.path
|
|
38 import math
|
|
39
|
|
def isblank(string):
    """Return True if `string` is empty or consists only of whitespace."""
    # str.isspace() is False for the empty string, so that case is
    # checked explicitly first.
    return string == '' or string.isspace()
|
|
44
|
|
def isfloat(s):
    """Return True if float(s) succeeds, False otherwise.

    Both unparsable values (ValueError) and unsupported types (TypeError)
    count as "not a float".
    """
    try:
        float(s)
    except (ValueError, TypeError):
        return False
    else:
        return True
|
|
52
|
|
def isint(s):
    """Return True if int(s) succeeds, False otherwise.

    Both unparsable values (ValueError) and unsupported types (TypeError)
    count as "not an integer".
    """
    try:
        int(s)
    except (ValueError, TypeError):
        return False
    else:
        return True
|
|
60
|
|
def fcmp(x, y, precision):
    """Three-way comparison of two numbers with an absolute tolerance.

    Args:
    - x, y -- The numbers to compare.
    - precision -- Absolute tolerance. Differences strictly smaller than
        this value are treated as equality.

    Returns:
        0 if abs(x-y) < precision, otherwise -1 if x < y, otherwise 1.
    """
    # TODO: default precision. Test
    delta = math.fabs(x - y)
    if delta < precision:
        return 0
    return -1 if x < y else 1
|
|
69
|
|
def remove_whitespace(astring):
    """Return a copy of `astring` with every whitespace character removed."""
    # split() with no argument breaks on any run of whitespace and discards
    # it; gluing the pieces back together leaves only the non-blank text.
    # TODO: Is this horribly slow?
    pieces = astring.split()
    return "".join(pieces)
|
|
74
|
|
75
|
|
def invert_dict(dictionary):
    """Construct a new dictionary with inverted mappings, so that keys
    become values and vice versa.

    If the values of the original dictionary are not unique then only one
    of the original keys will be included in the new dictionary.

    Args:
    - dictionary -- The mapping to invert. Its values must be hashable.

    Returns: A new dict; `dictionary` itself is not modified.
    """
    # items() is available on both Python 2 and 3 (iteritems() is not).
    return dict((value, key) for key, value in dictionary.items())
|
|
83
|
|
84
|
|
85
|
|
def update(obj, **entries):
    """Update an instance with new values.

    If `obj` has an ``update`` attribute (e.g. a dict) it is called with the
    dictionary of entries; otherwise each entry is set as an attribute.

    Args:
    - obj -- The object to modify in place.
    - entries -- The names and new values.

    Returns: `obj` itself.

    >>> update({'a': 1}, a=10, b=20)
    {'a': 10, 'b': 20}
    """
    if hasattr(obj, 'update'):
        obj.update(entries)
    else:
        # items() works on both Python 2 and 3 (iteritems() does not).
        for k, v in entries.items():
            setattr(obj, k, v)
    return obj
|
|
98
|
|
99
|
|
100
|
|
def stdrepr(obj, attributes=None, name=None):
    """Create a standard, multi-line representation of an object.

    Args:
    - obj -- The object to represent.
    - attributes -- Names of the attributes to show. Defaults to the
        ``__slots__`` of the object's class.
    - name -- The name printed before the parenthesis. Defaults to the
        class name of `obj`.

    Returns: A string of the form ``Name(`` / ``attr=repr(value),`` ... /
        ``)``, with one attribute per line.
    """
    # Identity tests: `== None` would invoke a user-defined __eq__.
    if name is None:
        name = obj.__class__.__name__
    if attributes is None:
        attributes = obj.__class__.__slots__
    args = ['%s=%s' % (a, repr(getattr(obj, a))) for a in attributes]
    # Indent every line, including continuation lines of nested reprs.
    args = ',\n'.join(args).replace('\n', '\n ')
    return '%s(\n %s\n)' % (name, args)
|
|
110
|
|
111
|
|
class Token(object):
    """Represents the items returned by a file scanner, normally processed
    by a parser.

    Attributes :
    o typeof -- a string describing the kind of token
    o data -- the value of the token
    o lineno -- the line of the file on which the data was found (if known)
    o offset -- the offset of the data within the line (if known)
    """
    __slots__ = ['typeof', 'data', 'lineno', 'offset']

    def __init__(self, typeof, data=None, lineno=-1, offset=-1):
        """Store the token fields; -1 is the default for an unspecified
        lineno or offset."""
        self.typeof = typeof
        self.data = data
        self.lineno = lineno
        self.offset = offset

    def __repr__(self):
        return stdrepr(self)

    def __str__(self):
        """Return a one line, column aligned summary:
        'lineno[:offset] typeof : data'."""
        coord = str(self.lineno)
        if self.offset != -1:
            coord += ':' + str(self.offset)
        coord = coord.ljust(7)
        # Only a missing value (None) is rendered as empty. Falsy but real
        # data such as 0 or False must still be shown.
        data = '' if self.data is None else str(self.data)
        return (coord + ' ' + self.typeof + ' : ').ljust(32) + data
|
|
137
|
|
138
|
|
139
|
|
def Struct(**kwargs):
    """Create a new instance of an anonymous class with the supplied attributes
    and values.

    The class is built on the fly, is named 'Struct', and declares the
    keyword names as its ``__slots__``, so no other attributes can be set
    on the returned instance.

    >>> s = Struct(a=3,b=4)
    >>> s
    Struct(
     a=3,
     b=4
    )
    >>> s.a
    3

    """
    name = 'Struct'

    def _init(obj, **kwargs):
        # items() works on both Python 2 and 3 (iteritems() does not).
        for k, v in kwargs.items():
            setattr(obj, k, v)

    def _repr(obj):
        return stdrepr(obj, obj.__slots__, name)

    adict = {
        # A concrete list, not a live view of the kwargs dictionary.
        '__slots__': list(kwargs),
        '__init__': _init,
        '__repr__': _repr,
    }

    return type(name, (object,), adict)(**kwargs)
|
|
169
|
|
170
|
|
class Reiterate(object):
    """A flexible wrapper around a simple iterator.

    Adds push-back (push), look-ahead (peek, has_item), a running count of
    the items handed out (index) and a predicate search (filter).
    Wrapping an object that is already a Reiterate returns that same object.
    """
    def __new__(cls, iterator):
        # Idempotent: never wrap a Reiterate in another Reiterate.
        if isinstance(iterator, cls):
            return iterator
        new = object.__new__(cls)
        new._iterator = iter(iterator)
        new._stack = []     # Items pushed back, most recent last.
        new._index = 0
        return new

    def __init__(self, *args, **kw):
        # All state is set up in __new__, so that re-wrapping an existing
        # instance does not reset it.
        pass

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next item in the iteration.

        Raises StopIteration when both the push-back stack and the
        underlying iterator are exhausted.
        """
        if self._stack:
            item = self._stack.pop()
        else:
            item = next(self._iterator)
        # Count only after an item was actually obtained, so that hitting
        # the end of the iteration leaves index() unchanged.
        self._index += 1
        return item

    # Python 2 iterator protocol, and the name used by existing callers.
    next = __next__

    def index(self):
        """The number of items returned. Incremented by next(), Decremented
        by push(), unchanged by peek() """
        return self._index

    def push(self, item):
        """Push an item back onto the top of the iterator."""
        self._index -= 1
        self._stack.append(item)

    def peek(self):
        """Returns the next item, but does not advance the iteration.
        Returns None if no more items. (But may also return None as the
        next item.)"""
        try:
            item = self.next()
        except StopIteration:
            return None
        self.push(item)
        return item

    def has_item(self):
        """More items to return?"""
        try:
            item = self.next()
        except StopIteration:
            return False
        self.push(item)
        return True

    def filter(self, predicate):
        """Return the next item in the iteration that satisfies the
        predicate. Items that do not satisfy it are consumed and discarded.

        Raises StopIteration if no remaining item satisfies the predicate.
        """
        item = self.next()
        while not predicate(item):
            item = self.next()
        return item
# End class Reiterate
|
|
234
|
|
235
|
|
236
|
|
237
|
|
238
|
|
def crc32(string):
    """Return the standard CRC32 checksum as a hexadecimal string.

    Args:
    - string -- The data to checksum. Text (non-bytes) input is encoded
        as UTF-8 first.

    Returns: Eight upper case hexadecimal digits.
    """
    import binascii
    if not isinstance(string, (bytes, bytearray)):
        string = string.encode('utf-8')
    # binascii.crc32 is signed on Python 2; mask to the unsigned 32 bit
    # value so that the result never carries a minus sign.
    return "%08X" % (binascii.crc32(string) & 0xFFFFFFFF)
|
|
243
|
|
# Lookup table for crc64(), built lazily on the first call.
_crc64_table = None


def crc64(string):
    """ Calculate ISO 3309 standard cyclic redundancy checksum.
    Used, for example, by SWISS-PROT.

    Args:
    - string -- The data to checksum, as text or as bytes. For text, the
        low eight bits of each character's code point are used.

    Returns : The CRC as a hexadecimal string.

    Reference:
    o W. H. Press, S. A. Teukolsky, W. T. Vetterling, and B. P. Flannery,
    "Numerical recipes in C", 2nd ed., Cambridge University Press. Pages 896ff.
    """
    # Adapted from biopython, which was adapted from bioperl
    global _crc64_table
    if _crc64_table is None:
        # Initialisation of CRC64 table. Only the high word of each entry
        # is stored.
        table = []
        for i in range(256):
            l = i
            part_h = 0
            for j in range(8):
                rflag = l & 1
                l >>= 1
                if part_h & 1:
                    l |= (1 << 31)
                part_h >>= 1
                if rflag:
                    part_h ^= 0xd8000000
            table.append(part_h)
        _crc64_table = tuple(table)

    # The 64 bit register is kept as two 32 bit halves.
    crcl = 0
    crch = 0
    for c in string:
        # Iterating bytes yields ints on Python 3, but one-character
        # strings on Python 2 (and for text on either version).
        byte = c if isinstance(c, int) else ord(c)
        shr = (crch & 0xFF) << 24
        temp1h = crch >> 8
        temp1l = (crcl >> 8) | shr
        idx = (crcl ^ byte) & 0xFF
        crch = temp1h ^ _crc64_table[idx]
        crcl = temp1l

    return "%08X%08X" % (crch, crcl)
# End crc64
|
|
285
|
|
286
|
|
class FileIndex(object):
    """Line based random access to a file. Quickly turn a file into a read-only
    database.

    Records can be looked up by integer position (the n'th indexed line)
    or, if a `linekey` function was supplied, by string key.

    Attr:
    - indexedfile -- The file to be indexed. Can be set to None and later
        replaced with a new file handle, for example, if you need to
        close and later reopen the file.

    Bugs:
        User must set the indexedfile to None before pickling this class.

    """
    __slots__ = ['indexedfile', '_parser', '_positions', '_keys', '_key_dict']

    def __init__(self, indexedfile, linekey=None, parser=None):
        """

        Args:
        - indexedfile -- The file to index. Must support seek(), tell()
            and readline().
        - linekey -- An optional function. linekey() will be passed each line
            of the file in turn and should return a string to index the line,
            or None. If linekey() is supplied, then only lines that generate
            keys are indexed.
        - parser -- An optional parser. A function that reads from a file handle
            positioned at the start of a record and returns an object.
            The default parser returns the next line.
        """

        def default_parser(seekedfile):
            return seekedfile.readline()

        if parser is None:
            parser = default_parser
        self._parser = parser

        indexedfile.seek(0)
        positions = []
        keys = []

        while True:
            position = indexedfile.tell()
            line = indexedfile.readline()
            # Truth test rather than `== ''`, so that end of file is also
            # detected on binary handles, where readline() returns b''.
            if not line:
                break

            if linekey:
                k = linekey(line)
                if k is None:
                    continue
                keys.append(k)

            positions.append(position)

        self.indexedfile = indexedfile
        self._positions = tuple(positions)
        # Always initialised (empty without a linekey), so that a string
        # lookup raises KeyError instead of AttributeError.
        self._keys = tuple(keys)
        self._key_dict = dict(zip(keys, positions))

    def tell(self, item):
        """Return the file position of a record.

        Args:
        - item -- A string key, or an integer index into the indexed lines.

        Raises:
        - KeyError -- If `item` is an unknown string key.
        - IndexError -- If `item` is an out of range index.
        """
        if isinstance(item, str):
            p = self._key_dict[item]
        else:
            p = self._positions[item]
        return p

    def seek(self, item):
        """Seek the indexedfile to the position of item."""
        self.indexedfile.seek(self.tell(item))

    def __iter__(self):
        """Yield the parsed record of every indexed line, in file order."""
        for i in range(0, len(self)):
            yield self[i]

    def __len__(self):
        """The number of indexed lines."""
        return len(self._positions)

    def __getitem__(self, item):
        """Seek to the record for `item` and return what the parser reads."""
        self.indexedfile.seek(self.tell(item))
        return self._parser(self.indexedfile)

    def __contains__(self, item):
        """Is `item` a known key or a valid index?"""
        try:
            self.tell(item)
        except (KeyError, IndexError):
            return False
        return True

# End class FileIndex
|
|
377
|
|
378
|
|
def find_command(command, path=None):
    """Return the full path to the first match of the given command on
    the path.

    Arguments:
    - command -- is the name of the executable to search for.
    - path -- is an optional alternate path list to search. The default is
        to use the COREBIOPATH environment variable, if it exists and is
        not empty; otherwise None is passed on to the `_which` module,
        which then picks its own default search path.

    Raises:
    - EnvironmentError -- If no match is found for the command.

    According to the original notes, the default search covers the PATH
    environment variable (as well as, on Windows, the AppPaths key in the
    registry); that behaviour lives in the `_which` module.

    Author: Adapted from code by Trent Mick (TrentM@ActiveState.com)
    See: http://trentm.com/projects/which/
    """
    import _which
    if path is None:
        path = os.environ.get("COREBIOPATH", "").split(os.pathsep)
        # An unset or empty COREBIOPATH splits to ['']: use the default.
        if path == ['']:
            path = None

    try:
        match = next(_which.whichgen(command, path))
    # The tuple is essential: `except A, B:` catches only A and binds the
    # exception instance to the name B.
    except (StopIteration, _which.WhichError):
        raise EnvironmentError("Could not find '%s' on the path." % command)
    return match
|
|
409
|
|
410
|
|
411
|
|
class ArgumentError(ValueError):
    """A ValueError for arguments of the right type but with an unsuitable
    value, for use when no more precise exception (such as IndexError)
    describes the problem. The name of the offending argument or component
    and, optionally, its value are kept on the exception.
    """

    def __init__(self, message, key, value=None):
        """Args:
        - message -- An error message.
        - key -- The name of the argument or component at fault.
        - value -- Optional value of the argument.
        """
        super(ArgumentError, self).__init__(message)
        self.key, self.value = key, value
# end class ArgumentError
|
|
429
|
|
430
|
|
class frozendict(dict):
    """A frozendict is a dictionary that cannot be modified after being created
    - but it is hashable and may serve as a member of a set or a key in a
    dictionary.

    The mutating methods are replaced by a property that raises
    AttributeError as soon as it is looked up.

    # Author: Adapted from code by Oren Tirosh
    """
    # See: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/414283

    def _blocked_attribute(obj):
        raise AttributeError("A frozendict cannot be modified.")
    _blocked_attribute = property(_blocked_attribute)

    __delitem__ = _blocked_attribute
    __setitem__ = _blocked_attribute
    clear = _blocked_attribute
    pop = _blocked_attribute
    popitem = _blocked_attribute
    setdefault = _blocked_attribute
    update = _blocked_attribute

    def __new__(cls, *args, **kw):
        # Populate through dict.__init__ directly: the instance's own
        # mutators are blocked.
        new = dict.__new__(cls)
        dict.__init__(new, *args, **kw)
        return new

    def __init__(self, *args, **kw):
        # The contents were already set in __new__.
        pass

    def __hash__(self):
        try:
            return self._cached_hash
        except AttributeError:
            # Hash keys, not items, since items can be mutable and unhashable.
            # A frozenset needs no ordering between keys, so keys of mixed
            # types are fine, and equal dictionaries still hash equal.
            h = self._cached_hash = hash(frozenset(self.keys()))
            return h

    def __repr__(self):
        return "frozendict(%s)" % dict.__repr__(self)
# end class frozendict
|
|
470 |