comparison ngsutils/support/ngs_utils.py @ 0:4e4e4093d65d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngsutils commit 09194687c74a424732f8b0c017cbb942aad89068
author iuc
date Wed, 11 Nov 2015 13:04:07 -0500
parents
children 7a68005de299
comparison
equal deleted inserted replaced
-1:000000000000 0:4e4e4093d65d
1 #!/usr/bin/env python
2 """
3
4 Common util classes / functions for the NGS project
5
6 """
7 import sys
8 import os
9 import gzip
10 import re
11 import collections
12
13
def format_number(n):
    '''
    Return str(n) with ',' thousands separators in the integer part.

    A leading sign and anything after a decimal point are left untouched,
    so negative numbers and floats are grouped correctly.  If the integer
    part is not made up purely of digits (e.g. '1e+20', 'nan'), the plain
    str(n) is returned unchanged.

    >>> format_number(1000)
    '1,000'
    >>> format_number(1234567)
    '1,234,567'
    >>> format_number(-123)
    '-123'
    >>> format_number(1234.5)
    '1,234.5'
    '''
    text = str(n)

    # Peel off the sign so it can never be counted as a digit position.
    sign = ''
    body = text
    if body[:1] in ('-', '+'):
        sign = body[0]
        body = body[1:]

    # Only the integer part gets separators; keep '.fraction' verbatim.
    intpart, dot, frac = body.partition('.')
    if not intpart.isdigit():
        return text

    # Collect groups of three digits from the right-hand side.
    groups = []
    while len(intpart) > 3:
        groups.append(intpart[-3:])
        intpart = intpart[:-3]
    groups.append(intpart)
    groups.reverse()

    return sign + ','.join(groups) + dot + frac
25
26
def natural_sort(ar):
    '''
    Return a new list with the strings of `ar` in "natural" order, i.e.
    runs of digits are compared as integers rather than character by
    character.

    Items whose split forms compare equal (e.g. 'a7' and 'a007') are
    ordered by their plain string value.

    >>> natural_sort('1 3 4 2 5'.split())
    ['1', '2', '3', '4', '5']
    >>> natural_sort('1 10 20 2 3 4'.split())
    ['1', '2', '3', '4', '10', '20']
    '''
    def sort_key(item):
        # Splitting on a captured group keeps the digit runs in the result,
        # alternating text / digits / text / ...
        parts = []
        for piece in re.split(r'(\d+)', item):
            try:
                parts.append(int(piece))
            except ValueError:
                # Not a number (text or an empty string): compare as-is.
                parts.append(piece)
        # The original item is the tie-breaker when the parts are equal.
        return (parts, item)

    return sorted(ar, key=sort_key)
48
49
def dictify(values, colnames):
    """
    Convert a list of values into a dictionary based upon given column names.

    If the column name starts with an '@', the value is assumed to be a comma
    separated list.

    If the name starts with a '#', the value is assumed to be an int.

    If the name starts with '@#', the value is assumed to a comma separated
    list of ints.

    The '@' / '#' markers are stripped from the resulting keys.  Trailing
    commas in list values are dropped before splitting.  Columns that have
    no corresponding entry in `values` are set to None.

    Raises ValueError if a value flagged as an int cannot be converted.
    """
    d = {}
    nvalues = len(values)

    for i, key in enumerate(colnames):
        # '@' must come before '#'; startswith() is safe on empty names.
        split = key.startswith('@')
        if split:
            key = key[1:]
        num = key.startswith('#')
        if num:
            key = key[1:]

        if i >= nvalues:
            # More column names than values: mark the column as missing.
            d[key] = None
            continue

        val = values[i]
        if split:
            val = val.rstrip(',').split(',')
            if num:
                val = [int(x) for x in val]
        elif num:
            val = int(val)

        d[key] = val

    return d
92
93
def gzip_aware_open(fname):
    '''
    Open `fname` for reading and return the file object.

    - '-' returns sys.stdin (nothing is opened).
    - Names ending in '.gz' or '.bgz' are opened with the gzip module.
    - Anything else is opened with the builtin open().

    A leading '~' in the name is expanded.  On Python 3 the gzip file is
    opened in text mode ('rt') so that compressed and uncompressed files
    both yield str lines; on Python 2 gzip.open() is called with its
    default mode, exactly as before.
    '''
    if fname == '-':
        return sys.stdin

    path = os.path.expanduser(fname)

    if fname.endswith(('.gz', '.bgz')):
        if sys.version_info[0] >= 3:
            # gzip.open() defaults to binary; ask for text to match open().
            return gzip.open(path, 'rt')
        return gzip.open(path)

    return open(path)
102
103
class gzip_opener:
    '''
    Context manager (written for Python 2.6 'with' statements) that opens
    a text file which may or may not be gzip compressed.

    The file is opened via gzip_aware_open() on entry and closed on exit,
    unless the handle is sys.stdin, which is left open.
    '''
    def __init__(self, fname):
        # Only remember the name; the file is opened in __enter__.
        self.fname = fname

    def __enter__(self):
        handle = gzip_aware_open(self.fname)
        self.f = handle
        return handle

    def __exit__(self, type, value, traceback):
        handle = self.f
        # Never close stdin: it does not belong to this object.
        if handle != sys.stdin:
            handle.close()
        # False: exceptions raised inside the 'with' body propagate.
        return False
120
121
def filenames_to_uniq(names, new_delim='.'):
    '''
    Given a set of file names, produce a list of names consisting of the
    uniq parts of the names. This works from the end of the name. Chunks of
    the name are split on '.' and '-'.

    A chunk position (counted from the end) is dropped only when every
    name has a chunk at that position and all of those chunks are equal.
    The remaining chunks are re-joined with `new_delim`.  Note that a name
    with no unique chunks at all (e.g. a single input name) becomes ''.

    For example:
        A.foo.bar.txt
        B.foo.bar.txt
        returns: ['A','B']

        AA.BB.foo.txt
        CC.foo.txt
        returns: ['AA.BB','CC']

    >>> filenames_to_uniq('a.foo.bar.txt b.foo.bar.txt'.split())
    ['a', 'b']
    >>> filenames_to_uniq('a.b.foo.txt c.foo.txt'.split())
    ['a.b', 'c']

    '''
    # Split every name into chunks, stored last-chunk-first so that index 0
    # is always the final chunk (usually the extension).
    name_words = []
    for name in names:
        words = name.replace('.', ' ').replace('-', ' ').split()
        words.reverse()
        name_words.append(words)

    if not name_words:
        return []

    maxlen = max(len(words) for words in name_words)
    first = name_words[0]

    # common[i] is True when position i is present and identical everywhere.
    common = [
        all(i < len(words) and words[i] == first[i] for words in name_words)
        for i in range(maxlen)
    ]

    newnames = []
    for words in name_words:
        kept = [w for (w, shared) in zip(words, common) if not shared]
        kept.reverse()
        newnames.append(new_delim.join(kept))
    return newnames
175
176
def parse_args(argv, defaults=None, expected_argc=0):
    '''
    Minimal command-line parser.

    argv          -- list of argument strings (without the program name)
    defaults      -- optional dict of option name -> default value
    expected_argc -- the positional list is padded with None up to this
                     length

    Returns (opts, args):
      * opts starts as a copy of `defaults`.
      * '-name' / '--name' (no '=') sets opts['name'] = True.
      * '-name=value' is only honoured when 'name' is a key of `defaults`;
        the value is converted to float or int if the default is exactly
        of that type, otherwise stored as a string.  Unknown 'name=value'
        options are ignored.  Only the first '=' separates name and value,
        so values may themselves contain '='.
      * everything not starting with '-' (including empty strings) is a
        positional argument.

    NOTE: a bare '-' is treated as an option and ends up as opts[''] = True.

    Raises ValueError if a value cannot be converted to the default's type.
    '''
    # Treat a missing defaults mapping as empty so lookups below are safe.
    known = defaults if defaults else {}
    opts = dict(known)
    args = []

    for token in argv:
        if not token.startswith('-'):
            args.append(token)
            continue

        arg = token.lstrip('-')
        if '=' not in arg:
            opts[arg] = True
            continue

        k, v = arg.split('=', 1)
        if k in known:
            # Exact type match on purpose: a bool default must not be
            # treated as an int.
            kind = type(known[k])
            if kind is float:
                opts[k] = float(v)
            elif kind is int:
                opts[k] = int(v)
            else:
                opts[k] = v

    while len(args) < expected_argc:
        args.append(None)
    return opts, args
206
207
class memoize(object):
    '''
    Simple memoizing decorator to cache results.

    Results are cached per tuple of positional arguments.  If the arguments
    cannot be hashed (a list, for instance), the wrapped function is simply
    called without caching.  Keyword arguments are not supported.
    '''
    def __init__(self, func):
        self.func = func
        self.cache = {}

    def __call__(self, *args):
        try:
            return self.cache[args]
        except KeyError:
            # Hashable but not seen yet: compute and store below.
            pass
        except TypeError:
            # uncacheable. a list, for instance.
            # better to not cache than blow up.
            return self.func(*args)

        value = self.func(*args)
        self.cache[args] = value
        return value