20
|
'''
Collapse a tab-separated file: for each distinct combination of values in
the key columns, keep only the row with the largest value in the max column.
'''
|
|
4
|
|
def collapseTab(filename,c_key,c_max):
    """Print one row per key, keeping the row with the largest max-column value.

    The file is read as tab-separated text. Rows are grouped by the values
    found in the key columns; within each group the row whose ``c_max``
    column (parsed as a float) is largest is kept. On ties the first such
    row wins. Rows with fewer fields than the highest referenced column are
    skipped. The surviving rows are printed to standard output, fields
    re-joined with tabs.

    Args:
        filename: path of the tab-separated input file.
        c_key: iterable of 1-based key column numbers. It is not modified.
        c_max: 1-based number of the column holding the value to maximise.

    Raises:
        ValueError: if a column number is smaller than 1, or if the max
            column of a sufficiently long row cannot be parsed as a float.
    """
    # Work on 0-based copies so the caller's list is left untouched and the
    # 1-based -> 0-based shift is applied exactly once.
    key_cols = [col - 1 for col in c_key]
    value_col = c_max - 1
    if value_col < 0 or any(col < 0 for col in key_cols):
        # A negative index would silently select a column from the end.
        raise ValueError('column numbers are 1-based and must be >= 1')

    # Minimum number of fields a row needs for every referenced column.
    n_col = max(key_cols + [value_col]) + 1

    best = {}  # key tuple -> (max value seen so far, fields of that row)
    with open(filename) as handle:
        for line in handle:
            # NOTE(review): strip() also removes leading/trailing tabs, so
            # empty columns at either edge of a row are dropped (kept as in
            # the original so rows with an empty last column are skipped).
            fields = line.strip().split('\t')
            if len(fields) < n_col:
                continue
            # A tuple keeps field boundaries: ('ab', 'c') != ('a', 'bc').
            key = tuple(fields[col] for col in key_cols)
            value = float(fields[value_col])
            if key not in best or best[key][0] < value:
                best[key] = (value, fields)

    for _, fields in best.values():
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\t'.join(fields))
|
|
31
|
|
32 import sys
|
|
33
|
|
# Command line: <file> <comma-separated 1-based key columns> <1-based max column>
# Parse the comma-separated key column numbers into a list of ints.
c_key = [int(token) for token in sys.argv[2].split(',')]
c_max = int(sys.argv[3])
collapseTab(sys.argv[1], c_key, c_max)
|