Mercurial > repos > xuebing > tab_collapse
comparison tab_collapse.py @ 1:7ae45c21ef71 default tip
Uploaded
| author | xuebing |
|---|---|
| date | Sat, 31 Mar 2012 14:11:27 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:8c992303067a | 1:7ae45c21ef71 |
|---|---|
| 1 ''' | |
| 2 Collapse a tab-delimited file: for each distinct combination of key-column values, keep only the row with the largest value in the max column. | |
| 3 ''' | |
| 4 | |
def collapseTab(filename, c_key, c_max):
    """Print, for each distinct key, the row with the largest value.

    Reads the tab-delimited file *filename* line by line.  Rows are grouped
    by the values found in the key columns; within each group only the row
    whose *c_max* column holds the largest number is kept.  When several
    rows of a group tie for the largest value, the first one read wins.
    The surviving rows are written to standard output, tab-joined, one per
    line.

    Args:
        filename: path of the tab-delimited input file.
        c_key: iterable of 1-based column numbers that together form the
            grouping key.  It is not modified.
        c_max: 1-based number of the column holding the numeric value to
            maximise.

    Raises:
        ValueError: if the *c_max* field of a sufficiently long row cannot
            be converted with ``float()``.
        IOError / OSError: if *filename* cannot be opened.
    """
    # Convert the 1-based column numbers to 0-based indices exactly once,
    # on private copies, so the caller's list is left untouched.
    key_cols = list(c_key)
    key_idx = [col - 1 for col in key_cols]
    max_idx = c_max - 1
    # Minimum number of fields a row needs for every referenced column to exist.
    n_col = max(key_cols + [c_max])

    best = {}  # key tuple -> (largest value seen so far, fields of that row)
    with open(filename) as f:
        for line in f:
            # Remove only the line terminator: a plain strip() would also eat
            # leading/trailing tabs, dropping empty fields and shifting columns.
            flds = line.rstrip('\r\n').split('\t')
            if len(flds) < n_col:
                continue  # row too short to contain all requested columns
            # A tuple key keeps ('ab', 'c') distinct from ('a', 'bc').
            key = tuple(flds[i] for i in key_idx)
            value = float(flds[max_idx])
            # Strict comparison: on a tie the earlier row is retained.
            if key not in best or best[key][0] < value:
                best[key] = (value, flds)

    for _value, flds in best.values():
        print('\t'.join(flds))
| 31 | |
| 32 import sys | |
| 33 | |
if __name__ == '__main__':
    # Command line: <input file> <key columns> <max column>
    #   key columns: comma-separated, 1-based column numbers, e.g. "1,2,3"
    #   max column:  single 1-based column number
    if len(sys.argv) < 4:
        sys.exit('usage: %s <file> <key_col[,key_col...]> <max_col>' % sys.argv[0])

    # Convert the comma-separated string to a list of ints.  A list
    # comprehension yields a real list on every Python version, whereas
    # map() returns a one-shot iterator on Python 3.
    c_key = [int(col) for col in sys.argv[2].split(',')]
    c_max = int(sys.argv[3])
    collapseTab(sys.argv[1], c_key, c_max)
