Mercurial > repos > xuebing > tab_collapse
diff tab_collapse.py @ 1:7ae45c21ef71 default tip
Uploaded
author | xuebing |
---|---|
date | Sat, 31 Mar 2012 14:11:27 -0400 |
parents | |
children |
line wrap: on
line diff
'''
Collapse tabular files, with key columns, and max columns.

For every distinct combination of values in the key columns, only the row
with the largest numeric value in the "max" column is kept and printed.

Usage: tab_collapse.py <file> <key_col[,key_col...]> <max_col>
(all column numbers are 1-based)
'''

import sys


def collapseTab(filename, c_key, c_max, out=None):
    '''Write, for each distinct key, the row with the largest c_max value.

    Args:
        filename: path of a tab-separated text file.
        c_key: sequence of 1-based column numbers that together form the
            key. The sequence is not modified.
        c_max: 1-based number of the column whose value is compared (as a
            float) to decide which row of a key group is kept.
        out: object with a write() method that receives the result lines;
            defaults to sys.stdout.

    Rows with fewer columns than the highest referenced column are skipped.
    When several rows of a group share the largest value, the first one
    read is kept. Rows are written in the order their key first appeared.

    Raises:
        ValueError: if c_key is empty, if any column number is smaller
            than 1, or if a value in column c_max is not a valid float.
    '''
    if out is None:
        # Looked up at call time so a redirected sys.stdout is honoured.
        out = sys.stdout

    # Convert to 0-based indices exactly once, in fresh locals, so the
    # caller's list is left untouched.
    key_idx = [c - 1 for c in c_key]
    max_idx = c_max - 1
    if not key_idx:
        raise ValueError('at least one key column is required')
    if max_idx < 0 or min(key_idx) < 0:
        # A negative index would silently pick a column from the end.
        raise ValueError('column numbers are 1-based and must be >= 1')
    n_col = max(max(key_idx), max_idx) + 1

    best = {}    # key tuple -> (value, fields) of the best row seen so far
    order = []   # keys in first-seen order, for deterministic output

    with open(filename) as f:
        for line in f:
            # NOTE(review): strip() also removes leading/trailing tabs, so
            # empty cells at either edge of a row are dropped. Kept as in
            # the original script; confirm whether that is intended.
            flds = line.strip().split('\t')
            if len(flds) < n_col:
                continue
            # A tuple key keeps ('ab', 'c') distinct from ('a', 'bc').
            key = tuple(flds[i] for i in key_idx)
            value = float(flds[max_idx])
            if key not in best:
                order.append(key)
                best[key] = (value, flds)
            elif best[key][0] < value:
                best[key] = (value, flds)

    for key in order:
        out.write('\t'.join(best[key][1]) + '\n')


if __name__ == '__main__':
    if len(sys.argv) < 4:
        sys.exit('usage: tab_collapse.py <file> <key_col[,key_col...]> '
                 '<max_col>')

    # convert string to number list
    c_key = [int(c) for c in sys.argv[2].split(',')]
    c_max = int(sys.argv[3])
    collapseTab(sys.argv[1], c_key, c_max)