# HG changeset patch
# User xuebing
# Date 1333217487 14400
# Node ID 7ae45c21ef71755d464915da9a02101a8db15575
# Parent 8c992303067a0dd1d3cf2e74608ffe16eaf8d1a1
# Uploaded
#
# Original patch header (kept for provenance):
#   diff -r 8c992303067a -r 7ae45c21ef71 tab_collapse.py
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/tab_collapse.py Sat Mar 31 14:11:27 2012 -0400
#   @@ -0,0 +1,37 @@
"""Collapse a tabular file on key columns, keeping the max-valued row.

Rows of a tab-separated file are grouped by the values found in one or
more key columns.  For every group only the row with the largest numeric
value in a chosen column is kept, and the surviving rows are printed to
standard output.

Command line usage::

    tab_collapse.py <file> <key columns, e.g. 1,2> <max column>

All column numbers are 1-based.
"""

import sys


def _select_max_rows(lines, key_idx, max_idx, n_col):
    """Return the best row per key, in order of first key appearance.

    lines   -- iterable of text lines (tab-separated fields)
    key_idx -- 0-based indices of the fields that form the grouping key
    max_idx -- 0-based index of the numeric field to maximise
    n_col   -- minimum number of fields a line needs to be considered

    Raises ValueError if the value field of a considered line is not a
    valid float.
    """
    best = {}   # key tuple -> (value, fields) of the best row so far
    order = []  # keys in first-seen order, so output is deterministic
    for line in lines:
        if not line.strip():
            # Whitespace-only lines carry no data.
            continue
        # Strip only the line terminator: stripping tabs would shift the
        # columns of rows whose first or last field is empty.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) < n_col:
            continue
        # A tuple key cannot collide the way concatenated strings can
        # ("ab" + "c" == "a" + "bc").
        key = tuple(fields[i] for i in key_idx)
        value = float(fields[max_idx])
        if key not in best:
            order.append(key)
            best[key] = (value, fields)
        elif best[key][0] < value:
            # Strict comparison: on ties the earliest row is kept.
            best[key] = (value, fields)
    return [best[key][1] for key in order]


def collapseTab(filename, c_key, c_max):
    """Print, for each distinct key, the row with the largest c_max value.

    filename -- path of the tab-separated input file
    c_key    -- iterable of 1-based column numbers forming the key
                (not modified)
    c_max    -- 1-based number of the column whose value is maximised

    Lines with fewer fields than the highest referenced column, and
    whitespace-only lines, are skipped.  Surviving rows are written to
    standard output, tab-joined, in order of first key appearance.

    Raises ValueError if no key column is given, if any column number is
    smaller than 1, or if a value in column c_max is not numeric.
    """
    # Work on a copy so the caller's list is left untouched; list() also
    # accepts the iterator that map() returns on Python 3.
    key_cols = list(c_key)
    if not key_cols:
        raise ValueError("c_key must name at least one column")
    if c_max < 1 or min(key_cols) < 1:
        raise ValueError("column numbers are 1-based and must be >= 1")

    n_col = max(max(key_cols), c_max)
    # Convert from 1-based column numbers to 0-based indices exactly once.
    key_idx = [col - 1 for col in key_cols]

    with open(filename) as handle:
        rows = _select_max_rows(handle, key_idx, c_max - 1, n_col)

    for row in rows:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\t'.join(row))


def _main(argv):
    """Run the command line interface; return the process exit status."""
    if len(argv) < 4:
        sys.stderr.write(
            "usage: %s <file> <key columns, e.g. 1,2> <max column>\n"
            % argv[0])
        return 2
    # Convert the comma-separated column list to a list of ints.
    key_cols = [int(tok) for tok in argv[2].split(',')]
    collapseTab(argv[1], key_cols, int(argv[3]))
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))