view collapseTab.py @ 11:b7f1d9f8f3bc

Uploaded
author xuebing
date Sat, 10 Mar 2012 07:59:27 -0500
parents
children
line wrap: on
line source

'''
Collapse a tab-delimited file: for each distinct combination of values in the
key columns, keep only the row with the largest value in the max column.
'''

def collapseTab(filename, c_key, c_max):
    '''Collapse a tab-delimited file, keeping one row per key.

    Rows are grouped by the values found in the key columns.  Within each
    group the row with the largest numeric value in column ``c_max`` is kept;
    on ties the first such row wins.  The surviving rows are printed to
    stdout, tab-joined, in the order in which their keys first appear.

    Args:
        filename: path of the tab-delimited input file.
        c_key: iterable of 1-based column numbers that together form the key.
            It is not modified.  An empty iterable puts every row in a single
            group.
        c_max: 1-based number of the column whose value is maximised; its
            values are parsed with ``float``.

    Lines with fewer fields than the highest referenced column are skipped.

    Raises:
        ValueError: if a value in column ``c_max`` cannot be parsed by
            ``float``.
    '''
    # Convert 1-based column numbers to 0-based indices exactly once, into
    # fresh objects so the caller's list is left untouched.
    key_cols = [col - 1 for col in c_key]
    max_col = c_max - 1
    # Minimum number of fields a line needs for every index to be valid.
    n_col = max(key_cols + [max_col]) + 1

    best = {}   # key tuple -> (best value seen so far, fields of that row)
    order = []  # keys in first-seen order, for deterministic output
    with open(filename) as handle:
        for line in handle:
            fields = line.strip().split('\t')
            if len(fields) < n_col:
                continue
            # A tuple keeps field boundaries, so ("ab", "c") and ("a", "bc")
            # stay distinct -- plain string concatenation would merge them.
            key = tuple(fields[col] for col in key_cols)
            value = float(fields[max_col])
            if key not in best:
                order.append(key)
                best[key] = (value, fields)
            elif best[key][0] < value:
                # Strict comparison: an equal value does not displace the
                # row that was seen first.
                best[key] = (value, fields)

    for key in order:
        print('\t'.join(best[key][1]))
        
import sys

# Command line: <input file> <key columns, comma-separated> <max column>
# The key-column argument, e.g. "1,2,3", is parsed into a list of ints.
c_key = list(map(int, sys.argv[2].split(',')))
c_max = int(sys.argv[3])
collapseTab(sys.argv[1], c_key, c_max)