view mytools/collapseBed2.py @ 9:87eb5c5ddfe9

Uploaded
author xuebing
date Fri, 09 Mar 2012 20:01:43 -0500
parents f0dc65e7f6c0
children
line wrap: on
line source

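'''
Collapse duplicate intervals in a tab-delimited interval file.

Records that share the same first three columns and the same value in a
chosen "strand" column are collapsed into one record: the one with the
highest value in a chosen "score" column. The surviving records are
printed to standard output.
'''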

def collapseInterval_strand(filename,c_strand,c_score):
    """Print one record per unique interval, keeping the highest score.

    Each line of the tab-delimited file `filename` is stripped of
    surrounding whitespace and split on tabs. Records are grouped by the
    key (column 0, column 1, column 2, column `c_strand`); within a group
    only the record with the largest float value in column `c_score` is
    kept. On a tie the record seen first wins.

    The first line is treated as a header and discarded unless it
    contains the substring 'chr', in which case it is processed like any
    other record. Blank lines are skipped.

    Parameters:
        filename: path of the tab-delimited input file.
        c_strand: 0-based index of the column added to the grouping key.
        c_score:  0-based index of the column holding the numeric score.

    Output:
        The kept records are printed to standard output, one per line,
        fields joined by tabs, in the iteration order of the dictionary
        that holds them.

    Raises:
        IndexError: a non-blank line has too few columns.
        ValueError: a score field cannot be converted to float.
    """
    best_score = {}   # key -> highest score seen so far
    best_fields = {}  # key -> fields of the record carrying that score

    # `with` guarantees the file is closed even if a line fails to parse.
    with open(filename) as f:
        for lineno, line in enumerate(f):
            # Only the first line can be a header; it is one when it
            # lacks the 'chr' marker.
            if lineno == 0 and 'chr' not in line:
                continue
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. at end of file) carry no record.
                continue
            flds = stripped.split('\t')
            key = '\t'.join([flds[0], flds[1], flds[2], flds[c_strand]])
            score = float(flds[c_score])
            # Strict '<' keeps the earliest record when scores are equal.
            if key not in best_score or best_score[key] < score:
                best_score[key] = score
                best_fields[key] = flds

    for key in best_fields:
        # Parenthesised single argument works as both the Python 2 print
        # statement and the Python 3 print function.
        print('\t'.join(best_fields[key]))
        
import sys

# Command line: <input file> <strand column> <score column>
# Columns are 1-based; '0' means "ignore this column".
if __name__ == '__main__':
    if len(sys.argv) < 4:
        sys.exit('usage: %s <input file> <strand column, 0 to ignore> '
                 '<score column, 0 to ignore>' % sys.argv[0])

    # Ignoring strand: fall back to column 1, which is already part of the
    # grouping key, so it adds nothing new to the key.
    strand_col = 1 if sys.argv[2] == '0' else int(sys.argv[2])
    # Ignoring score: fall back to column 2, which is also part of the
    # grouping key, so every record in a group has the same "score" and the
    # first one seen is kept. Column 2 must therefore be numeric.
    score_col = 2 if sys.argv[3] == '0' else int(sys.argv[3])

    # The function expects 0-based column indexes.
    collapseInterval_strand(sys.argv[1], strand_col - 1, score_col - 1)