# view collapseBed.py @ 22:869c7664e584
#
# Uploaded
# author xuebing
# date Sat, 31 Mar 2012 11:52:14 -0400
# parents 16ba480adf96
# children
# line wrap: on
# line source

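'''
Collapse duplicate intervals in a tab-separated interval file.

Lines whose first three columns match (plus the sixth column when the second
command-line argument is 'strand') are merged: the first such line is printed
once to stdout with the number of occurrences appended as an extra column.
'''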

def collapseInterval_strand(filename):
    '''
    Collapse strand-specific duplicate intervals and print them with counts.

    Each line is split on tabs.  Two lines are duplicates when columns
    1, 2, 3 and 6 are all equal (chromosome, start, end and strand if the
    input follows the BED6 layout).  For every distinct key, the fields of
    its first occurrence are printed to stdout as one tab-separated line with
    the number of occurrences appended as an extra column.  Keys are printed
    in the order they first appear in the file.

    The first line is treated as a header and skipped unless it contains the
    substring 'chr'.  Blank lines are ignored.

    filename: path of the tab-separated input file.

    Raises IndexError if a non-blank data line has fewer than six fields.
    '''
    counts = {}      # key -> number of lines sharing that key
    first_seen = {}  # key -> fields of the first line carrying that key
    order = []       # keys in first-appearance order, for stable output

    # 'with' guarantees the file is closed even if a malformed line raises.
    with open(filename) as f:
        for lineno, line in enumerate(f):
            # A first line without 'chr' is assumed to be a header.
            if lineno == 0 and 'chr' not in line:
                continue
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (commonly a trailing one at EOF).
                continue
            flds = stripped.split('\t')
            key = '\t'.join([flds[0], flds[1], flds[2], flds[5]])
            if key in counts:
                counts[key] += 1
            else:
                counts[key] = 1
                first_seen[key] = flds
                order.append(key)

    for key in order:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\t'.join(first_seen[key] + [str(counts[key])]))

def collapseInterval(filename):
    '''
    Collapse duplicate intervals (ignoring strand) and print them with counts.

    Each line is split on tabs.  Two lines are duplicates when their first
    three columns are equal (chromosome, start and end if the input follows
    the BED layout).  For every distinct key, the fields of its first
    occurrence are printed to stdout as one tab-separated line with the
    number of occurrences appended as an extra column.  Keys are printed in
    the order they first appear in the file.

    The first line is treated as a header and skipped unless it contains the
    substring 'chr'.  Blank lines are ignored.

    filename: path of the tab-separated input file.

    Raises IndexError if a non-blank data line has fewer than three fields.
    '''
    counts = {}      # key -> number of lines sharing that key
    first_seen = {}  # key -> fields of the first line carrying that key
    order = []       # keys in first-appearance order, for stable output

    # 'with' guarantees the file is closed even if a malformed line raises.
    with open(filename) as f:
        for lineno, line in enumerate(f):
            # A first line without 'chr' is assumed to be a header.
            if lineno == 0 and 'chr' not in line:
                continue
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (commonly a trailing one at EOF).
                continue
            flds = stripped.split('\t')
            key = '\t'.join([flds[0], flds[1], flds[2]])
            if key in counts:
                counts[key] += 1
            else:
                counts[key] = 1
                first_seen[key] = flds
                order.append(key)

    for key in order:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\t'.join(first_seen[key] + [str(counts[key])]))

import sys

# Command-line entry point: <script> <interval_file> [strand]
# Guarded so that importing this module does not read sys.argv or print.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # sys.exit with a string writes it to stderr and exits with status 1.
        sys.exit('usage: %s <interval_file> [strand]' % sys.argv[0])
    # The mode argument is optional; anything other than 'strand' (including
    # no argument at all) selects the strand-agnostic collapse.
    if len(sys.argv) > 2 and sys.argv[2] == 'strand':
        collapseInterval_strand(sys.argv[1])
    else:
        collapseInterval(sys.argv[1])