| 
0
 | 
     1 '''
 | 
| 
 | 
     2 collapse intervals
 | 
| 
 | 
     3 '''
 | 
| 
 | 
     4 
 | 
| 
 | 
     5 def collapseInterval_strand(filename,c_strand,c_score):
 | 
| 
 | 
     6     # keeping max column c
 | 
| 
 | 
     7     uniqintv = {}
 | 
| 
 | 
     8     data = {}
 | 
| 
 | 
     9     f = open(filename)
 | 
| 
 | 
    10     header = f.readline()
 | 
| 
 | 
    11     if 'chr' in header:
 | 
| 
 | 
    12         flds = header.strip().split('\t')
 | 
| 
 | 
    13         key = '\t'.join([flds[0],flds[1],flds[2],flds[c_strand]])
 | 
| 
 | 
    14         uniqintv[key] = float(flds[c_score])
 | 
| 
 | 
    15         data[key] = flds
 | 
| 
 | 
    16     for line in f:
 | 
| 
 | 
    17         flds = line.strip().split('\t')
 | 
| 
 | 
    18         key = '\t'.join([flds[0],flds[1],flds[2],flds[c_strand]])
 | 
| 
 | 
    19         if not uniqintv.has_key(key):
 | 
| 
 | 
    20             uniqintv[key] = float(flds[c_score])
 | 
| 
 | 
    21             data[key] = flds
 | 
| 
 | 
    22         elif uniqintv[key] < float(flds[c_score]):
 | 
| 
 | 
    23             uniqintv[key] = float(flds[c_score])
 | 
| 
 | 
    24             data[key] = flds
 | 
| 
 | 
    25             
 | 
| 
 | 
    26     f.close()        
 | 
| 
 | 
    27     for key in uniqintv.keys():
 | 
| 
 | 
    28         print '\t'.join(data[key])
 | 
| 
 | 
    29         
 | 
| 
 | 
    30 import sys
 | 
| 
 | 
    31 
 | 
| 
 | 
    32 if sys.argv[2] == '0':#ignore strand
 | 
| 
 | 
    33     sys.argv[2] = 1
 | 
| 
 | 
    34 if sys.argv[3] == '0':# ignore score
 | 
| 
 | 
    35     sys.argv[3] = 2
 | 
| 
 | 
    36 collapseInterval_strand(sys.argv[1],int(sys.argv[2])-1,int(sys.argv[3])-1)
 |