Mercurial > repos > xuebing > sharplab_seq_motif
comparison mytools/collapseBed.py @ 0:39217fa39ff2
Uploaded
| author | xuebing |
|---|---|
| date | Tue, 13 Mar 2012 23:34:52 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:39217fa39ff2 |
|---|---|
| 1 ''' | |
| 2 collapse intervals | |
| 3 ''' | |
| 4 | |
def collapseInterval_strand(filename):
    """Collapse duplicate intervals, keeping strand as part of the identity.

    Each record in the tab-delimited file *filename* is keyed on its columns
    1, 2, 3 and 6 (presumably chrom, start, end and strand of a BED6 file --
    confirm against the callers). For every distinct key, the first record
    seen with that key is written to standard output with one extra trailing
    column: the number of records that shared the key.

    The first line of the file is treated as a data record only when it
    contains the substring 'chr'; otherwise it is discarded as a header.
    Blank lines are skipped. Records are emitted in order of first
    appearance.

    Raises:
        IOError/OSError: if *filename* cannot be opened.
        IndexError: if a non-blank record has fewer than six columns.
    """
    # Local import keeps this function usable regardless of where the
    # module-level ``import sys`` sits relative to the definitions.
    import sys

    counts = {}      # key -> number of records sharing that key
    first_seen = {}  # key -> fields of the first record with that key
    order = []       # keys in first-appearance order, for stable output

    # ``with`` guarantees the handle is closed even if a short row raises.
    with open(filename) as handle:
        for lineno, line in enumerate(handle):
            # Header heuristic: line one only counts when it mentions 'chr'.
            if lineno == 0 and 'chr' not in line:
                continue
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no interval; skip them rather than crash.
                continue
            flds = stripped.split('\t')
            key = '\t'.join([flds[0], flds[1], flds[2], flds[5]])
            if key in counts:
                counts[key] += 1
            else:
                counts[key] = 1
                first_seen[key] = flds
                order.append(key)

    for key in order:
        sys.stdout.write('\t'.join(first_seen[key] + [str(counts[key])]) + '\n')
| 28 | |
def collapseInterval(filename):
    """Collapse duplicate intervals, ignoring any columns past the third.

    Each record in the tab-delimited file *filename* is keyed on its first
    three columns (presumably chrom, start and end of a BED file -- confirm
    against the callers). For every distinct key, the first record seen with
    that key is written to standard output with one extra trailing column:
    the number of records that shared the key.

    The first line of the file is treated as a data record only when it
    contains the substring 'chr'; otherwise it is discarded as a header.
    Blank lines are skipped. Records are emitted in order of first
    appearance.

    Raises:
        IOError/OSError: if *filename* cannot be opened.
        IndexError: if a non-blank record has fewer than three columns.
    """
    # Local import keeps this function usable regardless of where the
    # module-level ``import sys`` sits relative to the definitions.
    import sys

    counts = {}      # key -> number of records sharing that key
    first_seen = {}  # key -> fields of the first record with that key
    order = []       # keys in first-appearance order, for stable output

    # ``with`` guarantees the handle is closed even if a short row raises.
    with open(filename) as handle:
        for lineno, line in enumerate(handle):
            # Header heuristic: line one only counts when it mentions 'chr'.
            if lineno == 0 and 'chr' not in line:
                continue
            stripped = line.strip()
            if not stripped:
                # Blank lines carry no interval; skip them rather than crash.
                continue
            flds = stripped.split('\t')
            key = '\t'.join([flds[0], flds[1], flds[2]])
            if key in counts:
                counts[key] += 1
            else:
                counts[key] = 1
                first_seen[key] = flds
                order.append(key)

    for key in order:
        sys.stdout.write('\t'.join(first_seen[key] + [str(counts[key])]) + '\n')
| 52 | |
| 53 import sys | |
| 54 | |
# Command-line entry point: collapseBed.py <bed_file> [strand]
if len(sys.argv) < 2:
    # Without a filename there is nothing to read; report usage instead of
    # failing with a bare IndexError traceback.
    sys.stderr.write('usage: collapseBed.py <bed_file> [strand]\n')
    sys.exit(1)

# Only the literal mode 'strand' selects the strand-aware collapse; any other
# value -- or no mode argument at all -- uses the strand-agnostic one.
if len(sys.argv) > 2 and sys.argv[2] == 'strand':
    collapseInterval_strand(sys.argv[1])
else:
    collapseInterval(sys.argv[1])
