diff interval/collapseBed.py @ 18:9bbb37e8683f

Uploaded
author xuebing
date Sat, 31 Mar 2012 08:24:32 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/interval/collapseBed.py	Sat Mar 31 08:24:32 2012 -0400
@@ -0,0 +1,58 @@
+'''
+collapse intervals
+'''
+
+def collapseInterval_strand(filename):
+    uniqintv = {}
+    data = {}
+    f = open(filename)
+    header = f.readline()
+    if 'chr' in header:
+        flds = header.strip().split('\t')
+        key = '\t'.join([flds[0],flds[1],flds[2],flds[5]])
+        uniqintv[key] = 1
+        data[key] = flds
+    for line in f:
+        flds = line.strip().split('\t')
+        key = '\t'.join([flds[0],flds[1],flds[2],flds[5]])
+        if uniqintv.has_key(key):
+            uniqintv[key] = uniqintv[key] + 1
+        else:
+            uniqintv[key] = 1
+            data[key] = flds
+    f.close()        
+    for key in uniqintv.keys():
+        print '\t'.join(data[key]+[str(uniqintv[key])])
+        #flds = key.split('\t')
+        #print '\t'.join([flds[0],flds[1],flds[2],'.',str(uniqintv[key]),flds[3]])
+
+def collapseInterval(filename):
+    uniqintv = {}
+    data = {}
+    f = open(filename)
+    header = f.readline()
+    if 'chr' in header:
+        flds = header.strip().split('\t')
+        key = '\t'.join([flds[0],flds[1],flds[2]])
+        uniqintv[key] = 1
+        data[key] = flds
+    for line in f:
+        flds = line.strip().split('\t')
+        key = '\t'.join([flds[0],flds[1],flds[2]])
+        if uniqintv.has_key(key):
+            uniqintv[key] = uniqintv[key] + 1
+        else:
+            uniqintv[key] = 1
+            data[key] = flds
+    f.close()        
+    for key in uniqintv.keys():
+        print '\t'.join(data[key]+[str(uniqintv[key])])
+        #flds = key.split('\t')
+        #print '\t'.join([flds[0],flds[1],flds[2],'.',str(uniqintv[key])])       
+
+import sys
+
+if sys.argv[2] == 'strand':
+    collapseInterval_strand(sys.argv[1])
+else:
+    collapseInterval(sys.argv[1])