Repository 'tab_collapse'
hg clone https://toolshed.g2.bx.psu.edu/repos/xuebing/tab_collapse

Changeset 1:7ae45c21ef71 (2012-03-31)
Previous changeset 0:8c992303067a (2012-03-31)
Commit message:
Uploaded
added:
tab_collapse.py
b
diff -r 8c992303067a -r 7ae45c21ef71 tab_collapse.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tab_collapse.py Sat Mar 31 14:11:27 2012 -0400
[
@@ -0,0 +1,37 @@
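+'''
+Collapse a tabular (tab-separated) file: for each distinct combination of
+values in the key columns, keep only the row with the largest value in the
+max column.
+'''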
+
+def collapseTab(filename,c_key,c_max):
+    '''
+    Print, for each distinct key, the row holding the largest c_max value.
+
+    Rows are grouped by the values found in the key columns. Within a group
+    the first row carrying the strictly largest value is kept. Rows with
+    fewer columns than the highest referenced column are skipped. The kept
+    rows are written to stdout, tab-joined, one per line, in the iteration
+    order of the internal dict.
+
+    filename -- path of the tab-separated input file
+    c_key    -- 1-based column numbers forming the key (left unmodified)
+    c_max    -- 1-based number of the column to maximise, parsed with float()
+
+    Raises ValueError if a column number is below 1, or if the c_max field
+    of a processed row cannot be parsed as a number.
+    '''
+    # Convert to 0-based indices exactly once, on copies, so the caller's
+    # list is not altered and no second "- 1" is needed when indexing.
+    key_idx = [c - 1 for c in c_key]
+    max_idx = c_max - 1
+    if min(key_idx + [max_idx]) < 0:
+        # a zero/negative column would silently index from the end of the row
+        raise ValueError('column numbers are 1-based and must be >= 1')
+    nCol = max(key_idx + [max_idx]) + 1  # minimum number of fields required
+
+    best = {}  # key tuple -> (largest value seen, fields of that row)
+    with open(filename) as f:
+        for line in f:
+            # Remove only the line terminator: strip() would also drop empty
+            # leading/trailing fields and shift the column positions.
+            flds = line.rstrip('\r\n').split('\t')
+            if len(flds) < nCol:
+                continue
+            # A tuple keeps field boundaries; plain concatenation would treat
+            # ('ab', 'c') and ('a', 'bc') as the same key.
+            key = tuple([flds[i] for i in key_idx])
+            value = float(flds[max_idx])
+            if key not in best or best[key][0] < value:
+                best[key] = (value, flds)
+
+    for key in best:
+        # single parenthesised argument: valid under Python 2 and Python 3
+        print('\t'.join(best[key][1]))
+        
+import sys
+
+# Command-line entry point:
+#   tab_collapse.py <file> <key columns, comma-separated, 1-based> <max column, 1-based>
+# Guarded so that importing this module does not read sys.argv or run the tool.
+if __name__ == '__main__':
+    if len(sys.argv) < 4:
+        # exit with a usage message instead of an IndexError traceback
+        sys.exit('usage: tab_collapse.py <file> <key_col[,key_col...]> <max_col>')
+    # convert string to number list (a real list, not a one-shot map iterator)
+    c_key = [int(col) for col in sys.argv[2].split(',')]
+    c_max = int(sys.argv[3])
+    collapseTab(sys.argv[1],c_key,c_max)