diff public_clonotype_computation.py @ 0:0e37e5b73273 draft

Initial commit
author chmaramis
date Fri, 30 Mar 2018 07:22:29 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/public_clonotype_computation.py	Fri Mar 30 07:22:29 2018 -0400
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Mar 24 17:18:09 2018
+
+@author: chmaramis
+"""
+
+from __future__ import division
+import numpy as np
+from pandas import *
+from numpy import nan as NA
+import sys
+import time
+
+clono_def = {'CDR3': ['AA JUNCTION'],  # clonotype definition name -> columns the sample tables are merged on
+             'VCDR3': ['V-GENE','AA JUNCTION'],
+             'JCDR3': ['J-GENE','AA JUNCTION']}
+
+
+
+def publicClonotypeComputation(inputs, clono, thres):
+    
+    clono_comps = clono_def[clono]
+    
+    clono=DataFrame()
+
+    for x in range(0,len(inputs),2):
+            cl = DataFrame()
+            cl = read_csv(inputs[x] , sep = '\t' , index_col = 0)
+            #tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 )
+            #cl = concat([chunk for chunk in tp]) 
+            
+            if (thres != 'null'):
+                cl = cl[cl['Reads'] > int(thres)]
+            
+            x1 = inputs[x+1].split('_')
+            
+            del cl['Reads']
+            cl.columns = [cl.columns[0], cl.columns[1], x1[0]+' '+cl.columns[2], x1[0]+' Relative '+cl.columns[3]]
+            
+            if clono.empty:
+                clono = cl
+            else:
+                clono = clono.merge(cl, how='outer', on=clono_comps)
+    
+    
+    col = clono.columns
+    freqs = col.map(lambda x: 'Frequency' in x)
+    reads = col.map(lambda x: 'Reads/Total' in x)
+    
+    clono[col[freqs]] = clono[col[freqs]].fillna(0)
+    clono[col[reads]] = clono[col[reads]].fillna('0/*')
+    
+    clono['Num of Patients']= clono[col[freqs]].apply(lambda x: np.sum(x != 0), axis=1)
+
+    clono = clono[clono['Num of Patients'] > 1]
+
+    clono.index = range(1,len(clono)+1)
+    
+    return clono    
+
+
+if __name__ == '__main__':   
+
+    start=time.time()
+
+    # Parse input arguments    
+    arg = sys.argv[4:]
+    clono = sys.argv[1]
+    output = sys.argv[2]
+    thres = sys.argv[3]
+    
+    
+    
+    # Execute basic function
+    mer = publicClonotypeComputation(arg, clono, thres)
+    
+    # Save output to CSV files
+    if not mer.empty: 
+        mer.to_csv(output , sep = '\t') 
+        
+    # Print execution time
+    stop=time.time()
+    print('Runtime:' + str(stop-start))