diff cmpb2016/top10_CDR3_inexact_pairing.py @ 0:8be019b173e6 draft

Uploaded included tools
author chmaramis
date Sun, 18 Mar 2018 05:54:20 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cmpb2016/top10_CDR3_inexact_pairing.py	Sun Mar 18 05:54:20 2018 -0400
@@ -0,0 +1,58 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Apr 18 11:37:40 2016
+
+@author: chmaramis
+"""
+
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Apr 18 09:48:00 2016
+
+@author: chmaramis
+"""
+
+import pandas as pd
+import numpy as np
+import sys
+import functools as ft
+
+def maxHam1(s1, s2):
+    if  len(s1) != len(s2):
+        return False
+    else: 
+        return sum(c1 != c2 for c1, c2 in zip(s1, s2)) <= 1
+    
+
+if __name__ == "__main__":
+    
+    clonosFN = sys.argv[1]
+    outFN = sys.argv[2]
+
+    Cl = pd.read_csv(clonosFN,sep='\t',index_col=0)
+    T10 = Cl[:10].copy()
+    
+    aa_junction = np.array(T10['AA JUNCTION'])
+    geneCol = [x for x in T10.columns if x.upper().endswith('GENE')][0]
+    
+    F = np.zeros((2,20))
+    
+    for i in range(0,10):
+        taa = T10['AA JUNCTION'][i+1]
+        gene = T10[geneCol][i+1]
+        S1 = Cl['AA JUNCTION'].apply(ft.partial(maxHam1, s2=taa))
+        S2 = Cl[geneCol] == gene
+        S1[i+1] = False
+        F[0,2*i] = (S1 & S2).sum()
+        F[0,2*i+1] = Cl['Frequency %'][S1 & S2].sum()
+        F[1,2*i] = (S1 & ~S2).sum()
+        F[1,2*i+1] = Cl['Frequency %'][S1 & ~S2].sum()
+        
+    
+    K = list(aa_junction+' Nr. Clonos') 
+    L = list(aa_junction+' Freq. %')
+    columns = [val for pair in zip(K,L) for val in pair]
+                          
+    D = pd.DataFrame(F,columns=columns, index=['same gene', 'different gene'])
+    D.to_csv(outFN,sep='\t')
+