diff cmpb2016/top10_CDR3_exact_pairing.py @ 0:8be019b173e6 draft

Uploaded included tools
author chmaramis
date Sun, 18 Mar 2018 05:54:20 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cmpb2016/top10_CDR3_exact_pairing.py	Sun Mar 18 05:54:20 2018 -0400
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Apr 18 09:48:00 2016
+
+@author: chmaramis
+"""
+
+import pandas as pd
+import numpy as np
+import sys
+
+
+def pair_top_junctions(Cl, nTop=10):
+    """Tabulate reads and frequency per gene for the top CDR3 junctions.
+
+    The first ``nTop`` rows of ``Cl`` (fewer when the table is shorter)
+    supply the junctions.  For each of them, every row of ``Cl`` with the
+    same 'AA JUNCTION' adds its 'Reads' and 'Frequency %' to the output
+    row of its gene.  Rows are addressed by position, so the index labels
+    of ``Cl`` do not have to run 1..N.
+
+    Returns a DataFrame indexed by the sorted unique genes, holding a
+    '<junction> Reads' and a '<junction> Freq. %' column per junction.
+    """
+    geneCol = [x for x in Cl.columns if x.upper().endswith('GENE')][0]
+    junctions = Cl['AA JUNCTION']
+    genes = np.array(Cl[geneCol])
+    reads = np.array(Cl['Reads'])
+    freqs = np.array(Cl['Frequency %'])
+    nTop = min(nTop, len(Cl))
+
+    vG_unique = np.unique(genes)
+    geneRow = dict((g, k) for k, g in enumerate(vG_unique))
+    F = np.zeros((len(vG_unique), 2 * nTop))
+    columns = []
+    for i in range(nTop):
+        taa = junctions.iloc[i]
+        columns += ['{0} Reads'.format(taa), '{0} Freq. %'.format(taa)]
+        sameAA = np.where(junctions == taa)[0]  # positions, not labels
+        if len(sameAA) == 0:
+            continue  # missing junction (NaN never equals itself)
+        # The first occurrence should be row i itself; a different gene
+        # means the junction already appears in an earlier row.
+        if genes[sameAA[0]] != genes[i]:
+            print('We have a problem here!')
+        for pos in sameAA:
+            F[geneRow[genes[pos]], 2 * i] += reads[pos]
+            F[geneRow[genes[pos]], 2 * i + 1] += freqs[pos]
+    return pd.DataFrame(F, columns=columns, index=vG_unique)
+
+
+if __name__ == "__main__":
+    # Usage: top10_CDR3_exact_pairing.py <clonotypes.tsv> <output.tsv>
+    Cl = pd.read_csv(sys.argv[1], sep='\t', index_col=0)
+    pair_top_junctions(Cl).to_csv(sys.argv[2], sep='\t')
+