diff cmpb2016/top10_CDR3_inexact_pairing.py @ 12:cdf95051bc55 draft default tip

Uploaded 2 tools
author chmaramis
date Sun, 18 Mar 2018 07:11:06 -0400
parents 14896ea6e180
children
line wrap: on
line diff
--- a/cmpb2016/top10_CDR3_inexact_pairing.py	Sun Mar 18 07:07:39 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Apr 18 11:37:40 2016
-
-@author: chmaramis
-"""
-
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Apr 18 09:48:00 2016
-
-@author: chmaramis
-"""
-
-import pandas as pd
-import numpy as np
-import sys
-import functools as ft
-
-def maxHam1(s1, s2):
-    if  len(s1) != len(s2):
-        return False
-    else: 
-        return sum(c1 != c2 for c1, c2 in zip(s1, s2)) <= 1
-    
-
-if __name__ == "__main__":
-    
-    clonosFN = sys.argv[1]
-    outFN = sys.argv[2]
-
-    Cl = pd.read_csv(clonosFN,sep='\t',index_col=0)
-    T10 = Cl[:10].copy()
-    
-    aa_junction = np.array(T10['AA JUNCTION'])
-    geneCol = [x for x in T10.columns if x.upper().endswith('GENE')][0]
-    
-    F = np.zeros((2,20))
-    
-    for i in range(0,10):
-        taa = T10['AA JUNCTION'][i+1]
-        gene = T10[geneCol][i+1]
-        S1 = Cl['AA JUNCTION'].apply(ft.partial(maxHam1, s2=taa))
-        S2 = Cl[geneCol] == gene
-        S1[i+1] = False
-        F[0,2*i] = (S1 & S2).sum()
-        F[0,2*i+1] = Cl['Frequency %'][S1 & S2].sum()
-        F[1,2*i] = (S1 & ~S2).sum()
-        F[1,2*i+1] = Cl['Frequency %'][S1 & ~S2].sum()
-        
-    
-    K = list(aa_junction+' Nr. Clonos') 
-    L = list(aa_junction+' Freq. %')
-    columns = [val for pair in zip(K,L) for val in pair]
-                          
-    D = pd.DataFrame(F,columns=columns, index=['same gene', 'different gene'])
-    D.to_csv(outFN,sep='\t')
-