Mercurial > repos > chmaramis > testirprofiler
view cmpb2016/top10_CDR3_inexact_pairing.py @ 0:8be019b173e6 draft
Uploaded included tools
| author | chmaramis |
|---|---|
| date | Sun, 18 Mar 2018 05:54:20 -0400 |
| parents | |
| children |
line wrap: on
line source
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 18 11:37:40 2016
(original header also records: Created on Mon Apr 18 09:48:00 2016)

@author: chmaramis

For each of the first rows of a tab-separated table, count the other rows
whose 'AA JUNCTION' string is within Hamming distance 1, split by whether
they share the same gene value, and sum their 'Frequency %'.

Usage: top10_CDR3_inexact_pairing.py <input table> <output table>
"""

import pandas as pd
import numpy as np
import sys
import functools as ft

# Number of leading rows of the input table that are analysed by default.
TOP_N = 10


def maxHam1(s1, s2):
    """Return True when s1 and s2 have equal length and differ in at most
    one position; strings of different length never match."""
    if len(s1) != len(s2):
        return False
    return sum(c1 != c2 for c1, c2 in zip(s1, s2)) <= 1


def inexact_pairing_table(Cl, top_n=TOP_N):
    """Build the inexact-pairing summary for the first ``top_n`` rows of Cl.

    Parameters
    ----------
    Cl : pandas.DataFrame
        Must contain the columns 'AA JUNCTION' and 'Frequency %' and at
        least one column whose upper-cased name ends with 'GENE' (the first
        such column is used).
    top_n : int
        How many leading rows to analyse. Fewer are used when the table is
        shorter.

    Returns
    -------
    pandas.DataFrame
        Two rows ('same gene', 'different gene') and, per analysed row, two
        columns: '<junction> Nr. Clonos' (number of other rows whose
        junction is within Hamming distance 1) and '<junction> Freq. %'
        (sum of 'Frequency %' over those rows).

    Raises
    ------
    ValueError
        If no column name ends with 'GENE'.
    """
    gene_col = next(
        (col for col in Cl.columns if col.upper().endswith('GENE')), None)
    if gene_col is None:
        raise ValueError("input table has no column whose name ends with 'GENE'")

    # Loop-invariant column lookups.
    all_junctions = Cl['AA JUNCTION']
    all_genes = Cl[gene_col]
    all_freqs = Cl['Frequency %']

    top = Cl.iloc[:top_n]
    top_junctions = list(top['AA JUNCTION'])
    top_genes = list(top[gene_col])

    # Size the result from the rows actually present so that short tables
    # do not break the column labelling.
    stats = np.zeros((2, 2 * len(top_junctions)))
    labels = []
    for pos, (junction, gene) in enumerate(zip(top_junctions, top_genes)):
        near = all_junctions.apply(ft.partial(maxHam1, s2=junction))
        # Exclude the row itself. Positional access is used because `top`
        # is the first rows of Cl, so position `pos` is the same row in
        # both, whatever the index labels are.
        near.iloc[pos] = False
        same_gene = all_genes == gene

        near_same = near & same_gene
        near_other = near & ~same_gene
        stats[0, 2 * pos] = near_same.sum()
        stats[0, 2 * pos + 1] = all_freqs[near_same].sum()
        stats[1, 2 * pos] = near_other.sum()
        stats[1, 2 * pos + 1] = all_freqs[near_other].sum()

        labels.append(junction + ' Nr. Clonos')
        labels.append(junction + ' Freq. %')

    return pd.DataFrame(stats, columns=labels,
                        index=['same gene', 'different gene'])


def main(argv=None):
    """Read the input table named in argv[1] and write the summary to argv[2]."""
    args = sys.argv if argv is None else argv
    if len(args) < 3:
        sys.exit('usage: %s <input table> <output table>' % args[0])
    clonosFN = args[1]
    outFN = args[2]

    Cl = pd.read_csv(clonosFN, sep='\t', index_col=0)
    D = inexact_pairing_table(Cl)
    D.to_csv(outFN, sep='\t')


if __name__ == "__main__":
    main()
