Mercurial > repos > chmaramis > testirprofiler
diff cmpb2016/top10_CDR3_inexact_pairing.py @ 0:8be019b173e6 draft
Uploaded included tools
| author | chmaramis |
|---|---|
| date | Sun, 18 Mar 2018 05:54:20 -0400 |
| parents | |
| children |
line wrap: on
line diff
# -*- coding: utf-8 -*-
"""
Inexact CDR3 pairing for the most frequent clonotypes.

For each of the first rows (10 by default) of a tab-separated clonotype
table, count the other clonotypes whose 'AA JUNCTION' has the same length
and differs in at most one position, and sum their 'Frequency %'.  The
results are split into clonotypes that share the reference clonotype's gene
and clonotypes that carry a different gene.

Usage: top10_CDR3_inexact_pairing.py <clonotypes.tsv> <output.tsv>

Created on Mon Apr 18 09:48:00 2016
Revised on Mon Apr 18 11:37:40 2016

@author: chmaramis
"""

import pandas as pd
import numpy as np
import sys
import functools as ft


def maxHam1(s1, s2):
    """Return True if s1 and s2 have equal length and differ in <= 1 position.

    Sequences of different lengths are never considered a match.
    """
    if len(s1) != len(s2):
        return False
    return sum(c1 != c2 for c1, c2 in zip(s1, s2)) <= 1


def pairing_table(Cl, top_n=10):
    """Build the inexact-pairing summary for the first ``top_n`` rows of Cl.

    Cl must contain the columns 'AA JUNCTION' and 'Frequency %' plus a column
    whose upper-cased name ends with 'GENE' (the first such column is used).

    Returns a DataFrame with the rows 'same gene' and 'different gene' and,
    for every reference clonotype, the two columns '<junction> Nr. Clonos'
    and '<junction> Freq. %'.

    Raises ValueError when no gene column is present.
    """
    geneCol = next(
        (x for x in Cl.columns if x.upper().endswith('GENE')), None)
    if geneCol is None:
        raise ValueError("no column whose name ends with 'GENE' was found")

    junctions = Cl['AA JUNCTION']
    genes = Cl[geneCol]
    freqs = Cl['Frequency %']

    # The input may hold fewer than top_n clonotypes; size the result on the
    # rows that actually exist instead of assuming exactly ten.
    n_top = len(Cl.iloc[:top_n])
    F = np.zeros((2, 2 * n_top))

    for i in range(n_top):
        # Positional access: the reference rows are "the first n rows",
        # whatever labels the index happens to carry.
        taa = junctions.iloc[i]
        gene = genes.iloc[i]

        S1 = junctions.apply(ft.partial(maxHam1, s2=taa))
        S1.iloc[i] = False  # a clonotype must not be paired with itself
        S2 = genes == gene

        same = S1 & S2
        different = S1 & ~S2
        F[0, 2 * i] = same.sum()
        F[0, 2 * i + 1] = freqs[same].sum()
        F[1, 2 * i] = different.sum()
        F[1, 2 * i + 1] = freqs[different].sum()

    # Interleave the count / frequency labels: one pair per reference.
    columns = []
    for aa in junctions.iloc[:n_top]:
        columns.append(aa + ' Nr. Clonos')
        columns.append(aa + ' Freq. %')

    return pd.DataFrame(F, columns=columns,
                        index=['same gene', 'different gene'])


def main(argv):
    """Read the clonotype table named in argv[1], write the summary to argv[2]."""
    if len(argv) < 3:
        sys.exit('usage: %s <clonotypes.tsv> <output.tsv>' % argv[0])

    clonosFN = argv[1]
    outFN = argv[2]

    Cl = pd.read_csv(clonosFN, sep='\t', index_col=0)
    D = pairing_table(Cl)
    D.to_csv(outFN, sep='\t')


if __name__ == "__main__":
    main(sys.argv)
