Mercurial > repos > chmaramis > testirprofiler
diff cmpb2016/top10_CDR3_inexact_pairing.py @ 12:cdf95051bc55 draft default tip
Uploaded 2 tools
| author | chmaramis |
|---|---|
| date | Sun, 18 Mar 2018 07:11:06 -0400 |
| parents | 14896ea6e180 |
| children | |
line wrap: on
line diff
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 18 11:37:40 2016

@author: chmaramis

For each of the first rows (at most ten) of a tab-separated table, count the
other rows whose 'AA JUNCTION' string has the same length and differs in at
most one position, split by whether they carry the same value in the gene
column, and sum their 'Frequency %'.

Usage: top10_CDR3_inexact_pairing.py <input table> <output table>
"""

import pandas as pd
import numpy as np
import sys
import functools as ft

# Number of leading rows of the input table that are analysed.
TOP_N = 10
JUNCTION_COL = 'AA JUNCTION'
FREQ_COL = 'Frequency %'


def maxHam1(s1, s2):
    """Return True when s1 and s2 are within Hamming distance 1.

    Both arguments must have the same length and differ in at most one
    position (identical sequences therefore match). Values without a
    length, such as the NaN pandas uses for an empty cell, never match.
    """
    try:
        if len(s1) != len(s2):
            return False
    except TypeError:
        # A missing value (float NaN) has no length: treat it as "no match"
        # instead of aborting the whole run.
        return False
    return sum(c1 != c2 for c1, c2 in zip(s1, s2)) <= 1


def inexact_pairing_table(Cl, top_n=TOP_N):
    """Build the inexact-pairing summary for the first rows of Cl.

    Parameters
    ----------
    Cl : pandas.DataFrame
        Table with an 'AA JUNCTION' column, a 'Frequency %' column and at
        least one column whose upper-cased name ends with 'GENE' (the first
        such column is used). Presumably ordered by decreasing frequency --
        the function simply takes the leading rows.
    top_n : int
        Maximum number of leading rows to analyse; fewer are used when the
        table is shorter.

    Returns
    -------
    pandas.DataFrame
        Two rows ('same gene', 'different gene') and, per analysed junction,
        a '<junction> Nr. Clonos' and a '<junction> Freq. %' column.

    Raises
    ------
    ValueError
        If no column name ends with 'GENE'.
    """
    gene_cols = [x for x in Cl.columns if x.upper().endswith('GENE')]
    if not gene_cols:
        raise ValueError("no column whose name ends with 'GENE' was found")
    geneCol = gene_cols[0]

    top = Cl.iloc[:top_n]
    junctions = list(top[JUNCTION_COL])
    genes = list(top[geneCol])

    all_junctions = Cl[JUNCTION_COL]
    all_genes = Cl[geneCol]
    freqs = Cl[FREQ_COL]

    # Sized from the rows actually present, not a fixed 10.
    F = np.zeros((2, 2 * len(junctions)))

    for i, (taa, gene) in enumerate(zip(junctions, genes)):
        # np.array copies, so the masks are writable and independent of Cl.
        near = np.array(all_junctions.apply(ft.partial(maxHam1, s2=taa)),
                        dtype=bool)
        # Exclude the reference row itself by position, so the result does
        # not depend on how the table happens to be indexed.
        near[i] = False
        same = np.array(all_genes == gene, dtype=bool)

        same_gene = near & same
        other_gene = near & ~same
        F[0, 2 * i] = same_gene.sum()
        F[0, 2 * i + 1] = freqs[same_gene].sum()
        F[1, 2 * i] = other_gene.sum()
        F[1, 2 * i + 1] = freqs[other_gene].sum()

    K = ['%s Nr. Clonos' % aa for aa in junctions]
    L = ['%s Freq. %%' % aa for aa in junctions]
    columns = [val for pair in zip(K, L) for val in pair]

    return pd.DataFrame(F, columns=columns,
                        index=['same gene', 'different gene'])


def main(argv=None):
    """Read the table named in argv[1] and write the summary to argv[2]."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        raise SystemExit('usage: %s <input table> <output table>' % argv[0])

    clonosFN = argv[1]
    outFN = argv[2]

    Cl = pd.read_csv(clonosFN, sep='\t', index_col=0)
    D = inexact_pairing_table(Cl)
    D.to_csv(outFN, sep='\t')


if __name__ == "__main__":
    main()
