Mercurial > repos > chmaramis > testirprofiler
comparison cmpb2016/top10_CDR3_inexact_pairing.py @ 0:8be019b173e6 draft
Uploaded included tools
| author | chmaramis |
|---|---|
| date | Sun, 18 Mar 2018 05:54:20 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:8be019b173e6 |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 """ | |
| 3 Created on Mon Apr 18 11:37:40 2016 | |
| 4 | |
| 5 @author: chmaramis | |
| 6 """ | |
| 7 | |
| 8 # -*- coding: utf-8 -*- | |
| 9 """ | |
| 10 Created on Mon Apr 18 09:48:00 2016 | |
| 11 | |
| 12 @author: chmaramis | |
| 13 """ | |
| 14 | |
| 15 import pandas as pd | |
| 16 import numpy as np | |
| 17 import sys | |
| 18 import functools as ft | |
| 19 | |
def maxHam1(s1, s2):
    """Return True when s1 and s2 are within Hamming distance 1 of each other.

    Two sequences match when they have the same length and differ in at
    most one position (identical sequences therefore match as well).

    Parameters
    ----------
    s1, s2 : sequences (normally strings) to compare.

    Returns
    -------
    bool
        True if the lengths are equal and at most one position differs,
        False otherwise. Values that have no length (for example the
        float NaN pandas uses for an empty cell) never match and yield
        False instead of raising TypeError.
    """
    try:
        if len(s1) != len(s2):
            return False
    except TypeError:
        # At least one argument is not a sized sequence (e.g. NaN from a
        # missing table cell): it cannot be a near-match of anything.
        return False
    return sum(c1 != c2 for c1, c2 in zip(s1, s2)) <= 1
| 25 | |
| 26 | |
def top_cdr3_inexact_pairing(Cl, top_n=10):
    """Summarise near-identical CDR3 neighbours of the first clonotypes.

    For each of the first ``top_n`` rows of ``Cl`` (fewer if the table is
    shorter), every other row whose 'AA JUNCTION' passes ``maxHam1``
    against that row's junction is collected. Those neighbours are split
    by whether their gene column equals the reference row's gene, and for
    each group the number of rows and the sum of 'Frequency %' are
    reported.

    Parameters
    ----------
    Cl : pandas.DataFrame
        Clonotype table with the columns 'AA JUNCTION', 'Frequency %' and
        at least one column whose upper-cased name ends in 'GENE' (the
        first such column is used).
    top_n : int, optional
        Number of leading rows to analyse (default 10).

    Returns
    -------
    pandas.DataFrame
        Two rows ('same gene', 'different gene') and, per analysed
        clonotype, the columns '<junction> Nr. Clonos' and
        '<junction> Freq. %'.

    Raises
    ------
    ValueError
        If no column name ends in 'GENE'.
    """
    top = Cl.iloc[:top_n]

    gene_columns = [x for x in Cl.columns if x.upper().endswith('GENE')]
    if not gene_columns:
        raise ValueError("no column ending in 'GENE' found in clonotype table")
    geneCol = gene_columns[0]

    # Size the result from the rows actually present so that tables with
    # fewer than top_n clonotypes are handled instead of raising KeyError.
    n_top = len(top)
    F = np.zeros((2, 2 * n_top))

    for i in range(n_top):
        # Positional access: does not rely on the index being exactly 1..N.
        taa = top['AA JUNCTION'].iloc[i]
        gene = top[geneCol].iloc[i]

        S1 = Cl['AA JUNCTION'].apply(ft.partial(maxHam1, s2=taa))
        S2 = Cl[geneCol] == gene
        # The reference clonotype trivially matches itself; leave it out.
        S1.iloc[i] = False

        same_gene = S1 & S2
        other_gene = S1 & ~S2
        F[0, 2 * i] = same_gene.sum()
        F[0, 2 * i + 1] = Cl['Frequency %'][same_gene].sum()
        F[1, 2 * i] = other_gene.sum()
        F[1, 2 * i + 1] = Cl['Frequency %'][other_gene].sum()

    # Interleave the count and frequency column labels per junction.
    columns = [label
               for aa in top['AA JUNCTION']
               for label in (aa + ' Nr. Clonos', aa + ' Freq. %')]

    return pd.DataFrame(F, columns=columns,
                        index=['same gene', 'different gene'])


if __name__ == "__main__":

    # Usage: script <clonotypes.tsv> <output.tsv>
    clonosFN = sys.argv[1]
    outFN = sys.argv[2]

    Cl = pd.read_csv(clonosFN, sep='\t', index_col=0)
    D = top_cdr3_inexact_pairing(Cl)
    D.to_csv(outFN, sep='\t')
| 58 |
