comparison cmpb2016/top10_CDR3_inexact_pairing.py @ 0:8be019b173e6 draft

Uploaded included tools
author chmaramis
date Sun, 18 Mar 2018 05:54:20 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:8be019b173e6
1 # -*- coding: utf-8 -*-
2 """
3 Created on Mon Apr 18 11:37:40 2016
4
5 @author: chmaramis
6 """
7
8 # -*- coding: utf-8 -*-
9 """
10 Created on Mon Apr 18 09:48:00 2016
11
12 @author: chmaramis
13 """
14
15 import pandas as pd
16 import numpy as np
17 import sys
18 import functools as ft
19
def maxHam1(s1, s2):
    """Tell whether two sequences are within Hamming distance 1 of each other.

    Parameters
    ----------
    s1, s2 : sequences (typically amino-acid strings)
        The two sequences to compare position by position.

    Returns
    -------
    bool
        True when both sequences have the same length and differ in at most
        one position (identical sequences included). False when the lengths
        differ, when more than one position differs, or when either argument
        has no length at all.
    """
    try:
        if len(s1) != len(s2):
            return False
    except TypeError:
        # An empty cell in the input table is read by pandas as a float NaN,
        # which has no len(); a missing sequence can never be a near match.
        return False

    mismatches = 0
    for c1, c2 in zip(s1, s2):
        if c1 != c2:
            mismatches += 1
            if mismatches > 1:
                # The answer is already decided; skip the rest of the string.
                return False
    return True
25
26
def _inexact_pairing_table(clonos, top_n=10):
    """Count near-identical junctions for the first ``top_n`` rows of a table.

    For each of the first ``top_n`` rows of ``clonos`` (in file order --
    presumably the most frequent clonotypes; confirm against the producer of
    the input file), every row of the whole table is tested with ``maxHam1``
    against that row's 'AA JUNCTION'. The row itself is excluded. The matches
    are then split by whether they carry the same value in the gene column.

    Parameters
    ----------
    clonos : pandas.DataFrame
        Must contain the columns 'AA JUNCTION' and 'Frequency %' plus at
        least one column whose upper-cased name ends with 'GENE' (the first
        such column is used).
    top_n : int, optional
        Number of leading rows to analyse. Fewer rows are analysed when the
        table is shorter than this.

    Returns
    -------
    pandas.DataFrame
        Two rows ('same gene', 'different gene') and, per analysed junction,
        a '<junction> Nr. Clonos' column with the number of matches and a
        '<junction> Freq. %' column with the summed 'Frequency %' of them.

    Raises
    ------
    ValueError
        If no column name ends with 'GENE'.
    """
    gene_cols = [c for c in clonos.columns if c.upper().endswith('GENE')]
    if not gene_cols:
        raise ValueError("input table has no column whose name ends with 'GENE'")

    # Column lookups do not change between iterations, so do them once.
    junctions = clonos['AA JUNCTION']
    genes = clonos[gene_cols[0]]
    freqs = clonos['Frequency %']

    # Size everything from the rows actually present so that short inputs
    # (fewer than top_n clonotypes) still produce a consistent table.
    n_top = len(clonos.iloc[:top_n])
    stats = np.zeros((2, 2 * n_top))
    labels = []

    for pos in range(n_top):
        # Positional access: correct whatever labels the index column holds.
        junction = junctions.iloc[pos]
        near = junctions.apply(ft.partial(maxHam1, s2=junction))
        near.iloc[pos] = False  # a clonotype is not its own neighbour
        same_gene = genes == genes.iloc[pos]

        hit_same = near & same_gene
        hit_other = near & ~same_gene
        stats[0, 2 * pos] = hit_same.sum()
        stats[0, 2 * pos + 1] = freqs[hit_same].sum()
        stats[1, 2 * pos] = hit_other.sum()
        stats[1, 2 * pos + 1] = freqs[hit_other].sum()

        labels.append(junction + ' Nr. Clonos')
        labels.append(junction + ' Freq. %')

    return pd.DataFrame(stats, columns=labels,
                        index=['same gene', 'different gene'])


if __name__ == "__main__":

    if len(sys.argv) < 3:
        sys.exit('Usage: %s <clonotypes.tsv> <output.tsv>' % sys.argv[0])

    clonosFN = sys.argv[1]
    outFN = sys.argv[2]

    # Tab-separated input; the first column becomes the row index.
    Cl = pd.read_csv(clonosFN, sep='\t', index_col=0)

    D = _inexact_pairing_table(Cl)
    D.to_csv(outFN, sep='\t')
58