|
0
|
1 # -*- coding: utf-8 -*-
|
|
|
2 """
|
|
|
3 Created on Mon Apr 18 11:37:40 2016
|
|
|
4
|
|
|
5 @author: chmaramis
|
|
|
6 """
|
|
|
7
|
|
|
8 # -*- coding: utf-8 -*-
|
|
|
9 """
|
|
|
10 Created on Mon Apr 18 09:48:00 2016
|
|
|
11
|
|
|
12 @author: chmaramis
|
|
|
13 """
|
|
|
14
|
|
|
15 import pandas as pd
|
|
|
16 import numpy as np
|
|
|
17 import sys
|
|
|
18 import functools as ft
|
|
|
19
|
|
|
def maxHam1(s1, s2):
    """Return True if s1 and s2 are within Hamming distance 1 of each other.

    Two sequences match when they have the same length and differ in at
    most one position. Sequences of different length never match.

    Values that have no length (for example the float NaN that pandas
    uses for an empty cell, or None) never match; False is returned
    instead of raising TypeError, so the function can be applied safely
    to a whole column that contains missing entries.
    """
    try:
        if len(s1) != len(s2):
            return False
    except TypeError:
        # At least one argument is not a sized sequence (missing value).
        return False

    mismatches = 0
    for c1, c2 in zip(s1, s2):
        if c1 != c2:
            mismatches += 1
            # A second mismatch already decides the answer; stop early.
            if mismatches > 1:
                return False
    return True
|
|
|
25
|
|
|
26
|
|
|
if __name__ == "__main__":
    # Command line: <input clonotype table (TSV)> <output table (TSV)>
    clonosFN = sys.argv[1]
    outFN = sys.argv[2]

    # Tab-separated table; its first column is used as the row index.
    Cl = pd.read_csv(clonosFN, sep='\t', index_col=0)

    # Query rows: the first (at most) 10 rows of the table.
    # NOTE(review): presumably the table is ordered by clonotype rank, so
    # these are the 10 most frequent clonotypes -- confirm with the producer.
    T10 = Cl.iloc[:10].copy()
    nQueries = len(T10)

    aa_junction = np.array(T10['AA JUNCTION'])
    # First column whose name ends with 'GENE' (case-insensitive).
    geneCol = [x for x in T10.columns if x.upper().endswith('GENE')][0]

    # Two columns per query row (count, summed frequency); row 0 holds the
    # "same gene" figures and row 1 the "different gene" figures. Sized from
    # the actual number of query rows so short tables do not break.
    F = np.zeros((2, 2 * nQueries))

    for i in range(nQueries):
        # Positional access: no assumption that index labels are 1..N.
        taa = T10['AA JUNCTION'].iloc[i]
        gene = T10[geneCol].iloc[i]

        # Rows whose junction is within one mismatch of the query junction.
        S1 = Cl['AA JUNCTION'].apply(ft.partial(maxHam1, s2=taa))
        # Rows carrying the same gene as the query row.
        S2 = Cl[geneCol] == gene
        # Exclude the query row itself. T10 is the head of Cl, so position i
        # in T10 is position i in Cl. A label-based assignment would silently
        # enlarge S1 if the label were missing from the index.
        S1.iloc[i] = False

        sameGene = S1 & S2
        otherGene = S1 & ~S2

        F[0, 2 * i] = sameGene.sum()
        F[0, 2 * i + 1] = Cl['Frequency %'][sameGene].sum()
        F[1, 2 * i] = otherGene.sum()
        F[1, 2 * i + 1] = Cl['Frequency %'][otherGene].sum()

    # Interleaved headers: "<junction> Nr. Clonos", "<junction> Freq. %", ...
    columns = [aa + suffix
               for aa in aa_junction
               for suffix in (' Nr. Clonos', ' Freq. %')]

    D = pd.DataFrame(F, columns=columns, index=['same gene', 'different gene'])
    D.to_csv(outFN, sep='\t')
|
|
|
58
|