# -*- coding: utf-8 -*-
"""
Created on Mon Apr 18 09:48:00 2016

@author: chmaramis

For the top clonotypes of a tab-separated clonotype table, sum the reads and
frequencies of every row sharing the same AA junction, broken down by gene.

Usage: <script> <clonotypes.tsv> <output.tsv>
"""

import pandas as pd
import numpy as np
import sys


def build_gene_usage_table(clonotypes, top_n=10):
    """Return per-gene read/frequency totals for the top AA junctions.

    Parameters
    ----------
    clonotypes : pandas.DataFrame
        Must contain the columns 'AA JUNCTION', 'Reads', 'Frequency %' and
        at least one column whose name ends with 'GENE' (case-insensitive);
        the first such column is used as the gene column.
    top_n : int, optional
        Number of leading rows whose AA junctions are analysed. Fewer rows
        are used when the table is shorter than ``top_n``.

    Returns
    -------
    pandas.DataFrame
        One row per unique gene (sorted, as returned by ``np.unique``) and
        two columns per analysed junction, '<junction> Reads' and
        '<junction> Freq. %', holding the summed values of all rows of
        ``clonotypes`` that carry that junction and that gene.

    Raises
    ------
    ValueError
        If no column name ends with 'GENE'.
    """
    gene_columns = [c for c in clonotypes.columns if c.upper().endswith('GENE')]
    if not gene_columns:
        raise ValueError("no column ending in 'GENE' found in the input table")
    gene_col = gene_columns[0]

    # Work on plain arrays and address rows by position, so the result does
    # not depend on the index labels being exactly 1..N.
    junction_series = clonotypes['AA JUNCTION']
    junctions = np.asarray(junction_series)
    genes = np.asarray(clonotypes[gene_col])
    reads = np.asarray(clonotypes['Reads'])
    freqs = np.asarray(clonotypes['Frequency %'])

    unique_genes = np.unique(genes)
    # Built once: gene -> row of the output matrix.
    gene_row = {gene: row for row, gene in enumerate(unique_genes)}

    n_top = min(top_n, len(clonotypes))
    totals = np.zeros((len(unique_genes), 2 * n_top))

    for i in range(n_top):
        matches = np.flatnonzero(np.asarray(junction_series == junctions[i]))
        if matches.size == 0:
            # Only possible for a missing junction (NaN never equals itself).
            continue

        # The first row carrying this junction is expected to use the same
        # gene as the top clonotype itself.
        if genes[matches[0]] != genes[i]:
            print('We have a problem here!')

        for pos in matches:
            row = gene_row[genes[pos]]
            totals[row, 2 * i] += reads[pos]
            totals[row, 2 * i + 1] += freqs[pos]

    # Interleave the labels: reads column first, then frequency column.
    columns = []
    for junction in junctions[:n_top]:
        columns.append(str(junction) + ' Reads')
        columns.append(str(junction) + ' Freq. %')

    return pd.DataFrame(totals, columns=columns, index=unique_genes)


if __name__ == "__main__":

    if len(sys.argv) < 3:
        sys.exit('Usage: %s <clonotypes.tsv> <output.tsv>' % sys.argv[0])

    clonosFN = sys.argv[1]
    outFN = sys.argv[2]

    Cl = pd.read_csv(clonosFN, sep='\t', index_col=0)
    D = build_gene_usage_table(Cl)
    D.to_csv(outFN, sep='\t')