comparison cmpb2016/top10_CDR3_exact_pairing.py @ 0:8be019b173e6 draft

Uploaded included tools
author chmaramis
date Sun, 18 Mar 2018 05:54:20 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:8be019b173e6
1 # -*- coding: utf-8 -*-
2 """
3 Created on Mon Apr 18 09:48:00 2016
4
5 @author: chmaramis
6 """
7
8 import pandas as pd
9 import numpy as np
10 import sys
11
12
def build_pairing_table(clonotypes, top_n=10):
    """Sum reads and frequency per gene for the leading AA junctions.

    The first ``top_n`` rows of ``clonotypes`` supply the junctions of
    interest (the table is used in the order given; it is not sorted here).
    For each of those junctions, every row of the whole table whose
    ``AA JUNCTION`` is exactly equal is found, and its ``Reads`` and
    ``Frequency %`` are added to the entry of the gene on that row.

    Rows are addressed by position, so the result does not depend on the
    labels of the table's index, and tables with fewer than ``top_n`` rows
    simply produce fewer columns.

    Parameters
    ----------
    clonotypes : pandas.DataFrame
        Must contain the columns ``AA JUNCTION``, ``Reads``, ``Frequency %``
        and one column whose name ends in ``GENE`` (case-insensitive); when
        several match, the first one is used.
    top_n : int, optional
        Number of leading rows whose junctions are tabulated (default 10).

    Returns
    -------
    pandas.DataFrame
        One row per distinct gene (sorted, as returned by ``numpy.unique``)
        and, for each leading junction, a ``"<junction> Reads"`` column
        followed by a ``"<junction> Freq. %"`` column.

    Raises
    ------
    ValueError
        If no column name ends in ``GENE``.
    KeyError
        If one of the fixed-name columns is missing.
    """
    gene_col = next(
        (col for col in clonotypes.columns if str(col).upper().endswith('GENE')),
        None,
    )
    if gene_col is None:
        raise ValueError("no column ending in 'GENE' found in the input table")

    junction_col = clonotypes['AA JUNCTION']
    junction_values = junction_col.to_numpy()
    gene_values = clonotypes[gene_col].to_numpy()
    read_values = clonotypes['Reads'].to_numpy()
    freq_values = clonotypes['Frequency %'].to_numpy()

    unique_genes = np.unique(gene_values)
    # Map each gene to its row in the result once, instead of searching the
    # unique-gene array again for every matching clonotype.
    gene_row = {gene: row for row, gene in enumerate(unique_genes)}

    top_junctions = junction_values[:top_n]
    # Two columns (reads, frequency) per leading junction; sized from the
    # rows actually present rather than a fixed 20.
    table = np.zeros((len(unique_genes), 2 * len(top_junctions)))

    for i, junction in enumerate(top_junctions):
        matches = np.flatnonzero((junction_col == junction).to_numpy())
        if matches.size == 0:
            # Happens for a missing junction (NaN never compares equal);
            # leave its columns at zero instead of failing.
            continue

        # The first occurrence of the junction is expected to be the leading
        # row itself; a different gene means the same junction already
        # appeared earlier in the table paired with another gene.
        if gene_values[matches[0]] != gene_values[i]:
            print('We have a problem here!')

        for pos in matches:
            row = gene_row[gene_values[pos]]
            table[row, 2 * i] += read_values[pos]
            table[row, 2 * i + 1] += freq_values[pos]

    columns = []
    for junction in top_junctions:
        columns.append(f'{junction} Reads')
        columns.append(f'{junction} Freq. %')

    return pd.DataFrame(table, columns=columns, index=unique_genes)


if __name__ == "__main__":

    if len(sys.argv) < 3:
        sys.exit('usage: %s <clonotypes.tsv> <output.tsv>' % sys.argv[0])

    clonosFN = sys.argv[1]
    outFN = sys.argv[2]

    # Tab-separated clonotype table; the first column is read as the index.
    Cl = pd.read_csv(clonosFN, sep='\t', index_col=0)

    D = build_pairing_table(Cl)
    D.to_csv(outFN, sep='\t')
56