annotate cmpb2016/top10_CDR3_inexact_pairing.py @ 0:8be019b173e6 draft

Uploaded included tools
author chmaramis
date Sun, 18 Mar 2018 05:54:20 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
1 # -*- coding: utf-8 -*-
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
2 """
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
3 Created on Mon Apr 18 11:37:40 2016
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
4
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
5 @author: chmaramis
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
6 """
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
7
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
8 # -*- coding: utf-8 -*-
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
9 """
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
10 Created on Mon Apr 18 09:48:00 2016
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
11
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
12 @author: chmaramis
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
13 """
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
14
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
15 import pandas as pd
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
16 import numpy as np
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
17 import sys
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
18 import functools as ft
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
19
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
def maxHam1(s1, s2):
    """Return True when *s1* and *s2* are at Hamming distance 0 or 1.

    Two sequences qualify only if they have the same length and differ in
    at most one position. Sequences of different length never qualify.

    Values that have no length (for example the float NaN that pandas uses
    for an empty cell, or None) are treated as non-matching instead of
    raising TypeError, so the function can be applied to a whole column
    that contains missing entries.
    """
    try:
        if len(s1) != len(s2):
            return False
    except TypeError:
        # At least one argument is not a sequence (NaN / None): no match.
        return False
    # Count mismatching positions; at most one mismatch is tolerated.
    return sum(c1 != c2 for c1, c2 in zip(s1, s2)) <= 1
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
25
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
26
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
def _pairing_table(clonos, top_n=10):
    """Build the inexact-pairing summary for the first *top_n* clonotypes.

    For each of the first *top_n* rows of *clonos*, every other row whose
    'AA JUNCTION' passes ``maxHam1`` against that row's junction is
    collected, and the matches are split by whether they share the value
    of the gene column (the first column whose upper-cased name ends with
    'GENE').

    The returned DataFrame has two rows, 'same gene' and 'different gene',
    and two columns per top clonotype: '<junction> Nr. Clonos' (number of
    matching rows) and '<junction> Freq. %' (sum of their 'Frequency %').

    Rows are addressed by position, so the table does not need a
    consecutive 1-based index, and inputs with fewer than *top_n* rows
    simply produce fewer columns.
    """
    gene_cols = [c for c in clonos.columns if c.upper().endswith('GENE')]
    if not gene_cols:
        raise ValueError("no column ending in 'GENE' found in clonotype table")
    gene_col = gene_cols[0]

    junctions = clonos['AA JUNCTION']
    genes = clonos[gene_col]
    freqs = clonos['Frequency %']

    # Never iterate past the end of a short table.
    n_top = min(top_n, len(clonos))
    stats = np.zeros((2, 2 * n_top))

    for pos in range(n_top):
        near = junctions.apply(ft.partial(maxHam1, s2=junctions.iloc[pos]))
        # A clonotype must not be counted as its own neighbour.
        near.iloc[pos] = False
        same_gene = genes == genes.iloc[pos]

        hit_same = near & same_gene
        hit_other = near & ~same_gene
        stats[0, 2 * pos] = hit_same.sum()
        stats[0, 2 * pos + 1] = freqs[hit_same].sum()
        stats[1, 2 * pos] = hit_other.sum()
        stats[1, 2 * pos + 1] = freqs[hit_other].sum()

    # Interleave the two labels of each top clonotype: count, then frequency.
    columns = []
    for aa in junctions.iloc[:n_top]:
        columns.append(str(aa) + ' Nr. Clonos')
        columns.append(str(aa) + ' Freq. %')

    return pd.DataFrame(stats, columns=columns,
                        index=['same gene', 'different gene'])


if __name__ == "__main__":

    if len(sys.argv) < 3:
        sys.exit("usage: top10_CDR3_inexact_pairing.py <clonotypes.tsv> <output.tsv>")

    clonosFN = sys.argv[1]
    outFN = sys.argv[2]

    # Tab-separated clonotype table; the first column is used as the index.
    Cl = pd.read_csv(clonosFN, sep='\t', index_col=0)

    D = _pairing_table(Cl, top_n=10)
    D.to_csv(outFN, sep='\t')
8be019b173e6 Uploaded included tools
chmaramis
parents:
diff changeset
58