comparison public_clonotype_computation.py @ 0:0e37e5b73273 draft

Initial commit
author chmaramis
date Fri, 30 Mar 2018 07:22:29 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0e37e5b73273
1 # -*- coding: utf-8 -*-
2 """
3 Created on Sat Mar 24 17:18:09 2018
4
5 @author: chmaramis
6 """
7
8 from __future__ import division
9 import numpy as np
10 from pandas import *
11 from numpy import nan as NA
12 import sys
13 import time
14
# Columns that identify a clonotype under each supported clonotype
# definition; publicClonotypeComputation merges its inputs on these columns.
_JUNCTION_COLUMN = 'AA JUNCTION'
clono_def = {
    'CDR3': [_JUNCTION_COLUMN],
    'VCDR3': ['V-GENE', _JUNCTION_COLUMN],
    'JCDR3': ['J-GENE', _JUNCTION_COLUMN],
}
18
19
20
def publicClonotypeComputation(inputs, clono, thres):
    """Merge per-input clonotype tables and keep the clonotypes shared by several inputs.

    Parameters
    ----------
    inputs : list of str
        Flat list alternating a tab-separated file path and a label:
        ``[path_1, label_1, path_2, label_2, ...]``.  Only the part of a
        label before its first underscore is used, as a column-name prefix.
    clono : str
        Key of the module-level ``clono_def`` table; selects the columns the
        tables are merged on.
    thres : str
        ``'null'`` to disable filtering, otherwise an integer written as a
        string; only rows whose ``Reads`` value is strictly greater are kept.

    Returns
    -------
    DataFrame
        Outer merge of all inputs restricted to the rows that have a non-zero
        value in more than one ``Frequency`` column, plus a
        ``Num of Patients`` column holding that count.  The index runs from
        1 to the number of rows.  An empty DataFrame is returned when
        ``inputs`` is empty.

    Raises
    ------
    KeyError
        If ``clono`` is not a key of ``clono_def``.
    IndexError
        If ``inputs`` has an odd number of entries (a path without a label).
    ValueError
        If ``thres`` is neither ``'null'`` nor an integer literal.
    """
    clono_comps = clono_def[clono]

    # Fail before any file is read when the last path has no label.
    if len(inputs) % 2:
        raise IndexError('inputs must alternate file paths and labels; '
                         'the last file path has no label')

    # Parse the threshold once instead of once per input file.
    min_reads = None if thres == 'null' else int(thres)

    # None (rather than DataFrame.empty) marks "nothing merged yet", so an
    # input left without rows after filtering still contributes its columns.
    merged = None

    for pos in range(0, len(inputs), 2):
        prefix = inputs[pos + 1].split('_')[0]
        table = read_csv(inputs[pos], sep='\t', index_col=0)

        if min_reads is not None:
            table = table[table['Reads'] > min_reads]
        table = table.drop('Reads', axis=1)

        # Prefix the two trailing columns with the input label and leave the
        # leading (key) columns untouched.  Addressing them from the end
        # keeps this working for one-key ('CDR3') and two-key definitions.
        # NOTE(review): assumes the last two columns are the 'Reads/Total'
        # and 'Frequency' columns, in that order -- confirm against the
        # files produced upstream.
        names = list(table.columns)
        names[-2] = prefix + ' ' + names[-2]
        names[-1] = prefix + ' Relative ' + names[-1]
        table.columns = names

        if merged is None:
            merged = table
        else:
            merged = merged.merge(table, how='outer', on=clono_comps)

    if merged is None:
        return DataFrame()

    freq_cols = [name for name in merged.columns if 'Frequency' in name]
    ratio_cols = [name for name in merged.columns if 'Reads/Total' in name]

    # A clonotype missing from an input has NaN in that input's columns
    # after the outer merge; replace those with explicit "absent" values.
    if freq_cols:
        merged[freq_cols] = merged[freq_cols].fillna(0)
    if ratio_cols:
        merged[ratio_cols] = merged[ratio_cols].fillna('0/*')

    # Count, per row, the inputs in which the clonotype has a non-zero frequency.
    merged['Num of Patients'] = (merged[freq_cols] != 0).sum(axis=1)

    merged = merged[merged['Num of Patients'] > 1]
    merged.index = range(1, len(merged) + 1)

    return merged
61
62
if __name__ == '__main__':
    # Start timing before any work is done.
    t_begin = time.time()

    # Command line: <clonotype key> <output path> <threshold> followed by
    # the alternating file/label list handed to publicClonotypeComputation.
    file_label_args = sys.argv[4:]
    clonotype_key = sys.argv[1]
    out_path = sys.argv[2]
    reads_threshold = sys.argv[3]

    public_table = publicClonotypeComputation(file_label_args,
                                              clonotype_key,
                                              reads_threshold)

    # The output file is only written when the result has at least one row.
    if not public_table.empty:
        public_table.to_csv(out_path, sep='\t')

    # Report the elapsed wall-clock time.
    t_end = time.time()
    elapsed = t_end - t_begin
    print('Runtime:' + str(elapsed))