comparison cmpb2016/pub_clono_CDR3.py @ 0:8be019b173e6 draft

Uploaded included tools
author chmaramis
date Sun, 18 Mar 2018 05:54:20 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:8be019b173e6
1 # -*- coding: utf-8 -*-
2 """
3 Created on Mon Dec 21 18:26:01 2015
4
5 @author: chmaramis
6 """
7
8 from __future__ import division
9 import numpy as np
10 from pandas import *
11 from numpy import nan as NA
12 import sys
13 import time
14
15
16
17 def publicCDR3Func(inputs,thres):
18
19 cdr3=DataFrame()
20
21 for x in range(0,len(inputs),2):
22 cl = DataFrame()
23 cl = read_csv(inputs[x] , sep = '\t' , index_col = 0)
24 #tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 )
25 #cl = concat([chunk for chunk in tp])
26
27 if (thres != 'null'):
28 cl = cl[cl['Reads'] > int(thres)]
29
30 x1 = inputs[x+1].split('_')
31
32 del cl['Reads']
33 cl.columns = [cl.columns[0], x1[0]+' '+cl.columns[1], x1[0]+' Relative '+cl.columns[2]]
34
35 if cdr3.empty:
36 cdr3 = cl
37 else:
38 cdr3 = cdr3.merge(cl, how='outer', on='AA JUNCTION')
39
40
41 col = cdr3.columns
42 freqs = col.map(lambda x: 'Frequency' in x)
43 reads = col.map(lambda x: 'Reads/Total' in x)
44
45 cdr3[col[freqs]] = cdr3[col[freqs]].fillna(0)
46 cdr3[col[reads]] = cdr3[col[reads]].fillna('0/*')
47
48 cdr3['Num of Patients']= cdr3[col[freqs]].apply(lambda x: np.sum(x != 0), axis=1)
49
50 cdr3 = cdr3[cdr3['Num of Patients'] > 1]
51
52 cdr3.index = range(1,len(cdr3)+1)
53
54 return cdr3
55
56
if __name__ == '__main__':

    # Start the clock before any work so the report covers the whole run.
    started_at = time.time()

    # Command line: <output path> <threshold> <remaining arguments...>
    # The remaining arguments are handed to publicCDR3Func unchanged.
    min_reads = sys.argv[2]
    table_args = sys.argv[3:]
    out_path = sys.argv[1]

    public_clonotypes = publicCDR3Func(table_args, min_reads)

    # No file is written when the result has no rows.
    if not public_clonotypes.empty:
        public_clonotypes.to_csv(out_path, sep='\t')

    # Report the elapsed wall-clock time in seconds.
    elapsed = time.time() - started_at
    print('Runtime:' + str(elapsed))