Mercurial > repos > chmaramis > irprofiler
comparison public_clonotype_computation.py @ 0:0e37e5b73273 draft
Initial commit
author | chmaramis |
---|---|
date | Fri, 30 Mar 2018 07:22:29 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0e37e5b73273 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 Created on Sat Mar 24 17:18:09 2018 | |
4 | |
5 @author: chmaramis | |
6 """ | |
7 | |
8 from __future__ import division | |
9 import numpy as np | |
10 from pandas import * | |
11 from numpy import nan as NA | |
12 import sys | |
13 import time | |
14 | |
15 clono_def = {'CDR3': ['AA JUNCTION'], | |
16 'VCDR3': ['V-GENE','AA JUNCTION'], | |
17 'JCDR3': ['J-GENE','AA JUNCTION']} | |
18 | |
19 | |
20 | |
21 def publicClonotypeComputation(inputs, clono, thres): | |
22 | |
23 clono_comps = clono_def[clono] | |
24 | |
25 clono=DataFrame() | |
26 | |
27 for x in range(0,len(inputs),2): | |
28 cl = DataFrame() | |
29 cl = read_csv(inputs[x] , sep = '\t' , index_col = 0) | |
30 #tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 ) | |
31 #cl = concat([chunk for chunk in tp]) | |
32 | |
33 if (thres != 'null'): | |
34 cl = cl[cl['Reads'] > int(thres)] | |
35 | |
36 x1 = inputs[x+1].split('_') | |
37 | |
38 del cl['Reads'] | |
39 cl.columns = [cl.columns[0], cl.columns[1], x1[0]+' '+cl.columns[2], x1[0]+' Relative '+cl.columns[3]] | |
40 | |
41 if clono.empty: | |
42 clono = cl | |
43 else: | |
44 clono = clono.merge(cl, how='outer', on=clono_comps) | |
45 | |
46 | |
47 col = clono.columns | |
48 freqs = col.map(lambda x: 'Frequency' in x) | |
49 reads = col.map(lambda x: 'Reads/Total' in x) | |
50 | |
51 clono[col[freqs]] = clono[col[freqs]].fillna(0) | |
52 clono[col[reads]] = clono[col[reads]].fillna('0/*') | |
53 | |
54 clono['Num of Patients']= clono[col[freqs]].apply(lambda x: np.sum(x != 0), axis=1) | |
55 | |
56 clono = clono[clono['Num of Patients'] > 1] | |
57 | |
58 clono.index = range(1,len(clono)+1) | |
59 | |
60 return clono | |
61 | |
62 | |
if __name__ == '__main__':
    # Command line layout:
    #   <clonotype> <output> <threshold> <file> <label> [<file> <label> ...]

    # Wall-clock reference taken before any work is done.
    start = time.time()

    # Everything after the three fixed arguments is the (file, label) list.
    arg = sys.argv[4:]
    clono, output, thres = sys.argv[1], sys.argv[2], sys.argv[3]

    # Compute the merged table of clonotypes.
    mer = publicClonotypeComputation(arg, clono, thres)

    # The output file is written only when there is at least one row.
    if not mer.empty:
        mer.to_csv(output, sep='\t')

    # Report the elapsed time in seconds.
    print('Runtime:' + str(time.time() - start))