comparison exclusive_clonotype_computation.py @ 0:0e37e5b73273 draft

Initial commit
author chmaramis
date Fri, 30 Mar 2018 07:22:29 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0e37e5b73273
1 # -*- coding: utf-8 -*-
2 """
3 Created on Sat Mar 24 17:31:38 2018
4
5 @author: chmaramis
6 """
7
8 from __future__ import division
9 import numpy as np
10 from pandas import *
11 from numpy import nan as NA
12 import sys
13 import time
14
# Clonotype definitions: each key names a definition and maps to the list of
# column names that are used together as the merge key for that definition.
clono_def = {
    'CDR3': ['AA JUNCTION'],
    'VCDR3': ['V-GENE', 'AA JUNCTION'],
    'JCDR3': ['J-GENE', 'AA JUNCTION'],
}
18
19
def _load_clonotypes(path, thres):
    """Read one tab-separated clonotype table and apply the read threshold.

    The first column of the file is used as the index. When ``thres`` is not
    the string ``'null'`` it is converted with ``int`` and only rows whose
    ``Reads`` value is strictly greater than it are kept.
    """
    table = read_csv(path, sep='\t', index_col=0)
    if thres != 'null':
        table = table[table['Reads'] > int(thres)]
    return table


def exclusiveClonotypeComputation(inputs, clono, thres):
    """Return the clonotypes of file A that do not occur in file B.

    Parameters
    ----------
    inputs : sequence of str
        ``inputs[0]`` is the path of file A and ``inputs[2]`` the path of
        file B. Other positions are not read by this function.
    clono : str
        Key into ``clono_def`` selecting the columns that identify a
        clonotype (and therefore the merge key).
    thres : str
        ``'null'`` to disable filtering; otherwise an integer literal. Rows
        with ``Reads`` less than or equal to it are discarded from both
        files before the comparison.

    Returns
    -------
    DataFrame
        The (filtered) rows of file A whose clonotype has no match in the
        (filtered) file B, with all of file A's columns and an index
        renumbered from 1.

    Raises
    ------
    KeyError
        If ``clono`` is not a key of ``clono_def``.
    ValueError
        If ``thres`` is neither ``'null'`` nor convertible with ``int``.
    """
    clono_comps = clono_def[clono]

    # File A
    clono_a = _load_clonotypes(inputs[0], thres)

    # File B: rename its read counts so they do not collide with file A's
    # 'Reads' column after the merge.
    clono_b = _load_clonotypes(inputs[2], thres)
    clono_b = clono_b.rename(columns={'Reads': 'ReadsB'})

    # A left merge keeps every row of A; rows without a partner in B get NaN
    # in 'ReadsB'.
    merged = clono_a.merge(clono_b[clono_comps + ['ReadsB']],
                           how='left', on=clono_comps)

    # Assign the filled column back instead of calling fillna(inplace=True)
    # on the selected column: with pandas Copy-on-Write the in-place call
    # acts on a temporary and leaves the NaNs in the frame, which would make
    # the filter below drop every row.
    merged['ReadsB'] = merged['ReadsB'].fillna(0)

    exclusive = merged[merged['ReadsB'] == 0].drop('ReadsB', axis=1)

    # Present the result with a 1-based running index.
    exclusive.index = range(1, len(exclusive) + 1)

    return exclusive
49
50
if __name__ == '__main__':

    # Remember when we started so the total runtime can be reported.
    t_start = time.time()

    # Command line layout:
    #   argv[1] clonotype definition, argv[2] output path,
    #   argv[3] read threshold, argv[4:] input arguments
    arg = sys.argv[4:]
    clono = sys.argv[1]
    output = sys.argv[2]
    threshold = sys.argv[3]

    # Compute the clonotypes found only in the first input file.
    excl = exclusiveClonotypeComputation(arg, clono, threshold)

    # The tab-separated output is written only when there is a result.
    if not excl.empty:
        excl.to_csv(output, sep='\t')

    # Report the elapsed wall-clock time.
    elapsed = time.time() - t_start
    print('Runtime:' + str(elapsed))