Mercurial > repos > chmaramis > irprofiler
comparison exclusive_clonotype_computation.py @ 0:0e37e5b73273 draft
Initial commit
author | chmaramis |
---|---|
date | Fri, 30 Mar 2018 07:22:29 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0e37e5b73273 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 Created on Sat Mar 24 17:31:38 2018 | |
4 | |
5 @author: chmaramis | |
6 """ | |
7 | |
8 from __future__ import division | |
9 import numpy as np | |
10 from pandas import * | |
11 from numpy import nan as NA | |
12 import sys | |
13 import time | |
14 | |
# Columns that jointly identify a clonotype, keyed by the clonotype
# definition name passed on the command line.
clono_def = {'CDR3': ['AA JUNCTION'],
             'VCDR3': ['V-GENE', 'AA JUNCTION'],
             'JCDR3': ['J-GENE', 'AA JUNCTION']}


def _read_clonotypes(path, thres):
    """Load one tab-separated clonotype table and apply the reads threshold.

    The first column of the file is used as the index. Unless ``thres`` is
    the string ``'null'``, only rows whose ``Reads`` value is strictly
    greater than ``int(thres)`` are kept.
    """
    table = read_csv(path, sep='\t', index_col=0)
    if thres != 'null':
        table = table[table['Reads'] > int(thres)]
    return table


def exclusiveClonotypeComputation(inputs, clono, thres):
    """Return the clonotypes of file A that do not occur in file B.

    Parameters
    ----------
    inputs : sequence
        ``inputs[0]`` is the path of file A and ``inputs[2]`` the path of
        file B. The other positions are not read by this function
        (presumably dataset labels supplied by the caller -- TODO confirm).
    clono : str
        Key of ``clono_def`` selecting the columns that define a clonotype.
    thres : str
        ``'null'`` for no filtering, otherwise an integer-like string; rows
        with ``Reads`` not strictly above it are discarded from both files
        before the comparison.

    Returns
    -------
    DataFrame
        The rows of file A (all of its columns) whose clonotype has no
        match in file B, or only matches with a read count of 0, indexed
        from 1 to the number of rows.

    Raises
    ------
    KeyError
        If ``clono`` is not a key of ``clono_def`` or a required column is
        missing from an input file.
    """
    clono_comps = clono_def[clono]

    table_a = _read_clonotypes(inputs[0], thres)
    table_b = _read_clonotypes(inputs[2], thres)

    # Build the B-side lookup as a new frame instead of renaming the filtered
    # slice in place, which would raise SettingWithCopyWarning.
    reads_b = table_b[clono_comps + ['Reads']].rename(columns={'Reads': 'ReadsB'})

    merged = table_a.merge(reads_b, how='left', on=clono_comps)

    # Clonotypes absent from B come back as NaN. Assign the filled column
    # explicitly: a chained ``merged['ReadsB'].fillna(0, inplace=True)`` only
    # updates a temporary under pandas Copy-on-Write and would leave the NaN
    # markers in place, emptying the result.
    merged['ReadsB'] = merged['ReadsB'].fillna(0)

    exclusive = merged[merged['ReadsB'] == 0].drop('ReadsB', axis=1)

    # Number the resulting clonotypes starting from 1.
    exclusive.index = range(1, len(exclusive) + 1)

    return exclusive
49 | |
50 | |
if __name__ == '__main__':

    t_begin = time.time()

    # Command line layout:
    #   <clonotype definition> <output path> <reads threshold> <inputs...>
    clono_type = sys.argv[1]
    out_path = sys.argv[2]
    reads_threshold = sys.argv[3]
    input_args = sys.argv[4:]

    # Compute the clonotypes that are exclusive to the first input file
    exclusive = exclusiveClonotypeComputation(input_args, clono_type,
                                              reads_threshold)

    # The tab-separated output is only written when there is something to report
    if not exclusive.empty:
        exclusive.to_csv(out_path, sep='\t')

    # Report the elapsed wall-clock time
    elapsed = time.time() - t_begin
    print('Runtime:' + str(elapsed))