annotate exclusive_clonotype_computation.py @ 1:acaa8e8a0b88 draft default tip

Uploaded test-data & added tool help
author chmaramis
date Mon, 30 Apr 2018 04:47:52 -0400
parents 0e37e5b73273
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
1 # -*- coding: utf-8 -*-
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
2 """
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
3 Created on Sat Mar 24 17:31:38 2018
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
4
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
5 @author: chmaramis
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
6 """
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
7
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
8 from __future__ import division
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
9 import numpy as np
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
10 from pandas import *
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
11 from numpy import nan as NA
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
12 import sys
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
13 import time
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
14
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
# Columns that together identify a clonotype, keyed by the clonotype
# definition name passed on the command line.
clono_def = {'CDR3': ['AA JUNCTION'],
             'VCDR3': ['V-GENE', 'AA JUNCTION'],
             'JCDR3': ['J-GENE', 'AA JUNCTION']}


def exclusiveClonotypeComputation(inputs, clono, thres):
    """Return the clonotypes of file A that are not present in file B.

    Parameters
    ----------
    inputs : sequence of str
        ``inputs[0]`` is the path of file A and ``inputs[2]`` the path of
        file B. Both are read as tab-separated tables whose first column is
        the index and which contain a ``Reads`` column plus the columns
        listed in ``clono_def[clono]``. ``inputs[1]`` is not used here.
    clono : str
        Key of ``clono_def`` selecting the columns that identify a clonotype.
    thres : str
        Either the literal string ``'null'`` (no filtering) or an integer
        given as text; rows with ``Reads`` not strictly greater than it are
        dropped from both files before the comparison.

    Returns
    -------
    DataFrame
        The rows of file A (all of its columns) whose clonotype has no
        match in file B, with the index renumbered 1..n.

    Raises
    ------
    KeyError
        If ``clono`` is not a key of ``clono_def`` or a required column is
        missing from an input file.
    """
    clono_comps = clono_def[clono]

    # File A
    vClono = read_csv(inputs[0], sep='\t', index_col=0)
    if thres != 'null':
        vClono = vClono[vClono['Reads'] > int(thres)]

    # File B
    cl = read_csv(inputs[2], sep='\t', index_col=0)
    if thres != 'null':
        cl = cl[cl['Reads'] > int(thres)]
    # Non in-place rename: `cl` may be a filtered selection of the frame read
    # from disk, and mutating it in place can raise chained-assignment
    # warnings.
    cl = cl.rename(columns={'Reads': 'ReadsB'})

    # Left join keeps every clonotype of A; the ones missing from B end up
    # with a NaN read count.
    vClono = vClono.merge(cl[clono_comps + ['ReadsB']],
                          how='left', on=clono_comps)

    # Compute the filled column as a new Series instead of calling
    # fillna(inplace=True) on a column selection: under pandas Copy-on-Write
    # the in-place call does not reach `vClono`, which would leave the NaNs
    # in place and make the filter below discard every row.
    # A clonotype counts as absent from B when its read count there is
    # missing or zero.
    absent_in_b = vClono['ReadsB'].fillna(0) == 0
    vClono = vClono[absent_in_b].drop('ReadsB', axis=1)

    vClono.index = range(1, len(vClono) + 1)

    return vClono
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
49
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
50
0e37e5b73273 Initial commit
chmaramis
parents:
diff changeset
if __name__ == '__main__':

    # Wall-clock reference for the runtime report printed at the end.
    start = time.time()

    # Positional arguments: clonotype definition, output path, read
    # threshold, followed by the input list handed to the computation.
    clono = sys.argv[1]
    output = sys.argv[2]
    threshold = sys.argv[3]
    arg = sys.argv[4:]

    # Compute the clonotypes exclusive to the first input file.
    excl = exclusiveClonotypeComputation(arg, clono, threshold)

    # The tab-separated output is written only when there is a result.
    has_rows = not excl.empty
    if has_rows:
        excl.to_csv(output, sep='\t')

    # Report the elapsed time in seconds.
    elapsed = time.time() - start
    print('Runtime:' + str(elapsed))