annotate ngs_filtering.py @ 12:cdf95051bc55 draft default tip

Uploaded 2 tools
author chmaramis
date Sun, 18 Mar 2018 07:11:06 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
1 # -*- coding: utf-8 -*-
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
2 """
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
3 Created on Wed Sep 4 18:41:42 2013
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
4
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
5 @author: chmaramis
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
6 """
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
7
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
8 from __future__ import division
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
9 import string as strpy
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
10 import numpy as np
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
11 from pandas import *
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
12 from numpy import nan as NA
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
13 import time
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
14 import sys
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
15
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
16
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
def filter_condition_AAjunction(x):
    """Return the first space-delimited token of an AA JUNCTION value.

    Leading and trailing whitespace is stripped first. If the remaining
    text contains a space character, only the part before the first space
    is kept; otherwise the stripped text is returned unchanged.

    x: a string. ``x.strip()`` is called on it, so a non-string value
       (for example a float NaN) raises AttributeError.
    """
    # Split on the literal ' ' (not on arbitrary whitespace) so tabs inside
    # the value are preserved. Element [0] is the whole string when no space
    # is present, so no separate "no space" branch is needed.
    return x.strip().split(' ', 1)[0]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
23
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
24 #-----------frame creation---------------------
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
25 def filtering(inp,cells,psorf,con,prod,CF,Vper,Vgene,laa1,laa2,conaa,Jgene,Dgene,fname):
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
26
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
27 try:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
28 path=inp
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
29 frame = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
30 seqlen = []
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
31 head = []
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
32 tp = read_csv(path, iterator=True, chunksize=5000,sep='\t', index_col=0 )
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
33 frame = concat([chunk for chunk in tp])
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
34
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
35 frcol = list(frame.columns)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
36 #print frcol[-1]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
37 if 'Unnamed' in frcol[-1]:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
38 del frcol[-1]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
39 frame=frame[frcol]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
40
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
41 frame.index = range(1,len(frame)+1)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
42
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
43 head.append('Total reads of raw data')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
44 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
45
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
46 #------------drop nulls--------------------
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
47 filtered = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
48 filtall = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
49 summ_df = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
50 filtered = frame[isnull(frame['AA JUNCTION']) | isnull(frame['V-GENE and allele'])]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
51
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
52 filtall = filtall.append(filtered)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
53 if len(filtall) > 0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
54 filtall.loc[filtered.index,'Reason'] = "NoResults"
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
55 frame = frame[frame['AA JUNCTION'].notnull()]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
56 frame = frame[frame['V-GENE and allele'].notnull()]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
57
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
58 head.append('Not Null CDR3/V')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
59 head.append('filter out')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
60 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
61 seqlen.append(len(filtered))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
62 filtered = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
63
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
64 if psorf.startswith('y') or psorf.startswith('Y'):
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
65
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
66 cc0=np.array(frame['V-GENE and allele'].unique())
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
67
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
68
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
69 for x in cc0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
70 x1=x.split('*')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
71 try:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
72 if (x1[1].find('P')>-1) or (x1[1].find('ORF')>-1):
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
73 filtered = filtered.append(frame[frame['V-GENE and allele'] == x])
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
74 frame['V-GENE and allele']=frame['V-GENE and allele'].replace(x,NA)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
75 elif x.find('or')>-1:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
76 posa=x.count('or')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
77 x2=x.split('or')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
78 x4=''
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
79 genelist=[]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
80 for cnt in range(0, posa+1):
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
81 x3=x2[cnt].split('*')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
82 x3[0]=x3[0].strip()#kobei ta space
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
83 k=x3[0].split(' ')# holds only TRBV
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
84 if cnt==0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
85 genelist.append(k[1])
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
86 x4+=k[1]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
87 elif ((str(k[1]) in genelist) == False) & (x3[1].find('P')==-1):# check for P in x3
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
88 genelist.append(k[1])
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
89 x4+=' or '
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
90 x4+=k[1]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
91 x3=None
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
92 k1=None
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
93 genelist=None
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
94
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
95 frame['V-GENE and allele']=frame['V-GENE and allele'].replace(x,x4)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
96
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
97 else:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
98 s=x1[0].split(' ')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
99 frame['V-GENE and allele']=frame['V-GENE and allele'].replace(x,s[1])
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
100 except IndexError as e:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
101 print('V-gene is already been formed')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
102 continue
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
103
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
104 x=None
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
105 x1=None
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
106 s=None
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
107
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
108 filtall = filtall.append(filtered)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
109 if len(filtall) > 0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
110 filtall.loc[filtered.index,'Reason'] = 'P or ORF'
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
111 frame = frame[frame['V-GENE and allele'].notnull()]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
112
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
113 head.append('Functional TRBV')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
114 head.append('filter out')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
115 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
116 seqlen.append(len(filtered))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
117 filtered = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
118
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
119
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
120
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
121 #------------FILTERING for data quality--------------------
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
122 if con.startswith('y') or con.startswith('Y'):
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
123 filtered = frame [frame['AA JUNCTION'].str.contains('X') |
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
124 frame['AA JUNCTION'].str.contains('#') |
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
125 frame['AA JUNCTION'].str.contains('[*]')]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
126
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
127
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
128
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
129 frame = frame [~frame['AA JUNCTION'].str.contains('X') &
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
130 ~frame['AA JUNCTION'].str.contains('#') &
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
131 ~frame['AA JUNCTION'].str.contains('[*]') ]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
132
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
133
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
134 filtall = filtall.append(filtered)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
135 if len(filtall) > 0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
136 filtall.loc[filtered.index,'Reason'] = 'X,#,*'
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
137 head.append('Not Containing X,#,*')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
138 head.append('filter out')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
139 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
140 seqlen.append(len(filtered))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
141 filtered = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
142
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
143 # Set label of functionality column, taking into account current & past IMGT Summary column label
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
144 functionality_label = 'Functionality'
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
145 if 'V-DOMAIN Functionality' in frame.columns:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
146 functionality_label = 'V-DOMAIN Functionality'
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
147
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
148 if prod.startswith('y') or prod.startswith('Y'):
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
149 filtered = frame[~frame[functionality_label].str.startswith('productive')]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
150 filtall = filtall.append(filtered)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
151 if len(filtall) > 0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
152 filtall.loc[filtered.index,'Reason'] = 'not productive'
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
153
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
154
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
155 frame=frame[frame[functionality_label].str.startswith('productive')]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
156
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
157 head.append('Productive')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
158 head.append('filter out')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
159 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
160
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
161 seqlen.append(len(filtered))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
162
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
163
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
164 frame['AA JUNCTION'] = frame['AA JUNCTION'].map(filter_condition_AAjunction)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
165
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
166 if CF.startswith('y') or CF.startswith('Y'):
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
167 if cells == 'TCR':
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
168 filtered = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
169 filtered = frame[~frame['AA JUNCTION'].str.startswith('C') |
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
170 ~frame['AA JUNCTION'].str.endswith('F')]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
171
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
172 filtall = filtall.append(filtered)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
173 if len(filtall) > 0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
174 filtall.loc[filtered.index,'Reason'] = 'Not C..F'
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
175
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
176 frame = frame[frame['AA JUNCTION'].str.startswith('C') &
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
177 frame['AA JUNCTION'].str.endswith('F')]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
178
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
179 head.append('CDR3 landmarks C-F')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
180 head.append('filter out')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
181 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
182 seqlen.append(len(filtered))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
183 filtered = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
184 elif cells == 'BCR':
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
185 filtered = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
186 filtered = frame[~frame['AA JUNCTION'].str.startswith('C') |
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
187 ~frame['AA JUNCTION'].str.endswith('W')]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
188
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
189 filtall = filtall.append(filtered)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
190 if len(filtall) > 0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
191 filtall.loc[filtered.index,'Reason'] = 'Not C..W'
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
192
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
193 frame = frame[frame['AA JUNCTION'].str.startswith('C') &
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
194 frame['AA JUNCTION'].str.endswith('W')]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
195
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
196 head.append('CDR3 landmarks C-W')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
197 head.append('filter out')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
198 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
199 seqlen.append(len(filtered))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
200 filtered = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
201 else:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
202 print('TCR or BCR type')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
203
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
204
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
205 filtered = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
206
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
207 filtered = frame[frame['V-REGION identity %'] < Vper]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
208
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
209
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
210 filtall = filtall.append(filtered)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
211 if len(filtall) > 0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
212 filtall.loc[filtered.index,'Reason'] = 'identity < {iden}%'.format(iden = Vper)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
213
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
214 frame=frame[frame['V-REGION identity %']>= Vper]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
215 head.append('Identity >= {iden}%'.format(iden = Vper))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
216 head.append('filter out')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
217 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
218 seqlen.append(len(filtered))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
219
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
220 head.append('Total filter out A')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
221 head.append('Total filter in A')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
222 seqlen.append(len(filtall))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
223 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
224
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
225 ###############################
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
226 if Vgene != 'null':
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
227
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
228 filtered = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
229
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
230 filtered = frame[frame['V-GENE and allele'] != Vgene]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
231
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
232 filtall = filtall.append(filtered)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
233 if len(filtall) > 0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
234 filtall.loc[filtered.index,'Reason'] = 'V-GENE != {} '.format(Vgene)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
235
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
236
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
237 frame = frame[frame['V-GENE and allele'] == Vgene]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
238
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
239
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
240
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
241 head.append('V-GENE = {} '.format(Vgene))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
242 head.append('filter out')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
243 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
244 seqlen.append(len(filtered))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
245
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
246
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
247
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
248 ###############################
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
249 if (laa1 != 'null') or (laa2 != 'null'):
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
250 if int(laa2) == 0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
251 low = int(laa1)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
252 high = 100
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
253 elif int(laa1) > int(laa2):
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
254 low = int(laa2)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
255 high = int(laa1)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
256 else:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
257 low = int(laa1)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
258 high = int(laa2)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
259
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
260 filtered = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
261 criteria = frame['AA JUNCTION'].apply(lambda row: (len(row)-2) < low)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
262 criteria2 = frame['AA JUNCTION'].apply(lambda row: (len(row)-2) > high)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
263 filtered = frame[criteria | criteria2]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
264
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
265 filtall = filtall.append(filtered)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
266 if int(laa2)==0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
267 if len(filtall) > 0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
268 filtall.loc[filtered.index,'Reason'] = 'CDR3 length not bigger than {}'.format(low)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
269 else:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
270 if len(filtall) > 0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
271 filtall.loc[filtered.index,'Reason'] = 'CDR3 length not from {} to {}'.format(low,high)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
272
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
273 criteria3 = frame['AA JUNCTION'].apply(lambda row: (len(row)-2) >= low)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
274 criteria4 = frame['AA JUNCTION'].apply(lambda row: (len(row)-2) <= high)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
275 frame = frame[criteria3 & criteria4]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
276
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
277 if int(laa2)==0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
278 head.append('CDR3 length bigger than {}'.format(low))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
279 else:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
280 head.append('CDR3 length from {} to {} '.format(low,high))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
281 head.append('filter out')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
282 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
283 seqlen.append(len(filtered))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
284
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
285 ###############################
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
286 if conaa != 'null':
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
287 if conaa.islower():
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
288 conaa = conaa.upper()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
289 filtered = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
290
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
291 filtered = frame[~frame['AA JUNCTION'].str.contains(conaa)]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
292
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
293 filtall = filtall.append(filtered)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
294 if len(filtall) > 0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
295 filtall.loc[filtered.index,'Reason'] = 'CDR3 not containing {}'.format(conaa)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
296
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
297 frame = frame[frame['AA JUNCTION'].str.contains(conaa) ]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
298
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
299 head.append('CDR3 containing {}'.format(conaa))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
300 head.append('filter out')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
301 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
302 seqlen.append(len(filtered))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
303
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
304
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
305
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
306
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
307 #####------------keep the small J gene name--------------------
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
308 #frame['J-GENE and allele'] = frame['J-GENE and allele'].map(filter_condition_Jgene)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
309 cc2=np.array(frame['J-GENE and allele'].unique())
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
310
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
311 for x in cc2:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
312 try:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
313 if notnull(x):
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
314 x1=x.split('*')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
315 # print(x)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
316 # print (x1[0])
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
317 trbj=x1[0].split(' ')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
318 frame['J-GENE and allele']=frame['J-GENE and allele'].replace(x,trbj[1])
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
319 except IndexError as e:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
320 print('J-Gene has been formed')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
321
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
322
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
323
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
324 x=None
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
325 x1=None
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
326
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
327
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
328 #------------keep the small D gene name--------------------
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
329 cc1=np.array(frame['D-GENE and allele'].unique())
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
330 for x in cc1:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
331 try:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
332 if notnull(x):
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
333 x1=x.split('*')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
334 trbd=x1[0].split(' ')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
335 frame['D-GENE and allele']=frame['D-GENE and allele'].replace(x,trbd[1])
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
336 else:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
337 frame['D-GENE and allele']=frame['D-GENE and allele'].replace(x,'none')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
338 except IndexError as e:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
339 print('D-gene has been formed')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
340
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
341
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
342 x=None
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
343 x1=None
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
344
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
345
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
346 if Jgene != 'null':
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
347
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
348 filtered = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
349
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
350 filtered = frame[frame['J-GENE and allele'] != Jgene]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
351
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
352 filtall = filtall.append(filtered)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
353 if len(filtall) > 0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
354 filtall.loc[filtered.index,'Reason'] = 'J-GENE not {} '.format(Jgene)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
355
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
356
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
357 frame = frame[frame['J-GENE and allele'] == Jgene]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
358
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
359
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
360
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
361 head.append('J-GENE = {} '.format(Jgene))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
362 head.append('filter out')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
363 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
364 seqlen.append(len(filtered))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
365
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
366
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
367
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
368 if Dgene != 'null':
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
369
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
370 filtered = DataFrame()
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
371
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
372 filtered = frame[frame['D-GENE and allele'] != Dgene]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
373
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
374 filtall = filtall.append(filtered)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
375 if len(filtall) > 0:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
376 filtall.loc[filtered.index,'Reason'] = 'D-GENE not {} '.format(Dgene)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
377
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
378
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
379 frame = frame[frame['D-GENE and allele'] == Dgene]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
380
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
381
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
382
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
383 head.append('D-GENE = {} '.format(Dgene))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
384 head.append('filter out')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
385 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
386 seqlen.append(len(filtered))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
387
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
388
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
389 head.append('Total filter out')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
390 head.append('Total filter in')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
391 seqlen.append(len(filtall))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
392 seqlen.append(len(frame))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
393 summ_df = DataFrame(index = head)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
394 col = fname
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
395
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
396 summ_df[col] = seqlen
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
397 frame=frame.rename(columns = {'V-GENE and allele':'V-GENE',
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
398 'J-GENE and allele':'J-GENE','D-GENE and allele':'D-GENE'})
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
399
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
400
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
401 frcol.append('Reason')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
402
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
403 filtall = filtall[frcol]
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
404
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
405 #--------------out CSV---------------------------
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
406 frame.index = range(1,len(frame)+1)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
407 if not summ_df.empty:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
408 summ_df['%'] = (100*summ_df[summ_df.columns[0]]/summ_df[summ_df.columns[0]][summ_df.index[0]]).map(('{:.4f}'.format))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
409 return(frame,filtall,summ_df)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
410 except KeyError as e:
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
411 print('This file has no ' + str(e) + ' column')
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
412 return(frame,filtall,summ_df)
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
413
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
414
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: read 17 positional arguments, run filtering()
    # and write each non-empty result table as a tab-separated file.

    start = time.time()

    # Fail early with a readable message instead of an IndexError traceback
    # when the caller supplies too few arguments.
    expected_args = 17
    if len(sys.argv) < expected_args + 1:
        sys.exit('ngs_filtering: expected {} arguments, got {}'.format(
            expected_args, len(sys.argv) - 1))

    # Parse input arguments. Positions are fixed: argv[1] .. argv[17];
    # anything after argv[17] is ignored.
    (inp, cells, psorf, con, prod, CF, Vper, Vgene, laa1, conaa,
     filterin, filterout, Sum_table, Jgene, Dgene, laa2, fname) = sys.argv[1:18]

    # Vper (argv[7]) is the only argument converted to a number here.
    try:
        Vper = float(Vper)
    except ValueError:
        sys.exit('ngs_filtering: argument 7 (Vper) must be a number, got {!r}'.format(Vper))

    # Execute basic function
    fin, fout, summ = filtering(inp, cells, psorf, con, prod, CF, Vper, Vgene,
                                laa1, laa2, conaa, Jgene, Dgene, fname)

    # Save output to CSV files; an empty table produces no file.
    for table, path in ((summ, Sum_table), (fin, filterin), (fout, filterout)):
        if not table.empty:
            table.to_csv(path, sep='\t')

    # Print execution time
    stop = time.time()
    print('Runtime:' + str(stop - start))
cdf95051bc55 Uploaded 2 tools
chmaramis
parents:
diff changeset
452