comparison clsi_profile.py @ 0:62226cd1acb5 draft

"planemo upload for repository https://github.com/rakesh4osdd/clsi_profile/tree/master commit ae6f2bc197b28000c56f6368ad044b350bc34f6a"
author rakesh4osdd
date Fri, 11 Jun 2021 12:36:25 +0000
parents
children 683299422575
comparison
equal deleted inserted replaced
-1:000000000000 0:62226cd1acb5
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 # In[206]:
5
6
7 # ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics
8 # By rakesh4osdd@gmail.com, 06-Jun-2021
9 import pandas as pd
10 import re
11 import sys
12
13
14 # In[207]:
15
16
17 #print(pd.__version__, re.__version__)
18
19
20 # In[208]:
21
22
23 # compare two MIC value strings
def check_mic(mic1, mic2, mic_type):
    """Compare an observed MIC string against one CLSI breakpoint string.

    Parameters
    ----------
    mic1 : str
        Observed MIC, either a single number (``'8'``) or a combination
        value written as ``'a/b'`` (``'16/4'``).
    mic2 : str
        Breakpoint to compare against: a single number, a combination
        ``'a/b'``, or (intermediate only) a range ``'lo-hi'`` /
        ``'lo_a/lo_b-hi_a/hi_b'``.
    mic_type : str
        ``'s'`` (observed <= breakpoint), ``'r'`` (observed >= breakpoint)
        or ``'i'`` (observed equals the breakpoint or lies within the range).

    Returns
    -------
    str
        ``'Susceptible'``, ``'Resistant'`` or ``'Intermediate'`` when the
        comparison for ``mic_type`` holds; otherwise one of the
        ``'Strain could not be classified'`` strings (the ``-1`` / ``-2``
        suffixed variants are kept as in the original for the plain-number
        mismatch and the malformed-combination cases respectively).
    """
    unclassified = 'Strain could not be classified'

    # --- observed MIC: split a combination value and reject zero / garbage ---
    try:
        if '/' in mic1:
            observed = mic1.split('/')
            m1a, m1b = observed[0], observed[1]
            if float(m1a) == 0 or float(m1b) == 0:
                return unclassified
        else:
            m1a = mic1
            if float(m1a) == 0:
                return unclassified
            # A single observed value compared with a combination breakpoint
            # uses 1 as its second component.
            m1b = '1'
    except ValueError:
        return unclassified

    # --- breakpoint comparison ---
    try:
        if '-' in mic2 and mic_type == 'i':
            # Intermediate breakpoints may be given as an inclusive range.
            bounds = mic2.split('-')
            low, high = bounds[0], bounds[1]
            if '/' in mic2:
                m2a = low.split('/')[0]
                m2b = low.split('/')[1]
                m2aa = high.split('/')[0]
                m2bb = high.split('/')[1]
                in_range = (float(m2aa) >= float(m1a) >= float(m2a)
                            and float(m2bb) >= float(m1b) >= float(m2b))
            else:
                in_range = float(high) >= float(m1a) >= float(low)
            m_type = 'Intermediate' if in_range else unclassified
        elif '/' in mic2:
            limits = mic2.split('/')
            m2a, m2b = limits[0], limits[1]
            if (mic_type == 's'
                    and float(m1a) <= float(m2a) and float(m1b) <= float(m2b)):
                m_type = 'Susceptible'
            elif (mic_type == 'r'
                    and float(m1a) >= float(m2a) and float(m1b) >= float(m2b)):
                m_type = 'Resistant'
            elif (mic_type == 'i'
                    and float(m1a) == float(m2a) and float(m1b) == float(m2b)):
                m_type = 'Intermediate'
            else:
                m_type = unclassified
        elif '-' in mic2:
            # A range is only meaningful for the intermediate category.
            m_type = unclassified
        else:
            m2a = mic2
            if mic_type == 's' and float(m1a) <= float(m2a):
                m_type = 'Susceptible'
            elif mic_type == 'r' and float(m1a) >= float(m2a):
                m_type = 'Resistant'
            elif mic_type == 'i' and float(m1a) == float(m2a):
                m_type = 'Intermediate'
            else:
                m_type = 'Strain could not be classified-1'
    except IndexError:
        # Range breakpoint mixing combination and plain values, e.g. '32-64/4'.
        return 'Strain could not be classified-2'
    except ValueError:
        # Non-numeric / empty breakpoint: report it the same way as a
        # non-numeric observed MIC instead of raising to the caller.
        return unclassified

    return m_type
104
105 #check_mic('65','32-64','i')
106
107
108 # In[209]:
109
110
111 # compare MIC value in pandas list
def sus_res_int(mic):
    """Classify one observed MIC against its S / R / I breakpoints.

    Parameters
    ----------
    mic : sequence of str
        Four strings in the order observed MIC, susceptible breakpoint,
        resistant breakpoint, intermediate breakpoint.  May be a list, a
        tuple or a pandas Series row; values are taken by position.

    Returns
    -------
    str
        ``'Susceptible'``, ``'Resistant'`` or ``'Intermediate'`` for the
        first category (checked in that order) that ``check_mic`` confirms,
        otherwise ``'Strain could not be classified'``.
    """
    # Take values by iteration order rather than mic[0]..mic[3]: integer keys
    # on a string-labelled pandas Series are deprecated positional lookups.
    values = list(mic)
    o_mic = values[0].replace(' ', '')
    s_mic = values[1].replace(' ', '')
    r_mic = values[2].replace(' ', '')
    i_mic = values[3].replace(' ', '')
    try:
        if check_mic(o_mic, s_mic, 's') == 'Susceptible':
            strain_type = 'Susceptible'
        elif check_mic(o_mic, r_mic, 'r') == 'Resistant':
            strain_type = 'Resistant'
        elif check_mic(o_mic, i_mic, 'i') == 'Intermediate':
            strain_type = 'Intermediate'
        else:
            strain_type = 'Strain could not be classified'
    except ValueError:
        # A breakpoint that cannot be parsed as a number leaves the strain
        # unclassified.
        strain_type = 'Strain could not be classified'
    return strain_type
130
131 #mic=['128','16/4','128/4','32/4-64/4']
132 #sus_res_int(mic)
133
134
135 # In[210]:
136
137
138 # for input argument
# Command-line arguments: user AST table, CLSI breakpoint table, output CSV.
input_user = sys.argv[1]
input_clsi = sys.argv[2]
output_table = sys.argv[3]

# Read the user AST data, keeping only the three columns that are used.
# MIC is forced to text: a purely numeric column would otherwise be parsed as
# int/float and make the regex clean-up below raise TypeError.
strain_mic = pd.read_csv(
    input_user,
    sep=',',
    usecols=['Strain name', 'Antibiotics', 'MIC'],
    na_filter=False,
    dtype={'MIC': str},
)

# Breakpoint columns are read as text for the same reason.
clsi_bp = pd.read_csv(
    input_clsi,
    sep=',',
    dtype={'Susceptible': str, 'Resistant': str, 'Intermediate': str},
)

# Reduce breakpoint strings to digits, '.', '/' and '-' (range separator).
clsi_bp['s_mic'] = clsi_bp['Susceptible'].map(lambda x: re.sub(r'[^0-9.\/-]', '', x))
clsi_bp['r_mic'] = clsi_bp['Resistant'].map(lambda x: re.sub(r'[^0-9.\/-]', '', x))
clsi_bp['i_mic'] = clsi_bp['Intermediate'].map(lambda x: re.sub(r'[^0-9.\/-]', '', x))

# Reduce observed MIC strings to digits, '.' and '/'.
strain_mic['o_mic'] = strain_mic['MIC'].map(lambda x: re.sub(r'[^0-9.\/]', '', x))

# Normalise antibiotic names (capitalised, no spaces) so both tables join.
strain_mic['Antibiotics'] = strain_mic['Antibiotics'].str.capitalize().str.replace(" ", "")
clsi_bp['Antibiotics'] = clsi_bp['Antibiotics'].str.capitalize().str.replace(" ", "")

# Keep only antibiotics that have CLSI breakpoints.
try:
    result = pd.merge(strain_mic, clsi_bp, on='Antibiotics', how='inner')[
        ['Strain name', 'Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic', 'i_mic']
    ]
except KeyError:
    # Nothing below can run without the merged table, so stop here instead of
    # continuing into a NameError on `result`.
    sys.exit('Warning: Error in input Values')

# Classify every strain/antibiotic pair as Susceptible/Resistant/Intermediate.
result['CLSI_profile'] = result[['o_mic', 's_mic', 'r_mic', 'i_mic']].apply(sus_res_int, axis=1)

# Pivot to one row per strain and one column per antibiotic for ASIST;
# differing profiles for the same pair are joined with a space.
table = result[['Strain name', 'Antibiotics', 'CLSI_profile']].drop_duplicates()
result_table = pd.pivot_table(
    table,
    values='CLSI_profile',
    index=['Strain name'],
    columns=['Antibiotics'],
    aggfunc=lambda x: ' '.join(x),
)

# Column order expected by ASIST.  Names must match the normalised form above
# (no spaces), so 'Doxicycline' carries no trailing space.
# NOTE(review): spellings 'Gentamycin' / 'Doxicycline' are kept as in the
# original list - confirm they match the CLSI input table.
clsi_ab = [
    'Amikacin', 'Tobramycin', 'Gentamycin', 'Imipenem', 'Meropenem',
    'Doripenem', 'Ciprofloxacin', 'Levofloxacin',
    'Piperacillin/tazobactam', 'Ticarcillin/clavulanicacid', 'Cefotaxime',
    'Ceftriaxone', 'Ceftazidime', 'Cefepime',
    'Trimethoprim/sulfamethoxazole', 'Ampicillin/sulbactam', 'Colistin',
    'Polymyxinb', 'Tetracycline', 'Doxicycline',
    'Minocycline',
]
result_selected = result_table.filter(clsi_ab)

# Empty placeholder column in first position.
result_selected.insert(0, 'Resistance_phenotype', '')

# Restore the display headers.
result_selected = result_selected.rename(columns={
    'Ticarcillin/clavulanicacid': 'Ticarcillin/clavulanic acid',
    'Piperacillin/tazobactam': 'Piperacillin/ tazobactam',
    'Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole',
    'Ampicillin/sulbactam': 'Ampicillin/ sulbactam',
    'Polymyxinb': 'Polymyxin B',
})

result_selected.to_csv(output_table, na_rep='NA')
305