comparison clsi_profile_type2_linux.ipynb @ 11:7dcc0e93288b draft default tip

"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit f590c3b1d71a9b8f2030909fa488b4ac0c3caed8-dirty"
author rakesh4osdd
date Wed, 30 Jun 2021 07:13:29 +0000
parents
children
comparison
equal deleted inserted replaced
10:116ebdf92e39 11:7dcc0e93288b
1 {
2 "cells": [
3 {
4 "cell_type": "code",
5 "execution_count": 7,
6 "id": "9aa0a6f7",
7 "metadata": {},
8 "outputs": [],
9 "source": [
10 "# ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics\n",
11 "# By rakesh4osdd@gmail.com, 06-Jun-2021\n",
12 "import pandas as pd\n",
13 "import re\n",
14 "import sys"
15 ]
16 },
17 {
18 "cell_type": "code",
19 "execution_count": 8,
20 "id": "9af8387e",
21 "metadata": {},
22 "outputs": [],
23 "source": [
24 "#print(pd.__version__, re.__version__)"
25 ]
26 },
27 {
28 "cell_type": "code",
29 "execution_count": 9,
30 "id": "73d0783c",
31 "metadata": {},
32 "outputs": [],
33 "source": [
34 "# compare two MIC value strings\n",
def check_mic(mic1, mic2, mic_type):
    """Compare an observed MIC string against one CLSI breakpoint string.

    Parameters:
        mic1: observed MIC as text, either a single number ('8') or a
            two-part combination written 'a/b' ('16/4').
        mic2: breakpoint as text: a single number, a combination 'a/b',
            a range 'lo-hi', or a combination range 'lo_a/lo_b-hi_a/hi_b'.
        mic_type: 's' (observed <= breakpoint), 'r' (observed >= breakpoint)
            or 'i' (observed inside the range, or equal to a single value).

    Returns:
        'Susceptible', 'Resistant' or 'Intermediate' when the comparison
        selected by mic_type holds; otherwise the string
        'Strain could not be classified'. That string is also returned for
        a zero or non-numeric observed MIC and for a breakpoint that cannot
        be parsed, so no ValueError/IndexError escapes for string inputs.
    """
    unclassified = 'Strain could not be classified'
    labels = {'s': 'Susceptible', 'r': 'Resistant', 'i': 'Intermediate'}

    def _holds(observed, limit):
        # Apply the comparison selected by mic_type to one pair of values.
        observed, limit = float(observed), float(limit)
        if mic_type == 's':
            return observed <= limit
        if mic_type == 'r':
            return observed >= limit
        if mic_type == 'i':
            return observed == limit
        return False

    # Phase 1: split the observed MIC into its components and reject
    # zero / non-numeric values.
    try:
        if '/' in mic1:
            m1a, m1b = mic1.split('/')[:2]
            if float(m1a) == 0 or float(m1b) == 0:
                return unclassified
        elif '/' in mic2:
            # Single observed value against a combination breakpoint: the
            # second component is taken as '1'.
            m1a, m1b = mic1, '1'
            if float(m1a) == 0:
                return unclassified
        else:
            m1a, m1b = mic1, None
            if float(m1a) == 0:
                return unclassified
    except ValueError:
        return unclassified

    # Phase 2: compare against the breakpoint. A malformed breakpoint
    # (missing component or non-numeric text) yields the unclassified label
    # instead of raising.
    try:
        if '-' in mic2 and mic_type == 'i':
            # Ranges are only meaningful for the intermediate category.
            low, high = mic2.split('-')[:2]
            if '/' in mic2:
                low_parts, high_parts = low.split('/'), high.split('/')
                low_a, low_b = low_parts[0], low_parts[1]
                high_a, high_b = high_parts[0], high_parts[1]
                in_range = (float(high_a) >= float(m1a) >= float(low_a)
                            and float(high_b) >= float(m1b) >= float(low_b))
            else:
                in_range = float(high) >= float(m1a) >= float(low)
            return 'Intermediate' if in_range else unclassified

        if '/' in mic2:
            m2a, m2b = mic2.split('/')[:2]
            matched = _holds(m1a, m2a) and _holds(m1b, m2b)
        elif '-' in mic2:
            # A range breakpoint cannot decide 's' or 'r'.
            return unclassified
        else:
            matched = _holds(m1a, mic2)
    except (IndexError, ValueError):
        return unclassified

    return labels[mic_type] if matched else unclassified
115 "\n",
116 "#check_mic('65','32-64','i')"
117 ]
118 },
119 {
120 "cell_type": "code",
121 "execution_count": 10,
122 "id": "4d2ab1b1",
123 "metadata": {},
124 "outputs": [],
125 "source": [
126 "# compare MIC value in pandas list\n",
def sus_res_int(mic):
    """Classify one observed MIC against its S, R and I breakpoints.

    Parameters:
        mic: four MIC strings in the order observed value, susceptible
            breakpoint, resistant breakpoint, intermediate breakpoint.
            May be a plain list or a pandas row as passed by
            DataFrame.apply(axis=1).

    Returns:
        'Susceptible', 'Resistant' or 'Intermediate' for the first category
        (checked in that order) whose comparison holds, otherwise
        'Strain could not be classified'.
    """
    unclassified = 'Strain could not be classified'

    # Read the values by position. Integer indexing on a label-indexed
    # pandas row is deprecated, while iterating it yields the values in order.
    values = list(mic)
    o_mic = values[0].replace(' ', '')
    s_mic = values[1].replace(' ', '')
    r_mic = values[2].replace(' ', '')
    i_mic = values[3].replace(' ', '')

    checks = (
        (s_mic, 's', 'Susceptible'),
        (r_mic, 'r', 'Resistant'),
        (i_mic, 'i', 'Intermediate'),
    )
    try:
        for limit, code, label in checks:
            if check_mic(o_mic, limit, code) == label:
                return label
    except ValueError:
        # A value check_mic could not parse leaves the row unclassified.
        return unclassified
    return unclassified
145 "\n",
146 "#mic=['128','16/4','128/4','32/4-64/4']\n",
147 "#sus_res_int(mic)"
148 ]
149 },
150 {
151 "cell_type": "code",
152 "execution_count": 11,
153 "id": "0e22ef0d",
154 "metadata": {},
155 "outputs": [
156 {
157 "ename": "IndexError",
158 "evalue": "list index out of range",
159 "output_type": "error",
160 "traceback": [
161 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
162 "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
163 "\u001b[0;32m<ipython-input-11-26f141926f14>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0minput_user\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0minput_clsi\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0moutput_table\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
164 "\u001b[0;31mIndexError\u001b[0m: list index out of range"
165 ]
166 }
167 ],
168 "source": [
# Command-line arguments, in order: user AST CSV, CLSI breakpoint CSV,
# path of the output table CSV.
if len(sys.argv) < 4:
    # Stop with a readable message instead of a bare IndexError.
    sys.exit('Usage: ' + (sys.argv[0] if sys.argv else 'script')
             + ' <input_user.csv> <input_clsi.csv> <output_table.csv>')
input_user = sys.argv[1]
input_clsi = sys.argv[2]
output_table = sys.argv[3]
173 ]
174 },
175 {
176 "cell_type": "code",
177 "execution_count": 49,
178 "id": "21d5fe63",
179 "metadata": {},
180 "outputs": [],
181 "source": [
182 "\"\"\"input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'\n",
183 "#input_user='test-data/input2.csv'\n",
184 "input_clsi='test-data/clsi.csv'\n",
185 "output_profile='test-data/input2_profile.csv'\n",
186 "#output_table='test-data/input2_table.csv'\n",
187 "output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'\"\"\""
188 ]
189 },
190 {
191 "cell_type": "code",
192 "execution_count": 60,
193 "id": "1e64b025",
194 "metadata": {},
195 "outputs": [],
196 "source": [
# Load the user's AST results, keeping only the three columns used below.
# na_filter=False turns off missing-value detection, so blank cells stay
# empty strings.
strain_mic = pd.read_csv(
    input_user,
    sep=',',
    usecols=['Strain name', 'Antibiotics', 'MIC'],
    na_filter=False,
)
199 "#strain_mic"
200 ]
201 },
202 {
203 "cell_type": "code",
204 "execution_count": 61,
205 "id": "0d30ddc3",
206 "metadata": {},
207 "outputs": [],
208 "source": [
# Load the comma-separated CLSI breakpoint table.
clsi_bp = pd.read_csv(
    input_clsi,
    sep=',',
)
210 "\n",
211 "#clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape"
212 ]
213 },
214 {
215 "cell_type": "code",
216 "execution_count": 62,
217 "id": "a818676d",
218 "metadata": {},
219 "outputs": [],
220 "source": [
221 "#clsi_bp\n",
222 "#strain_mic"
223 ]
224 },
225 {
226 "cell_type": "code",
227 "execution_count": 64,
228 "id": "c2aae757",
229 "metadata": {},
230 "outputs": [],
231 "source": [
# Abort when the input lists the same strain/antibiotic pair more than once.
# duplicated() flags each repeat after the first occurrence; those rows are
# written to the output file below an explanatory message.
input_dups = strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated()]
if input_dups.shape[0] != 0:
    with open(output_table, "w") as file_object:
        file_object.write('S.No.,Strain name,Antibiotics,MIC\nInput File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file\n')
    # The message handle is closed (and flushed) before pandas appends to
    # the same path, so the message always precedes the duplicate rows.
    input_dups.to_csv(output_table, na_rep='NA', mode='a')
    # sys.exit() rather than the interactive-only exit() helper.
    sys.exit()
243 ]
244 },
245 {
246 "cell_type": "code",
247 "execution_count": 17,
248 "id": "c6b4c59b",
249 "metadata": {},
250 "outputs": [],
251 "source": [
# Reduce each CLSI breakpoint cell to digits, '.', '/' and '-'.
# Cells are converted to text first so numeric or missing (NaN) cells do not
# raise TypeError; a NaN becomes 'nan' and is stripped to an empty string.
clsi_bp['s_mic'] = clsi_bp['Susceptible'].astype(str).str.replace(r'[^0-9.\/-]', '', regex=True)
clsi_bp['r_mic'] = clsi_bp['Resistant'].astype(str).str.replace(r'[^0-9.\/-]', '', regex=True)
clsi_bp['i_mic'] = clsi_bp['Intermediate'].astype(str).str.replace(r'[^0-9.\/-]', '', regex=True)
256 ]
257 },
258 {
259 "cell_type": "code",
260 "execution_count": 18,
261 "id": "d0171f94",
262 "metadata": {},
263 "outputs": [],
264 "source": [
265 "#clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))"
266 ]
267 },
268 {
269 "cell_type": "code",
270 "execution_count": 19,
271 "id": "fe45b2dd",
272 "metadata": {},
273 "outputs": [],
274 "source": [
# Keep only digits, '.' and '/' from each observed MIC value.
# The column is converted to text first: when every MIC is a bare number
# read_csv yields a numeric column, which a plain regex substitution rejects
# with TypeError.
strain_mic['o_mic'] = strain_mic['MIC'].astype(str).str.replace(r'[^0-9.\/]', '', regex=True)
280 ]
281 },
282 {
283 "cell_type": "code",
284 "execution_count": 20,
285 "id": "ddbbe4d9",
286 "metadata": {},
287 "outputs": [],
288 "source": [
289 "#strain_mic"
290 ]
291 },
292 {
293 "cell_type": "code",
294 "execution_count": 21,
295 "id": "640508f1",
296 "metadata": {},
297 "outputs": [],
298 "source": [
# Normalise the names used for matching: capitalise each value (first
# character upper-case, the rest lower-case) and remove every space, in both
# the user table and the CLSI table.
strain_mic['Strain name'] = (
    strain_mic['Strain name']
    .str.capitalize()
    .str.replace(" ", "")
)
strain_mic['Antibiotics'] = (
    strain_mic['Antibiotics']
    .str.capitalize()
    .str.replace(" ", "")
)

clsi_bp['Antibiotics'] = (
    clsi_bp['Antibiotics']
    .str.capitalize()
    .str.replace(" ", "")
)
304 ]
305 },
306 {
307 "cell_type": "code",
308 "execution_count": 22,
309 "id": "b87426f4",
310 "metadata": {},
311 "outputs": [],
312 "source": [
# Look for duplicate strain/antibiotic pairs; keep=False reports every member
# of a duplicated group, not only the repeats.
dups = strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated(keep=False)]
if dups.shape[0] != 0:
    print('Please provide a single MIC value in input file for given duplicates combination of \'Strain name and Antibiotics\' to use the tool:-\n', dups)
    # Stop here: without a merge there is no `result` for the later steps.
    sys.exit(1)

# Inner join on the antibiotic name, so only antibiotics present in the CLSI
# table are kept, then select the columns needed for classification.
try:
    result = pd.merge(strain_mic, clsi_bp, on='Antibiotics', how='inner')[
        ['Strain name', 'Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic', 'i_mic']
    ]
except KeyError:
    print('Waring: Error in input Values')
    # A missing column leaves `result` undefined, so the run cannot continue.
    sys.exit(1)
325 ]
326 },
327 {
328 "cell_type": "code",
329 "execution_count": 23,
330 "id": "91bfc94d",
331 "metadata": {},
332 "outputs": [
333 {
334 "data": {
335 "text/html": [
336 "<div>\n",
337 "<style scoped>\n",
338 " .dataframe tbody tr th:only-of-type {\n",
339 " vertical-align: middle;\n",
340 " }\n",
341 "\n",
342 " .dataframe tbody tr th {\n",
343 " vertical-align: top;\n",
344 " }\n",
345 "\n",
346 " .dataframe thead th {\n",
347 " text-align: right;\n",
348 " }\n",
349 "</style>\n",
350 "<table border=\"1\" class=\"dataframe\">\n",
351 " <thead>\n",
352 " <tr style=\"text-align: right;\">\n",
353 " <th></th>\n",
354 " <th>Strain name</th>\n",
355 " <th>Antibiotics</th>\n",
356 " <th>MIC</th>\n",
357 " <th>o_mic</th>\n",
358 " </tr>\n",
359 " </thead>\n",
360 " <tbody>\n",
361 " </tbody>\n",
362 "</table>\n",
363 "</div>"
364 ],
365 "text/plain": [
366 "Empty DataFrame\n",
367 "Columns: [Strain name, Antibiotics, MIC, o_mic]\n",
368 "Index: []"
369 ]
370 },
371 "execution_count": 23,
372 "metadata": {},
373 "output_type": "execute_result"
374 }
375 ],
376 "source": [
377 "dups.head()"
378 ]
379 },
380 {
381 "cell_type": "code",
382 "execution_count": 132,
383 "id": "b171f205",
384 "metadata": {},
385 "outputs": [],
386 "source": [
# Classify every row from its observed MIC and its S, R and I breakpoints.
# apply(axis=1) produces one value per row, so the target is a single column
# label rather than a one-element list of labels.
result['CLSI_profile'] = result[['o_mic', 's_mic', 'r_mic', 'i_mic']].apply(sus_res_int, axis=1)
392 ]
393 },
394 {
395 "cell_type": "code",
396 "execution_count": 133,
397 "id": "3336fd92",
398 "metadata": {},
399 "outputs": [],
400 "source": [
401 "#result"
402 ]
403 },
404 {
405 "cell_type": "code",
406 "execution_count": 134,
407 "id": "f0dacfd1",
408 "metadata": {},
409 "outputs": [],
410 "source": [
411 "#result[['Strain name', 'Antibiotics', 'MIC','s_mic','r_mic','CLSI_profile']].to_csv(output_profile,sep=',', index=False, encoding='utf-8-sig')"
412 ]
413 },
414 {
415 "cell_type": "code",
416 "execution_count": 135,
417 "id": "3d8d03f7",
418 "metadata": {},
419 "outputs": [],
420 "source": [
# Build the ASIST table: one row per strain, one column per antibiotic.
# Exact duplicate rows are dropped first; if a strain/antibiotic pair still
# has several labels they are joined with a single space.
table = result[['Strain name', 'Antibiotics', 'CLSI_profile']].drop_duplicates()
result_table = pd.pivot_table(
    table,
    values='CLSI_profile',
    index=['Strain name'],
    columns=['Antibiotics'],
    aggfunc=lambda labels: ' '.join(labels),
)
424 ]
425 },
426 {
427 "cell_type": "code",
428 "execution_count": 136,
429 "id": "7d7223a3",
430 "metadata": {},
431 "outputs": [],
432 "source": [
433 "#result_table"
434 ]
435 },
436 {
437 "cell_type": "code",
438 "execution_count": 137,
439 "id": "8a41b2ef",
440 "metadata": {},
441 "outputs": [],
442 "source": [
443 "#result_table.to_csv(output_table,na_rep='NA')"
444 ]
445 },
446 {
447 "cell_type": "code",
448 "execution_count": 138,
449 "id": "8c9e5f87",
450 "metadata": {},
451 "outputs": [],
452 "source": [
# Column order expected by ASIST. Names are written in the normalised form
# used for matching (capitalised, no spaces), so they must not contain
# spaces themselves; a name with a trailing space can never match a column.
clsi_ab = [
    'Amikacin', 'Tobramycin', 'Gentamycin', 'Netilmicin',
    'Imipenem', 'Meropenem', 'Doripenem',
    'Ciprofloxacin', 'Levofloxacin',
    'Piperacillin/tazobactam', 'Ticarcillin/clavulanicacid',
    'Cefotaxime', 'Ceftriaxone', 'Ceftazidime', 'Cefepime',
    'Trimethoprim/sulfamethoxazole', 'Ampicillin/sulbactam',
    'Colistin', 'Polymyxinb',
    'Tetracycline', 'Doxicycline', 'Minocycline',
]
# filter() keeps only the listed columns that exist, in the listed order.
result_selected = result_table.filter(items=clsi_ab)
459 ]
460 },
461 {
462 "cell_type": "code",
463 "execution_count": 139,
464 "id": "cdf43afb",
465 "metadata": {},
466 "outputs": [],
467 "source": [
468 "#print(result_selected.shape, result_table.shape)"
469 ]
470 },
471 {
472 "cell_type": "code",
473 "execution_count": 140,
474 "id": "c4c4df30",
475 "metadata": {},
476 "outputs": [],
477 "source": [
478 "#result_selected.insert(0,'Resistance_phenotype','')"
479 ]
480 },
481 {
482 "cell_type": "code",
483 "execution_count": 141,
484 "id": "9adb2703",
485 "metadata": {},
486 "outputs": [],
487 "source": [
# Replace the normalised column names with the header spellings used in the
# output file; columns not listed here keep their names.
result_selected = result_selected.rename(
    columns={
        'Ticarcillin/clavulanicacid': 'Ticarcillin/clavulanic acid',
        'Piperacillin/tazobactam': 'Piperacillin/ tazobactam',
        'Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole',
        'Ampicillin/sulbactam': 'Ampicillin/ sulbactam',
        'Polymyxinb': 'Polymyxin B',
    }
)
490 ]
491 },
492 {
493 "cell_type": "code",
494 "execution_count": 142,
495 "id": "50e6cf5f",
496 "metadata": {},
497 "outputs": [],
498 "source": [
499 "#result_selected"
500 ]
501 },
502 {
503 "cell_type": "code",
504 "execution_count": 144,
505 "id": "2833671c",
506 "metadata": {},
507 "outputs": [],
508 "source": [
# Write the final table; cells without a value are written as 'NA'.
result_selected.to_csv(
    output_table,
    na_rep='NA',
)
510 ]
511 }
512 ],
513 "metadata": {
514 "kernelspec": {
515 "display_name": "Python 3",
516 "language": "python",
517 "name": "python3"
518 },
519 "language_info": {
520 "codemirror_mode": {
521 "name": "ipython",
522 "version": 3
523 },
524 "file_extension": ".py",
525 "mimetype": "text/x-python",
526 "name": "python",
527 "nbconvert_exporter": "python",
528 "pygments_lexer": "ipython3",
529 "version": "3.7.10"
530 }
531 },
532 "nbformat": 4,
533 "nbformat_minor": 5
534 }