Mercurial > repos > rakesh4osdd > clsi_profile
comparison clsi_profile_type2_linux.ipynb @ 11:7dcc0e93288b draft default tip
"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit f590c3b1d71a9b8f2030909fa488b4ac0c3caed8-dirty"
author | rakesh4osdd |
---|---|
date | Wed, 30 Jun 2021 07:13:29 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
10:116ebdf92e39 | 11:7dcc0e93288b |
---|---|
1 { | |
2 "cells": [ | |
3 { | |
4 "cell_type": "code", | |
5 "execution_count": 7, | |
6 "id": "9aa0a6f7", | |
7 "metadata": {}, | |
8 "outputs": [], | |
9 "source": [ | |
10 "# ASIST module2 | map AST results to the CLSI breakpoints, including combination antibiotics\n", | |
11 "# By rakesh4osdd@gmail.com, 06-Jun-2021\n", | |
12 "import pandas as pd\n", | |
13 "import re\n", | |
14 "import sys" | |
15 ] | |
16 }, | |
17 { | |
18 "cell_type": "code", | |
19 "execution_count": 8, | |
20 "id": "9af8387e", | |
21 "metadata": {}, | |
22 "outputs": [], | |
23 "source": [ | |
24 "#print(pd.__version__, re.__version__)" | |
25 ] | |
26 }, | |
27 { | |
28 "cell_type": "code", | |
29 "execution_count": 9, | |
30 "id": "73d0783c", | |
31 "metadata": {}, | |
32 "outputs": [], | |
33 "source": [ | |
34 "# classify an observed MIC string (mic1) against one CLSI breakpoint string (mic2) of type 's', 'r' or 'i'\n", | |
35 "def check_mic(mic1,mic2,mic_type):\n", | |
36 " #print(mic1,mic2,mic_type)\n", | |
37 " try:\n", | |
38 " if '/' in mic1:\n", | |
39 " m1a = mic1.split('/')[0]\n", | |
40 " m1b = mic1.split('/')[1]\n", | |
41 " if float(m1a)==0 or float(m1b)==0:\n", | |
42 " strain_type='Strain could not be classified'\n", | |
43 " return(strain_type) \n", | |
44 " elif '/' in mic2:\n", | |
45 " m1a = mic1\n", | |
46 " if float(m1a)==0:\n", | |
47 " strain_type='Strain could not be classified'\n", | |
48 " return(strain_type) \n", | |
49 " m1b = '1'\n", | |
50 " elif float(mic1)==0:\n", | |
51 " strain_type='Strain could not be classified'\n", | |
52 " return(strain_type)\n", | |
53 " else:\n", | |
54 " m1a = mic1\n", | |
55 " \n", | |
56 " if '-' in mic2:\n", | |
57 " m2a = mic2.split('-')[0]\n", | |
58 " m2b = mic2.split('-')[1] \n", | |
59 " \n", | |
60 " except ValueError:\n", | |
61 " strain_type='Strain could not be classified' \n", | |
62 " return(strain_type)\n", | |
63 " try:\n", | |
64 " if '-' in mic2 and mic_type == 'i': # for intermediate only\n", | |
65 " if '/' in mic2:\n", | |
66 " m2a = mic2.split('-')[0].split('/')[0]\n", | |
67 " m2b = mic2.split('-')[0].split('/')[1]\n", | |
68 " m2aa = mic2.split('-')[1].split('/')[0]\n", | |
69 " m2bb = mic2.split('-')[1].split('/')[1]\n", | |
70 " if (float(m2aa)>=float(m1a)>=float(m2a) and float(m2bb)>=float(m1b)>=float(m2b)):\n", | |
71 " #print('intermediate')\n", | |
72 " m_type='Intermediate'\n", | |
73 " else:\n", | |
74 " #print('not define')\n", | |
75 " m_type='Strain could not be classified'\n", | |
76 " else:\n", | |
77 " m2a = mic2.split('-')[0]\n", | |
78 " m2b = mic2.split('-')[1] \n", | |
79 " if (float(m2b)>=float(m1a)>=float(m2a)):\n", | |
80 " #print('intermediate')\n", | |
81 " m_type='Intermediate'\n", | |
82 " else:\n", | |
83 " #print('not define')\n", | |
84 " m_type='Strain could not be classified' \n", | |
85 " #print (m1a,m1b,m2a,m2b,m2aa,m2bb)\n", | |
86 " elif '/' in mic2:\n", | |
87 " m2a = mic2.split('/')[0]\n", | |
88 " m2b = mic2.split('/')[1]\n", | |
89 " #print(m1a,m1b,m2a,m2b,mic_type)\n", | |
90 " if (mic_type=='s' and (float(m1a)<=float(m2a) and float(m1b)<=float(m2b))):\n", | |
91 " m_type='Susceptible'\n", | |
92 " elif (mic_type=='r' and (float(m1a)>=float(m2a) and float(m1b)>=float(m2b))):\n", | |
93 " m_type='Resistant'\n", | |
94 " elif (mic_type=='i' and (float(m1a)==float(m2a) and float(m1b)==float(m2b))):\n", | |
95 " m_type='Intermediate'\n", | |
96 " else:\n", | |
97 " m_type='Strain could not be classified'\n", | |
98 " elif '-' in mic2:\n", | |
99 " m_type='Strain could not be classified'\n", | |
100 " else:\n", | |
101 " m2a=mic2\n", | |
102 " if (mic_type=='s' and (float(m1a)<=float(m2a))):\n", | |
103 " m_type='Susceptible'\n", | |
104 " elif (mic_type=='r' and (float(m1a)>=float(m2a))):\n", | |
105 " m_type='Resistant'\n", | |
106 " elif (mic_type=='i' and (float(m1a)==float(m2a))):\n", | |
107 " m_type='Intermediate'\n", | |
108 " else:\n", | |
109 " m_type='Strain could not be classified' \n", | |
110 " except IndexError:\n", | |
111 " strain_type='Strain could not be classified' \n", | |
112 " return(strain_type)\n", | |
113 " \n", | |
114 " return(m_type)\n", | |
115 "\n", | |
116 "#check_mic('65','32-64','i')" | |
117 ] | |
118 }, | |
119 { | |
120 "cell_type": "code", | |
121 "execution_count": 10, | |
122 "id": "4d2ab1b1", | |
123 "metadata": {}, | |
124 "outputs": [], | |
125 "source": [ | |
126 "# classify one row of [o_mic, s_mic, r_mic, i_mic] as Susceptible, Resistant or Intermediate\n", | |
127 "def sus_res_int(mic):\n", | |
128 " #print(mic)\n", | |
129 " o_mic = mic[0].replace(' ', '')\n", | |
130 " s_mic = mic[1].replace(' ', '')\n", | |
131 " r_mic = mic[2].replace(' ', '')\n", | |
132 " i_mic = mic[3].replace(' ', '')\n", | |
133 " try:\n", | |
134 " if check_mic(o_mic,s_mic,'s')=='Susceptible':\n", | |
135 " strain_type='Susceptible'\n", | |
136 " elif check_mic(o_mic,r_mic,'r')=='Resistant':\n", | |
137 " strain_type='Resistant'\n", | |
138 " elif check_mic(o_mic,i_mic,'i')=='Intermediate':\n", | |
139 " strain_type='Intermediate' \n", | |
140 " else:\n", | |
141 " strain_type='Strain could not be classified'\n", | |
142 " except ValueError:\n", | |
143 " strain_type='Strain could not be classified' \n", | |
144 " return(strain_type)\n", | |
145 "\n", | |
146 "#mic=['128','16/4','128/4','32/4-64/4']\n", | |
147 "#sus_res_int(mic)" | |
148 ] | |
149 }, | |
150 { | |
151 "cell_type": "code", | |
152 "execution_count": 11, | |
153 "id": "0e22ef0d", | |
154 "metadata": {}, | |
155 "outputs": [ | |
156 { | |
157 "ename": "IndexError", | |
158 "evalue": "list index out of range", | |
159 "output_type": "error", | |
160 "traceback": [ | |
161 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
162 "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", | |
163 "\u001b[0;32m<ipython-input-11-26f141926f14>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0minput_user\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0minput_clsi\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0moutput_table\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
164 "\u001b[0;31mIndexError\u001b[0m: list index out of range" | |
165 ] | |
166 } | |
167 ], | |
168 "source": [ | |
169 "# for input argument\n", | |
170 "input_user = sys.argv[1]\n", | |
171 "input_clsi = sys.argv[2]\n", | |
172 "output_table = sys.argv[3]" | |
173 ] | |
174 }, | |
175 { | |
176 "cell_type": "code", | |
177 "execution_count": 49, | |
178 "id": "21d5fe63", | |
179 "metadata": {}, | |
180 "outputs": [], | |
181 "source": [ | |
182 "\"\"\"input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'\n", | |
183 "#input_user='test-data/input2.csv'\n", | |
184 "input_clsi='test-data/clsi.csv'\n", | |
185 "output_profile='test-data/input2_profile.csv'\n", | |
186 "#output_table='test-data/input2_table.csv'\n", | |
187 "output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'\"\"\"" | |
188 ] | |
189 }, | |
190 { | |
191 "cell_type": "code", | |
192 "execution_count": 60, | |
193 "id": "1e64b025", | |
194 "metadata": {}, | |
195 "outputs": [], | |
196 "source": [ | |
197 "# read user AST data with selected 3 columns\n", | |
198 "strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False)\n", | |
199 "#strain_mic" | |
200 ] | |
201 }, | |
202 { | |
203 "cell_type": "code", | |
204 "execution_count": 61, | |
205 "id": "0d30ddc3", | |
206 "metadata": {}, | |
207 "outputs": [], | |
208 "source": [ | |
209 "clsi_bp=pd.read_csv(input_clsi,sep=',')\n", | |
210 "\n", | |
211 "#clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape" | |
212 ] | |
213 }, | |
214 { | |
215 "cell_type": "code", | |
216 "execution_count": 62, | |
217 "id": "a818676d", | |
218 "metadata": {}, | |
219 "outputs": [], | |
220 "source": [ | |
221 "#clsi_bp\n", | |
222 "#strain_mic" | |
223 ] | |
224 }, | |
225 { | |
226 "cell_type": "code", | |
227 "execution_count": 64, | |
228 "id": "c2aae757", | |
229 "metadata": {}, | |
230 "outputs": [], | |
231 "source": [ | |
232 "# warn user about duplicate rows (same Strain name and Antibiotics) in the input file\n", | |
233 "input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()]\n", | |
234 "if (input_dups.shape[0] == 0):\n", | |
235 " #print( \"No duplicates\")\n", | |
236 " pass\n", | |
237 "else:\n", | |
238 " with open(output_table, \"w\") as file_object:\n", | |
239 " # write the error header first; the duplicate rows are appended below\n", | |
240 " file_object.write('S.No.,Strain name,Antibiotics,MIC\\nInput File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file\\n')\n", | |
241 " input_dups.to_csv(output_table,na_rep='NA', mode='a')\n", | |
242 " exit()" | |
243 ] | |
244 }, | |
245 { | |
246 "cell_type": "code", | |
247 "execution_count": 17, | |
248 "id": "c6b4c59b", | |
249 "metadata": {}, | |
250 "outputs": [], | |
251 "source": [ | |
252 "# convert MIC to numbers sMIC, rMIC\n", | |
253 "clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))\n", | |
254 "clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))\n", | |
255 "clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))" | |
256 ] | |
257 }, | |
258 { | |
259 "cell_type": "code", | |
260 "execution_count": 18, | |
261 "id": "d0171f94", | |
262 "metadata": {}, | |
263 "outputs": [], | |
264 "source": [ | |
265 "#clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))" | |
266 ] | |
267 }, | |
268 { | |
269 "cell_type": "code", | |
270 "execution_count": 19, | |
271 "id": "fe45b2dd", | |
272 "metadata": {}, | |
273 "outputs": [], | |
274 "source": [ | |
275 "# Read only numbers in MIC values\n", | |
276 "#try:\n", | |
277 "strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\\/]','', x)))\n", | |
278 "#except TypeError:\n", | |
279 "# print('Warning: Error in MIC value')" | |
280 ] | |
281 }, | |
282 { | |
283 "cell_type": "code", | |
284 "execution_count": 20, | |
285 "id": "ddbbe4d9", | |
286 "metadata": {}, | |
287 "outputs": [], | |
288 "source": [ | |
289 "#strain_mic" | |
290 ] | |
291 }, | |
292 { | |
293 "cell_type": "code", | |
294 "execution_count": 21, | |
295 "id": "640508f1", | |
296 "metadata": {}, | |
297 "outputs": [], | |
298 "source": [ | |
299 "# capitalize Strain name and Antibiotics and remove spaces so the names compare consistently\n", | |
300 "strain_mic['Strain name']=strain_mic['Strain name'].str.capitalize().str.replace(\" \",\"\")\n", | |
301 "strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(\" \",\"\")\n", | |
302 "\n", | |
303 "clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(\" \",\"\")" | |
304 ] | |
305 }, | |
306 { | |
307 "cell_type": "code", | |
308 "execution_count": 22, | |
309 "id": "b87426f4", | |
310 "metadata": {}, | |
311 "outputs": [], | |
312 "source": [ | |
313 "#find duplicate values in input files\n", | |
314 "dups=strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated(keep=False)]\n", | |
315 "if dups.shape[0] != 0:\n", | |
316 " print ('Please provide a single MIC value in input file for given duplicates combination of \\'Strain name and Antibiotics\\' to use the tool:-\\n',dups)\n", | |
317 " #exit()\n", | |
318 "else:\n", | |
319 " #compare CLSI Antibiotics only\n", | |
320 " #result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner', indicator=True)[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','_merge']]\n", | |
321 " try:\n", | |
322 " result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']]\n", | |
323 " except KeyError:\n", | |
324 " print('Waring: Error in input Values')" | |
325 ] | |
326 }, | |
327 { | |
328 "cell_type": "code", | |
329 "execution_count": 23, | |
330 "id": "91bfc94d", | |
331 "metadata": {}, | |
332 "outputs": [ | |
333 { | |
334 "data": { | |
335 "text/html": [ | |
336 "<div>\n", | |
337 "<style scoped>\n", | |
338 " .dataframe tbody tr th:only-of-type {\n", | |
339 " vertical-align: middle;\n", | |
340 " }\n", | |
341 "\n", | |
342 " .dataframe tbody tr th {\n", | |
343 " vertical-align: top;\n", | |
344 " }\n", | |
345 "\n", | |
346 " .dataframe thead th {\n", | |
347 " text-align: right;\n", | |
348 " }\n", | |
349 "</style>\n", | |
350 "<table border=\"1\" class=\"dataframe\">\n", | |
351 " <thead>\n", | |
352 " <tr style=\"text-align: right;\">\n", | |
353 " <th></th>\n", | |
354 " <th>Strain name</th>\n", | |
355 " <th>Antibiotics</th>\n", | |
356 " <th>MIC</th>\n", | |
357 " <th>o_mic</th>\n", | |
358 " </tr>\n", | |
359 " </thead>\n", | |
360 " <tbody>\n", | |
361 " </tbody>\n", | |
362 "</table>\n", | |
363 "</div>" | |
364 ], | |
365 "text/plain": [ | |
366 "Empty DataFrame\n", | |
367 "Columns: [Strain name, Antibiotics, MIC, o_mic]\n", | |
368 "Index: []" | |
369 ] | |
370 }, | |
371 "execution_count": 23, | |
372 "metadata": {}, | |
373 "output_type": "execute_result" | |
374 } | |
375 ], | |
376 "source": [ | |
377 "dups.head()" | |
378 ] | |
379 }, | |
380 { | |
381 "cell_type": "code", | |
382 "execution_count": 132, | |
383 "id": "b171f205", | |
384 "metadata": {}, | |
385 "outputs": [], | |
386 "source": [ | |
387 "#compare MIC values and assign Susceptible and Resistant to Strain\n", | |
388 "#try:\n", | |
389 "result[['CLSI_profile']] = result[['o_mic','s_mic','r_mic','i_mic']].apply(sus_res_int,axis = 1)\n", | |
390 "#except ValueError:\n", | |
391 "# print('Warning: Error in input MIC value')" | |
392 ] | |
393 }, | |
394 { | |
395 "cell_type": "code", | |
396 "execution_count": 133, | |
397 "id": "3336fd92", | |
398 "metadata": {}, | |
399 "outputs": [], | |
400 "source": [ | |
401 "#result" | |
402 ] | |
403 }, | |
404 { | |
405 "cell_type": "code", | |
406 "execution_count": 134, | |
407 "id": "f0dacfd1", | |
408 "metadata": {}, | |
409 "outputs": [], | |
410 "source": [ | |
411 "#result[['Strain name', 'Antibiotics', 'MIC','s_mic','r_mic','CLSI_profile']].to_csv(output_profile,sep=',', index=False, encoding='utf-8-sig')" | |
412 ] | |
413 }, | |
414 { | |
415 "cell_type": "code", | |
416 "execution_count": 135, | |
417 "id": "3d8d03f7", | |
418 "metadata": {}, | |
419 "outputs": [], | |
420 "source": [ | |
421 "#create a pivot table for ASIST\n", | |
422 "table=result[['Strain name', 'Antibiotics','CLSI_profile']].drop_duplicates()\n", | |
423 "result_table=pd.pivot_table(table, values ='CLSI_profile', index =['Strain name'],columns =['Antibiotics'], aggfunc = lambda x: ' '.join(x))" | |
424 ] | |
425 }, | |
426 { | |
427 "cell_type": "code", | |
428 "execution_count": 136, | |
429 "id": "7d7223a3", | |
430 "metadata": {}, | |
431 "outputs": [], | |
432 "source": [ | |
433 "#result_table" | |
434 ] | |
435 }, | |
436 { | |
437 "cell_type": "code", | |
438 "execution_count": 137, | |
439 "id": "8a41b2ef", | |
440 "metadata": {}, | |
441 "outputs": [], | |
442 "source": [ | |
443 "#result_table.to_csv(output_table,na_rep='NA')" | |
444 ] | |
445 }, | |
446 { | |
447 "cell_type": "code", | |
448 "execution_count": 138, | |
449 "id": "8c9e5f87", | |
450 "metadata": {}, | |
451 "outputs": [], | |
452 "source": [ | |
453 "# reorder the Antibiotics for ASIST; names must match the capitalized, space-free 'Antibiotics' values\n", | |
454 "clsi_ab=['Amikacin','Tobramycin','Gentamycin','Netilmicin','Imipenem','Meropenem','Doripenem','Ciprofloxacin','Levofloxacin',\n", | |
455 " 'Piperacillin/tazobactam','Ticarcillin/clavulanicacid','Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime',\n", | |
456 " 'Trimethoprim/sulfamethoxazole','Ampicillin/sulbactam','Colistin','Polymyxinb','Tetracycline','Doxicycline',\n", | |
457 " 'Minocycline']\n", | |
458 "result_selected=result_table.filter(clsi_ab)" | |
459 ] | |
460 }, | |
461 { | |
462 "cell_type": "code", | |
463 "execution_count": 139, | |
464 "id": "cdf43afb", | |
465 "metadata": {}, | |
466 "outputs": [], | |
467 "source": [ | |
468 "#print(result_selected.shape, result_table.shape)" | |
469 ] | |
470 }, | |
471 { | |
472 "cell_type": "code", | |
473 "execution_count": 140, | |
474 "id": "c4c4df30", | |
475 "metadata": {}, | |
476 "outputs": [], | |
477 "source": [ | |
478 "#result_selected.insert(0,'Resistance_phenotype','')" | |
479 ] | |
480 }, | |
481 { | |
482 "cell_type": "code", | |
483 "execution_count": 141, | |
484 "id": "9adb2703", | |
485 "metadata": {}, | |
486 "outputs": [], | |
487 "source": [ | |
488 "#rename headers\n", | |
489 "result_selected=result_selected.rename(columns = {'Ticarcillin/clavulanicacid':'Ticarcillin/clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} )" | |
490 ] | |
491 }, | |
492 { | |
493 "cell_type": "code", | |
494 "execution_count": 142, | |
495 "id": "50e6cf5f", | |
496 "metadata": {}, | |
497 "outputs": [], | |
498 "source": [ | |
499 "#result_selected" | |
500 ] | |
501 }, | |
502 { | |
503 "cell_type": "code", | |
504 "execution_count": 144, | |
505 "id": "2833671c", | |
506 "metadata": {}, | |
507 "outputs": [], | |
508 "source": [ | |
509 "result_selected.to_csv(output_table,na_rep='NA')" | |
510 ] | |
511 } | |
512 ], | |
513 "metadata": { | |
514 "kernelspec": { | |
515 "display_name": "Python 3", | |
516 "language": "python", | |
517 "name": "python3" | |
518 }, | |
519 "language_info": { | |
520 "codemirror_mode": { | |
521 "name": "ipython", | |
522 "version": 3 | |
523 }, | |
524 "file_extension": ".py", | |
525 "mimetype": "text/x-python", | |
526 "name": "python", | |
527 "nbconvert_exporter": "python", | |
528 "pygments_lexer": "ipython3", | |
529 "version": "3.7.10" | |
530 } | |
531 }, | |
532 "nbformat": 4, | |
533 "nbformat_minor": 5 | |
534 } |