comparison clsi_profile_type2_linux.ipynb @ 5:3c27e5c2a8e9 draft

"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit d4b81c15919b4b87d215eaf1b792c8f251665647"
author rakesh4osdd
date Tue, 29 Jun 2021 12:58:49 +0000
parents 2a5861818faf
children
comparison
equal deleted inserted replaced
4:2a5861818faf 5:3c27e5c2a8e9
1 { 1 {
2 "cells": [ 2 "cells": [
3 { 3 {
4 "cell_type": "code", 4 "cell_type": "code",
5 "execution_count": 115, 5 "execution_count": 7,
6 "id": "9aa0a6f7", 6 "id": "9aa0a6f7",
7 "metadata": {}, 7 "metadata": {},
8 "outputs": [], 8 "outputs": [],
9 "source": [ 9 "source": [
10 "# ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics\n", 10 "# ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics\n",
14 "import sys" 14 "import sys"
15 ] 15 ]
16 }, 16 },
17 { 17 {
18 "cell_type": "code", 18 "cell_type": "code",
19 "execution_count": 116, 19 "execution_count": 8,
20 "id": "9af8387e", 20 "id": "9af8387e",
21 "metadata": {}, 21 "metadata": {},
22 "outputs": [], 22 "outputs": [],
23 "source": [ 23 "source": [
24 "#print(pd.__version__, re.__version__)" 24 "#print(pd.__version__, re.__version__)"
25 ] 25 ]
26 }, 26 },
27 { 27 {
28 "cell_type": "code", 28 "cell_type": "code",
29 "execution_count": 117, 29 "execution_count": 9,
30 "id": "73d0783c", 30 "id": "73d0783c",
31 "metadata": {}, 31 "metadata": {},
32 "outputs": [], 32 "outputs": [],
33 "source": [ 33 "source": [
34 "# compare two MIC value strings\n", 34 "# compare two MIC value strings\n",
116 "#check_mic('65','32-64','i')" 116 "#check_mic('65','32-64','i')"
117 ] 117 ]
118 }, 118 },
119 { 119 {
120 "cell_type": "code", 120 "cell_type": "code",
121 "execution_count": 118, 121 "execution_count": 10,
122 "id": "4d2ab1b1", 122 "id": "4d2ab1b1",
123 "metadata": {}, 123 "metadata": {},
124 "outputs": [], 124 "outputs": [],
125 "source": [ 125 "source": [
126 "# compare MIC value in pandas list\n", 126 "# compare MIC value in pandas list\n",
147 "#sus_res_int(mic)" 147 "#sus_res_int(mic)"
148 ] 148 ]
149 }, 149 },
150 { 150 {
151 "cell_type": "code", 151 "cell_type": "code",
152 "execution_count": 119, 152 "execution_count": 11,
153 "id": "0e22ef0d", 153 "id": "0e22ef0d",
154 "metadata": {}, 154 "metadata": {},
155 "outputs": [ 155 "outputs": [
156 { 156 {
157 "data": { 157 "ename": "IndexError",
158 "text/plain": [ 158 "evalue": "list index out of range",
159 "'# for input argument\\ninput_user = sys.argv[1]\\ninput_clsi = sys.argv[2]\\noutput_table = sys.argv[3]'" 159 "output_type": "error",
160 ] 160 "traceback": [
161 }, 161 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
162 "execution_count": 119, 162 "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
163 "metadata": {}, 163 "\u001b[0;32m<ipython-input-11-26f141926f14>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0minput_user\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0minput_clsi\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0moutput_table\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
164 "output_type": "execute_result" 164 "\u001b[0;31mIndexError\u001b[0m: list index out of range"
165 ]
165 } 166 }
166 ], 167 ],
167 "source": [ 168 "source": [
168 "# for input argument\n", 169 "# for input argument\n",
169 "input_user = sys.argv[1]\n", 170 "input_user = sys.argv[1]\n",
171 "output_table = sys.argv[3]" 172 "output_table = sys.argv[3]"
172 ] 173 ]
173 }, 174 },
174 { 175 {
175 "cell_type": "code", 176 "cell_type": "code",
176 "execution_count": 3, 177 "execution_count": 49,
177 "id": "21d5fe63", 178 "id": "21d5fe63",
178 "metadata": {}, 179 "metadata": {},
179 "outputs": [ 180 "outputs": [],
180 { 181 "source": [
181 "data": { 182 "\"\"\"input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'\n",
182 "text/plain": [ 183 "#input_user='test-data/input2.csv'\n",
183 "\"#input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'\\ninput_user='test-data/input2.csv'\\ninput_clsi='test-data/clsi.csv'\\noutput_profile='test-data/input2_profile.csv'\\noutput_table='test-data/input2_table.csv'\\n#output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'\""
184 ]
185 },
186 "execution_count": 3,
187 "metadata": {},
188 "output_type": "execute_result"
189 }
190 ],
191 "source": [
192 "\"\"\"#input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'\n",
193 "input_user='test-data/input2.csv'\n",
194 "input_clsi='test-data/clsi.csv'\n", 184 "input_clsi='test-data/clsi.csv'\n",
195 "output_profile='test-data/input2_profile.csv'\n", 185 "output_profile='test-data/input2_profile.csv'\n",
196 "output_table='test-data/input2_table.csv'\n", 186 "#output_table='test-data/input2_table.csv'\n",
197 "#output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'\"\"\"" 187 "output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'\"\"\""
198 ] 188 ]
199 }, 189 },
200 { 190 {
201 "cell_type": "code", 191 "cell_type": "code",
202 "execution_count": 146, 192 "execution_count": 60,
203 "id": "1e64b025", 193 "id": "1e64b025",
204 "metadata": {}, 194 "metadata": {},
205 "outputs": [], 195 "outputs": [],
206 "source": [ 196 "source": [
207 "# read user AST data with selected 3 columns\n", 197 "# read user AST data with selected 3 columns\n",
209 "#strain_mic" 199 "#strain_mic"
210 ] 200 ]
211 }, 201 },
212 { 202 {
213 "cell_type": "code", 203 "cell_type": "code",
214 "execution_count": 147, 204 "execution_count": 61,
215 "id": "0d30ddc3", 205 "id": "0d30ddc3",
216 "metadata": {}, 206 "metadata": {},
217 "outputs": [], 207 "outputs": [],
218 "source": [ 208 "source": [
219 "clsi_bp=pd.read_csv(input_clsi,sep=',')\n", 209 "clsi_bp=pd.read_csv(input_clsi,sep=',')\n",
221 "#clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape" 211 "#clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape"
222 ] 212 ]
223 }, 213 },
224 { 214 {
225 "cell_type": "code", 215 "cell_type": "code",
226 "execution_count": 148, 216 "execution_count": 62,
227 "id": "a818676d", 217 "id": "a818676d",
228 "metadata": {}, 218 "metadata": {},
229 "outputs": [], 219 "outputs": [],
230 "source": [ 220 "source": [
231 "#clsi_bp\n", 221 "#clsi_bp\n",
232 "#strain_mic" 222 "#strain_mic"
233 ] 223 ]
234 }, 224 },
235 { 225 {
236 "cell_type": "code", 226 "cell_type": "code",
237 "execution_count": 149, 227 "execution_count": 64,
238 "id": "c2aae757", 228 "id": "c2aae757",
239 "metadata": {}, 229 "metadata": {},
240 "outputs": [], 230 "outputs": [],
241 "source": [ 231 "source": [
232 "# warn user about duplicate 'Strain name'/'Antibiotics' rows in the input file\n",
242 "input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()]\n", 233 "input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()]\n",
243 "if (input_dups.shape[0] == 0):\n", 234 "if (input_dups.shape[0] == 0):\n",
244 " #print( \"No duplicates\")\n", 235 " #print( \"No duplicates\")\n",
245 " pass\n", 236 " pass\n",
246 "else:\n", 237 "else:\n",
247 " input_dups.to_csv(output_table,na_rep='NA')\n", 238 " with open(output_table, \"w\") as file_object:\n",
248 " with open(output_table, \"a\") as file_object:\n",
249 " # Write the input-file error message to the output file\n", 239 " # Write the input-file error message to the output file\n",
250 " file_object.write('Input File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file')\n", 240 " file_object.write('S.No.,Strain name,Antibiotics,MIC\\nInput File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file\\n')\n",
251 " exit()\n", 241 " input_dups.to_csv(output_table,na_rep='NA', mode='a')\n",
252 "#input_dups.head()" 242 " exit()"
253 ] 243 ]
254 }, 244 },
255 { 245 {
256 "cell_type": "code", 246 "cell_type": "code",
257 "execution_count": 125, 247 "execution_count": 17,
258 "id": "c6b4c59b", 248 "id": "c6b4c59b",
259 "metadata": {}, 249 "metadata": {},
260 "outputs": [], 250 "outputs": [],
261 "source": [ 251 "source": [
262 "# convert MIC to numbers sMIC, rMIC\n", 252 "# convert MIC to numbers sMIC, rMIC\n",
265 "clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))" 255 "clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))"
266 ] 256 ]
267 }, 257 },
268 { 258 {
269 "cell_type": "code", 259 "cell_type": "code",
270 "execution_count": 126, 260 "execution_count": 18,
271 "id": "d0171f94", 261 "id": "d0171f94",
272 "metadata": {}, 262 "metadata": {},
273 "outputs": [], 263 "outputs": [],
274 "source": [ 264 "source": [
275 "#clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))" 265 "#clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))"
276 ] 266 ]
277 }, 267 },
278 { 268 {
279 "cell_type": "code", 269 "cell_type": "code",
280 "execution_count": 127, 270 "execution_count": 19,
281 "id": "fe45b2dd", 271 "id": "fe45b2dd",
282 "metadata": {}, 272 "metadata": {},
283 "outputs": [], 273 "outputs": [],
284 "source": [ 274 "source": [
285 "# Read only numbers in MIC values\n", 275 "# Read only numbers in MIC values\n",
289 "# print('Waring: Error in MIC value')" 279 "# print('Waring: Error in MIC value')"
290 ] 280 ]
291 }, 281 },
292 { 282 {
293 "cell_type": "code", 283 "cell_type": "code",
294 "execution_count": 128, 284 "execution_count": 20,
295 "id": "ddbbe4d9", 285 "id": "ddbbe4d9",
296 "metadata": {}, 286 "metadata": {},
297 "outputs": [], 287 "outputs": [],
298 "source": [ 288 "source": [
299 "#strain_mic" 289 "#strain_mic"
300 ] 290 ]
301 }, 291 },
302 { 292 {
303 "cell_type": "code", 293 "cell_type": "code",
304 "execution_count": 129, 294 "execution_count": 21,
305 "id": "640508f1", 295 "id": "640508f1",
306 "metadata": {}, 296 "metadata": {},
307 "outputs": [], 297 "outputs": [],
308 "source": [ 298 "source": [
309 "# capitalize each antibiotic name and remove whitespace for comparison\n", 299 "# capitalize each antibiotic name and remove whitespace for comparison\n",
313 "clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(\" \",\"\")" 303 "clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(\" \",\"\")"
314 ] 304 ]
315 }, 305 },
316 { 306 {
317 "cell_type": "code", 307 "cell_type": "code",
318 "execution_count": 130, 308 "execution_count": 22,
319 "id": "b87426f4", 309 "id": "b87426f4",
320 "metadata": {}, 310 "metadata": {},
321 "outputs": [], 311 "outputs": [],
322 "source": [ 312 "source": [
323 "#find duplicate values in input files\n", 313 "#find duplicate values in input files\n",
334 " print('Waring: Error in input Values')" 324 " print('Waring: Error in input Values')"
335 ] 325 ]
336 }, 326 },
337 { 327 {
338 "cell_type": "code", 328 "cell_type": "code",
339 "execution_count": 131, 329 "execution_count": 23,
340 "id": "91bfc94d", 330 "id": "91bfc94d",
341 "metadata": {}, 331 "metadata": {},
342 "outputs": [], 332 "outputs": [
343 "source": [ 333 {
344 "#result" 334 "data": {
335 "text/html": [
336 "<div>\n",
337 "<style scoped>\n",
338 " .dataframe tbody tr th:only-of-type {\n",
339 " vertical-align: middle;\n",
340 " }\n",
341 "\n",
342 " .dataframe tbody tr th {\n",
343 " vertical-align: top;\n",
344 " }\n",
345 "\n",
346 " .dataframe thead th {\n",
347 " text-align: right;\n",
348 " }\n",
349 "</style>\n",
350 "<table border=\"1\" class=\"dataframe\">\n",
351 " <thead>\n",
352 " <tr style=\"text-align: right;\">\n",
353 " <th></th>\n",
354 " <th>Strain name</th>\n",
355 " <th>Antibiotics</th>\n",
356 " <th>MIC</th>\n",
357 " <th>o_mic</th>\n",
358 " </tr>\n",
359 " </thead>\n",
360 " <tbody>\n",
361 " </tbody>\n",
362 "</table>\n",
363 "</div>"
364 ],
365 "text/plain": [
366 "Empty DataFrame\n",
367 "Columns: [Strain name, Antibiotics, MIC, o_mic]\n",
368 "Index: []"
369 ]
370 },
371 "execution_count": 23,
372 "metadata": {},
373 "output_type": "execute_result"
374 }
375 ],
376 "source": [
377 "dups.head()"
345 ] 378 ]
346 }, 379 },
347 { 380 {
348 "cell_type": "code", 381 "cell_type": "code",
349 "execution_count": 132, 382 "execution_count": 132,