Mercurial > repos > rakesh4osdd > clsi_profile
view clsi_profile_type2_linux.ipynb @ 11:7dcc0e93288b draft default tip
"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit f590c3b1d71a9b8f2030909fa488b4ac0c3caed8-dirty"
author | rakesh4osdd |
---|---|
date | Wed, 30 Jun 2021 07:13:29 +0000 |
parents | |
children |
line wrap: on
line source
{ "cells": [ { "cell_type": "code", "execution_count": 7, "id": "9aa0a6f7", "metadata": {}, "outputs": [], "source": [ "# ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics\n", "# By rakesh4osdd@gmail.com, 06-Jun-2021\n", "import pandas as pd\n", "import re\n", "import sys" ] }, { "cell_type": "code", "execution_count": 8, "id": "9af8387e", "metadata": {}, "outputs": [], "source": [ "#print(pd.__version__, re.__version__)" ] }, { "cell_type": "code", "execution_count": 9, "id": "73d0783c", "metadata": {}, "outputs": [], "source": [ "# compare two MIC value strings\n", "def check_mic(mic1,mic2,mic_type):\n", " #print(mic1,mic2,mic_type)\n", " try:\n", " if '/' in mic1:\n", " m1a = mic1.split('/')[0]\n", " m1b = mic1.split('/')[1]\n", " if float(m1a)==0 or float(m1b)==0:\n", " strain_type='Strain could not be classified'\n", " return(strain_type) \n", " elif '/' in mic2:\n", " m1a = mic1\n", " if float(m1a)==0:\n", " strain_type='Strain could not be classified'\n", " return(strain_type) \n", " m1b = '1'\n", " elif float(mic1)==0:\n", " strain_type='Strain could not be classified'\n", " return(strain_type)\n", " else:\n", " m1a = mic1\n", " \n", " if '-' in mic2:\n", " m2a = mic2.split('-')[0]\n", " m2b = mic2.split('-')[1] \n", " \n", " except ValueError:\n", " strain_type='Strain could not be classified' \n", " return(strain_type)\n", " try:\n", " if '-' in mic2 and mic_type == 'i': # for intermediate only\n", " if '/' in mic2:\n", " m2a = mic2.split('-')[0].split('/')[0]\n", " m2b = mic2.split('-')[0].split('/')[1]\n", " m2aa = mic2.split('-')[1].split('/')[0]\n", " m2bb = mic2.split('-')[1].split('/')[1]\n", " if (float(m2aa)>=float(m1a)>=float(m2a) and float(m2bb)>=float(m1b)>=float(m2b)):\n", " #print('intermediate')\n", " m_type='Intermediate'\n", " else:\n", " #print('not define')\n", " m_type='Strain could not be classified'\n", " else:\n", " m2a = mic2.split('-')[0]\n", " m2b = mic2.split('-')[1] \n", " if 
(float(m2b)>=float(m1a)>=float(m2a)):\n", " #print('intermediate')\n", " m_type='Intermediate'\n", " else:\n", " #print('not define')\n", " m_type='Strain could not be classified' \n", " #print (m1a,m1b,m2a,m2b,m2aa,m2bb)\n", " elif '/' in mic2:\n", " m2a = mic2.split('/')[0]\n", " m2b = mic2.split('/')[1]\n", " #print(m1a,m1b,m2a,m2b,mic_type)\n", " if (mic_type=='s' and (float(m1a)<=float(m2a) and float(m1b)<=float(m2b))):\n", " m_type='Susceptible'\n", " elif (mic_type=='r' and (float(m1a)>=float(m2a) and float(m1b)>=float(m2b))):\n", " m_type='Resistant'\n", " elif (mic_type=='i' and (float(m1a)==float(m2a) and float(m1b)==float(m2b))):\n", " m_type='Intermediate'\n", " else:\n", " m_type='Strain could not be classified'\n", " elif '-' in mic2:\n", " m_type='Strain could not be classified'\n", " else:\n", " m2a=mic2\n", " if (mic_type=='s' and (float(m1a)<=float(m2a))):\n", " m_type='Susceptible'\n", " elif (mic_type=='r' and (float(m1a)>=float(m2a))):\n", " m_type='Resistant'\n", " elif (mic_type=='i' and (float(m1a)==float(m2a))):\n", " m_type='Intermediate'\n", " else:\n", " m_type='Strain could not be classified' \n", " except IndexError:\n", " strain_type='Strain could not be classified' \n", " return(strain_type)\n", " \n", " return(m_type)\n", "\n", "#check_mic('65','32-64','i')" ] }, { "cell_type": "code", "execution_count": 10, "id": "4d2ab1b1", "metadata": {}, "outputs": [], "source": [ "# compare MIC value in pandas list\n", "def sus_res_int(mic):\n", " #print(mic)\n", " o_mic = mic[0].replace(' ', '')\n", " s_mic = mic[1].replace(' ', '')\n", " r_mic = mic[2].replace(' ', '')\n", " i_mic = mic[3].replace(' ', '')\n", " try:\n", " if check_mic(o_mic,s_mic,'s')=='Susceptible':\n", " strain_type='Susceptible'\n", " elif check_mic(o_mic,r_mic,'r')=='Resistant':\n", " strain_type='Resistant'\n", " elif check_mic(o_mic,i_mic,'i')=='Intermediate':\n", " strain_type='Intermediate' \n", " else:\n", " strain_type='Strain could not be classified'\n", " except 
ValueError:\n", " strain_type='Strain could not be classified' \n", " return(strain_type)\n", "\n", "#mic=['128','16/4','128/4','32/4-64/4']\n", "#sus_res_int(mic)" ] }, { "cell_type": "code", "execution_count": 11, "id": "0e22ef0d", "metadata": {}, "outputs": [ { "ename": "IndexError", "evalue": "list index out of range", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m<ipython-input-11-26f141926f14>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0minput_user\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0minput_clsi\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0moutput_table\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mIndexError\u001b[0m: list index out of range" ] } ], "source": [ "# for input argument\n", "input_user = sys.argv[1]\n", "input_clsi = sys.argv[2]\n", "output_table = sys.argv[3]" ] }, { "cell_type": "code", "execution_count": 49, "id": "21d5fe63", "metadata": {}, "outputs": [], "source": [ "\"\"\"input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'\n", "#input_user='test-data/input2.csv'\n", "input_clsi='test-data/clsi.csv'\n", "output_profile='test-data/input2_profile.csv'\n", 
"#output_table='test-data/input2_table.csv'\n", "output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'\"\"\"" ] }, { "cell_type": "code", "execution_count": 60, "id": "1e64b025", "metadata": {}, "outputs": [], "source": [ "# read user AST data with selected 3 columns\n", "strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False)\n", "#strain_mic" ] }, { "cell_type": "code", "execution_count": 61, "id": "0d30ddc3", "metadata": {}, "outputs": [], "source": [ "clsi_bp=pd.read_csv(input_clsi,sep=',')\n", "\n", "#clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape" ] }, { "cell_type": "code", "execution_count": 62, "id": "a818676d", "metadata": {}, "outputs": [], "source": [ "#clsi_bp\n", "#strain_mic" ] }, { "cell_type": "code", "execution_count": 64, "id": "c2aae757", "metadata": {}, "outputs": [], "source": [ "# warn user for duplicate files\n", "input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()]\n", "if (input_dups.shape[0] == 0):\n", " #print( \"No duplicates\")\n", " pass\n", "else:\n", " with open(output_table, \"w\") as file_object:\n", " # Append 'hello' at the end of file\n", " file_object.write('S.No.,Strain name,Antibiotics,MIC\\nInput File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file\\n')\n", " input_dups.to_csv(output_table,na_rep='NA', mode='a')\n", " exit()" ] }, { "cell_type": "code", "execution_count": 17, "id": "c6b4c59b", "metadata": {}, "outputs": [], "source": [ "# convert MIC to numbers sMIC, rMIC\n", "clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))\n", "clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))\n", "clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))" ] }, { "cell_type": "code", "execution_count": 18, "id": 
"d0171f94", "metadata": {}, "outputs": [], "source": [ "#clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))" ] }, { "cell_type": "code", "execution_count": 19, "id": "fe45b2dd", "metadata": {}, "outputs": [], "source": [ "# Read only numbers in MIC values\n", "#try:\n", "strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\\/]','', x)))\n", "#except TypeError:\n", "# print('Waring: Error in MIC value')" ] }, { "cell_type": "code", "execution_count": 20, "id": "ddbbe4d9", "metadata": {}, "outputs": [], "source": [ "#strain_mic" ] }, { "cell_type": "code", "execution_count": 21, "id": "640508f1", "metadata": {}, "outputs": [], "source": [ "# capitalize each Antibiotic Name for comparision with removing whitespace\n", "strain_mic['Strain name']=strain_mic['Strain name'].str.capitalize().str.replace(\" \",\"\")\n", "strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(\" \",\"\")\n", "\n", "clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(\" \",\"\")" ] }, { "cell_type": "code", "execution_count": 22, "id": "b87426f4", "metadata": {}, "outputs": [], "source": [ "#find duplicate values in input files\n", "dups=strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated(keep=False)]\n", "if dups.shape[0] != 0:\n", " print ('Please provide a single MIC value in input file for given duplicates combination of \\'Strain name and Antibiotics\\' to use the tool:-\\n',dups)\n", " #exit()\n", "else:\n", " #compare CLSI Antibiotics only\n", " #result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner', indicator=True)[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','_merge']]\n", " try:\n", " result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']]\n", " except KeyError:\n", " print('Waring: Error in input Values')" ] }, { "cell_type": "code", 
"execution_count": 23, "id": "91bfc94d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Strain name</th>\n", " <th>Antibiotics</th>\n", " <th>MIC</th>\n", " <th>o_mic</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ "Empty DataFrame\n", "Columns: [Strain name, Antibiotics, MIC, o_mic]\n", "Index: []" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dups.head()" ] }, { "cell_type": "code", "execution_count": 132, "id": "b171f205", "metadata": {}, "outputs": [], "source": [ "#compare MIC values and assign Susceptible and Resistant to Strain\n", "#try:\n", "result[['CLSI_profile']] = result[['o_mic','s_mic','r_mic','i_mic']].apply(sus_res_int,axis = 1)\n", "#except ValueError:\n", "# print('Waring: Error in input MIC value')" ] }, { "cell_type": "code", "execution_count": 133, "id": "3336fd92", "metadata": {}, "outputs": [], "source": [ "#result" ] }, { "cell_type": "code", "execution_count": 134, "id": "f0dacfd1", "metadata": {}, "outputs": [], "source": [ "#result[['Strain name', 'Antibiotics', 'MIC','s_mic','r_mic','CLSI_profile']].to_csv(output_profile,sep=',', index=False, encoding='utf-8-sig')" ] }, { "cell_type": "code", "execution_count": 135, "id": "3d8d03f7", "metadata": {}, "outputs": [], "source": [ "#create a pivot table for ASIST\n", "table=result[['Strain name', 'Antibiotics','CLSI_profile']].drop_duplicates()\n", "result_table=pd.pivot_table(table, values ='CLSI_profile', index =['Strain name'],columns =['Antibiotics'], aggfunc = lambda x: ' '.join(x))" ] }, { 
"cell_type": "code", "execution_count": 136, "id": "7d7223a3", "metadata": {}, "outputs": [], "source": [ "#result_table" ] }, { "cell_type": "code", "execution_count": 137, "id": "8a41b2ef", "metadata": {}, "outputs": [], "source": [ "#result_table.to_csv(output_table,na_rep='NA')" ] }, { "cell_type": "code", "execution_count": 138, "id": "8c9e5f87", "metadata": {}, "outputs": [], "source": [ "# reorder the Antibiotics for ASIST\n", "clsi_ab=['Amikacin','Tobramycin','Gentamycin','Netilmicin','Imipenem','Meropenem','Doripenem','Ciprofloxacin','Levofloxacin',\n", " 'Piperacillin/tazobactam','Ticarcillin/clavulanicacid','Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime',\n", " 'Trimethoprim/sulfamethoxazole','Ampicillin/sulbactam','Colistin','Polymyxinb','Tetracycline','Doxicycline ',\n", " 'Minocycline']\n", "result_selected=result_table.filter(clsi_ab)" ] }, { "cell_type": "code", "execution_count": 139, "id": "cdf43afb", "metadata": {}, "outputs": [], "source": [ "#print(result_selected.shape, result_table.shape)" ] }, { "cell_type": "code", "execution_count": 140, "id": "c4c4df30", "metadata": {}, "outputs": [], "source": [ "#result_selected.insert(0,'Resistance_phenotype','')" ] }, { "cell_type": "code", "execution_count": 141, "id": "9adb2703", "metadata": {}, "outputs": [], "source": [ "#rename headers\n", "result_selected=result_selected.rename(columns = {'Ticarcillin/clavulanicacid':'Ticarcillin/clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} )" ] }, { "cell_type": "code", "execution_count": 142, "id": "50e6cf5f", "metadata": {}, "outputs": [], "source": [ "#result_selected" ] }, { "cell_type": "code", "execution_count": 144, "id": "2833671c", "metadata": {}, "outputs": [], "source": [ "result_selected.to_csv(output_table,na_rep='NA')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 
3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 5 }