changeset 11:7dcc0e93288b draft default tip

"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit f590c3b1d71a9b8f2030909fa488b4ac0c3caed8-dirty"
author rakesh4osdd
date Wed, 30 Jun 2021 07:13:29 +0000
parents 116ebdf92e39
children
files LICENSE LICENSE.txt README.md asist_dynamic.ipynb clsi_profile_type2_linux.ipynb tool.sh tool_asist.sh tool_shed.sh
diffstat 8 files changed, 571 insertions(+), 333 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE	Wed Jun 30 07:13:29 2021 +0000
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 RAKESH KUMAR
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/LICENSE.txt	Wed Jun 30 06:59:29 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. 
-To view a copy of this license, visit http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
\ No newline at end of file
--- a/README.md	Wed Jun 30 06:59:29 2021 +0000
+++ b/README.md	Wed Jun 30 07:13:29 2021 +0000
@@ -12,7 +12,7 @@
 	Strain_1,Phenotype_1,Resistant,Resistant,Resistant
 	Strain_2,Phenotype_2,Resistant,Susceptible,Resistant
 
- Link to the code : https://github.com/rakesh4osdd/clsi_profile , https://github.com/rakesh4osdd/asist .
+ Link to the code : https://github.com/rakesh4osdd/clsi_profile , https://github.com/rakesh4osdd/asist
  
  
 # ASIST tool suite
--- a/asist_dynamic.ipynb	Wed Jun 30 06:59:29 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,312 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1309,
-   "id": "27cfc66f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#ASIST program for phenotype based on Antibiotics profile\n",
-    "# create a profile based on selected antibiotics only\n",
-    "# rakesh4osdd@gmail.com, 14-June-2021"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "75a352b7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import sys\n",
-    "import os\n",
-    "from collections import Counter"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 162,
-   "id": "d66ec0d2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#input_file=sys.argv[1]\n",
-    "#output_file=sys.argv[2]\n",
-    "input_file='test-data/strains_788_input_16k.csv'\n",
-    "output_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/strains_788_output_16k.csv'\n",
-    "#input_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/asist_example15.csv'\n",
-    "#output_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/asist_example15_output.csv'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 163,
-   "id": "bf24c946",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# strain_profile to phenotype condition\n",
-    "def s_phen(sus,res,intm,na,pb_sus):\n",
-    "    if (sus>0 and res==0 and na>=0):\n",
-    "        #print('Possible Susceptible')\n",
-    "        phen='Possible Susceptible'\n",
-    "    elif (sus>=0 and 3<=res<7 and na>=0 and pb_sus==0):\n",
-    "        #print('Possible MDR')\n",
-    "        phen='Possible MDR'\n",
-    "    elif (sus>=0 and 7<=res<9 and na>=0 and pb_sus==0):\n",
-    "        #print('Possible XDR')\n",
-    "        phen='Possible XDR'\n",
-    "    #special cases\n",
-    "    elif (sus>=1 and res>0 and na>=0 and pb_sus==1):\n",
-    "        #print('Possible XDR')\n",
-    "        phen='Possible XDR'\n",
-    "    #special cases\n",
-    "    elif (sus>0 and res==9 and na>=0):\n",
-    "        #print('Possible XDR')\n",
-    "        phen='Possible XDR'\n",
-    "    elif (sus==0 and res==9 and na>=0):\n",
-    "        #print('Possible TDR')\n",
-    "        phen='Possible TDR'\n",
-    "    else:\n",
-    "        #print('Strain could not be classified')\n",
-    "        phen='Strain could not be classified ('+ str(intm)+' | ' + str(na) +')'\n",
-    "    return(phen)\n",
-    "\n",
-    "#print(s_phen(1,9,0,0))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 164,
-   "id": "8bad7d9d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# define Antibiotic groups as per antibiotic of CLSI breakpoints MIC\n",
-    "#Aminoglycoside\n",
-    "cat1=['Amikacin','Tobramycin','Gentamycin','Netilmicin']\n",
-    "#Beta-lactams- Carbapenems\n",
-    "cat2=['Imipenem','Meropenam','Doripenem']\n",
-    "#Fluoroquinolone\n",
-    "cat3=['Ciprofloxacin','Levofloxacin']\n",
-    "#Beta-lactam inhibitor\n",
-    "cat4=['Piperacillin/tazobactam','Ticarcillin/clavulanicacid']\n",
-    "#Cephalosporin\n",
-    "cat5=['Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime']\n",
-    "#Sulfonamides\n",
-    "cat6=['Trimethoprim/sulfamethoxazole']\n",
-    "#Penicillins/beta-lactamase\n",
-    "cat7=['Ampicillin/sulbactam']\n",
-    "#Polymyxins\n",
-    "cat8=['Colistin','Polymyxinb']\n",
-    "#Tetracycline\n",
-    "cat9=['Tetracycline','Doxicycline','Minocycline']\n",
-    "\n",
-    "def s_profiler(pd_series):\n",
-    "    #print(type(pd_series),'\\n', pd_series)\n",
-    "    #create a dictionary of dataframe series\n",
-    "    cats={'s1':cat1,'s2':cat2,'s3':cat3,'s4':cat4,'s5':cat5,'s6':cat6,'s7':cat7,'s8':cat8,'s9':cat9}\n",
-    "    # find the antibiotics name in input series\n",
-    "    for cat in cats:\n",
-    "        #print(cats[cat])\n",
-    "        cats[cat]=pd_series.filter(cats[cat])\n",
-    "        #print(cats[cat])\n",
-    "    #define res,sus,intm,na,pb_sus\n",
-    "    res=0\n",
-    "    sus=0\n",
-    "    intm=0\n",
-    "    na=0\n",
-    "    pb_sus=0\n",
-    "    # special case of 'Polymyxin b' for its value\n",
-    "    if 'Polymyxinb' in pd_series:\n",
-    "        ctp=cats['s8']['Polymyxinb'].strip().lower()\n",
-    "        if ctp == 'susceptible':\n",
-    "            pb_sus=1\n",
-    "        #print((ctp,p_sus))\n",
-    "    # check all categories\n",
-    "    for cat in cats:\n",
-    "        #ctp=cats['s8'].iloc[i:i+1].stack().value_counts().to_dict()\n",
-    "        #print(ctp)\n",
-    "        # Pandas series\n",
-    "        ct=cats[cat].value_counts().to_dict()\n",
-    "        #print(ct)\n",
-    "        # remove whitespace and convert to lowercase words\n",
-    "        ct =  {k.strip().lower(): v for k, v in ct.items()}\n",
-    "        #print(ct)\n",
-    "        k=Counter(ct)\n",
-    "        #j=Counter(ct)+Counter(j)\n",
-    "        #print(j)\n",
-    "        # category wise marking\n",
-    "        if k['resistant']>=1:\n",
-    "            res=res+1\n",
-    "        if k['susceptible']>=1:\n",
-    "            sus=sus+1\n",
-    "        if k['intermediate']>=1:\n",
-    "            intm=intm+1\n",
-    "        if k['na']>=1:\n",
-    "            na=na+1\n",
-    "    #print(sus,res,intm,na,pb_sus)\n",
-    "    #print(s_phen(sus,res,intm,na,pb_sus))\n",
-    "    return(s_phen(sus,res,intm,na,pb_sus))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 165,
-   "id": "7629fc10",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#input_file='input2.csv_table.csv'\n",
-    "#output_file=input_file+'_output.txt'\n",
-    "strain_profile=pd.read_csv(input_file, sep=',',na_filter=False,skipinitialspace = True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 166,
-   "id": "bed1abba",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "old_strain_name=strain_profile.columns[0]\n",
-    "new_strain_name=old_strain_name.capitalize().strip().replace(' ', '')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 167,
-   "id": "a64b5022",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# make header capitalization, remove leading,lagging, and multiple whitespace for comparision\n",
-    "strain_profile.columns=strain_profile.columns.str.capitalize().str.strip().str.replace('\\s+', '', regex=True)\n",
-    "#print(strain_profile.columns)\n",
-    "#strain_profile.head()\n",
-    "#strain_profile.columns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 168,
-   "id": "caac57d7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# add new column in dataframe on second position\n",
-    "strain_profile.insert(1, 'Strain phenotype','')\n",
-    "#strain_profile.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 169,
-   "id": "eb4b0c4d",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "strain_profile['Strain phenotype'] = strain_profile.apply(lambda x: (s_profiler(x)), axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 170,
-   "id": "86441c0f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#strain_profile.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 171,
-   "id": "75698be5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#rename headers for old name\n",
-    "strain_profile=strain_profile.rename(columns = {new_strain_name:old_strain_name, 'Ticarcillin/clavulanicacid':'Ticarcillin/ clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 172,
-   "id": "c14a13eb",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "#strain_profile.columns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 173,
-   "id": "1b113050",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#strain_profile"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 174,
-   "id": "5ab72211",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "strain_profile.to_csv(output_file,na_rep='NA',index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 175,
-   "id": "c17c84c4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Open a file with access mode 'a'\n",
-    "with open(output_file, \"a\") as file_object:\n",
-    "    # Append 'hello' at the end of file\n",
-    "    file_object.write(\"Note: \\n1. 'MDR': Multidrug-resistant, 'XDR': Extensively drug-resistant, 'TDR':totally drug resistant, NA': Data Not Available.\\n2. 'Strain could not be classified' numbers follow the format as ('Number of antibiotics categories count as Intermediate' | 'Number of antibiotics categories count as NA')\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7e8e1fa8",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.10"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/clsi_profile_type2_linux.ipynb	Wed Jun 30 07:13:29 2021 +0000
@@ -0,0 +1,534 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "9aa0a6f7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ASIST module2 | map AST result to the CLSI breakpoints with combination antibiotics\n",
+    "# By rakesh4osdd@gmail.com, 06-Jun-2021\n",
+    "import pandas as pd\n",
+    "import re\n",
+    "import sys"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "9af8387e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#print(pd.__version__, re.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "73d0783c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# compare two MIC value strings\n",
+    "def check_mic(mic1,mic2,mic_type):\n",
+    "    #print(mic1,mic2,mic_type)\n",
+    "    try:\n",
+    "        if '/' in mic1:\n",
+    "            m1a = mic1.split('/')[0]\n",
+    "            m1b = mic1.split('/')[1]\n",
+    "            if float(m1a)==0 or float(m1b)==0:\n",
+    "                strain_type='Strain could not be classified'\n",
+    "                return(strain_type)          \n",
+    "        elif '/' in mic2:\n",
+    "            m1a = mic1\n",
+    "            if float(m1a)==0:\n",
+    "                strain_type='Strain could not be classified'\n",
+    "                return(strain_type)            \n",
+    "            m1b = '1'\n",
+    "        elif float(mic1)==0:\n",
+    "            strain_type='Strain could not be classified'\n",
+    "            return(strain_type)\n",
+    "        else:\n",
+    "            m1a = mic1\n",
+    "            \n",
+    "        if '-' in mic2:\n",
+    "            m2a = mic2.split('-')[0]\n",
+    "            m2b = mic2.split('-')[1]           \n",
+    "         \n",
+    "    except ValueError:\n",
+    "        strain_type='Strain could not be classified' \n",
+    "        return(strain_type)\n",
+    "    try:\n",
+    "        if '-' in mic2 and mic_type == 'i':   # for intermediate only\n",
+    "            if '/' in mic2:\n",
+    "                m2a = mic2.split('-')[0].split('/')[0]\n",
+    "                m2b = mic2.split('-')[0].split('/')[1]\n",
+    "                m2aa = mic2.split('-')[1].split('/')[0]\n",
+    "                m2bb = mic2.split('-')[1].split('/')[1]\n",
+    "                if (float(m2aa)>=float(m1a)>=float(m2a) and float(m2bb)>=float(m1b)>=float(m2b)):\n",
+    "                    #print('intermediate')\n",
+    "                    m_type='Intermediate'\n",
+    "                else:\n",
+    "                    #print('not define')\n",
+    "                    m_type='Strain could not be classified'\n",
+    "            else:\n",
+    "                m2a = mic2.split('-')[0]\n",
+    "                m2b = mic2.split('-')[1] \n",
+    "                if (float(m2b)>=float(m1a)>=float(m2a)):\n",
+    "                    #print('intermediate')\n",
+    "                    m_type='Intermediate'\n",
+    "                else:\n",
+    "                    #print('not define')\n",
+    "                    m_type='Strain could not be classified'                \n",
+    "            #print (m1a,m1b,m2a,m2b,m2aa,m2bb)\n",
+    "        elif '/' in mic2:\n",
+    "            m2a = mic2.split('/')[0]\n",
+    "            m2b = mic2.split('/')[1]\n",
+    "            #print(m1a,m1b,m2a,m2b,mic_type)\n",
+    "            if (mic_type=='s' and (float(m1a)<=float(m2a) and float(m1b)<=float(m2b))):\n",
+    "                m_type='Susceptible'\n",
+    "            elif (mic_type=='r' and (float(m1a)>=float(m2a) and float(m1b)>=float(m2b))):\n",
+    "                m_type='Resistant'\n",
+    "            elif (mic_type=='i' and (float(m1a)==float(m2a) and float(m1b)==float(m2b))):\n",
+    "                m_type='Intermediate'\n",
+    "            else:\n",
+    "                m_type='Strain could not be classified'\n",
+    "        elif '-' in mic2:\n",
+    "                m_type='Strain could not be classified'\n",
+    "        else:\n",
+    "            m2a=mic2\n",
+    "            if (mic_type=='s' and (float(m1a)<=float(m2a))):\n",
+    "                m_type='Susceptible'\n",
+    "            elif (mic_type=='r' and (float(m1a)>=float(m2a))):\n",
+    "                m_type='Resistant'\n",
+    "            elif (mic_type=='i' and (float(m1a)==float(m2a))):\n",
+    "                m_type='Intermediate'\n",
+    "            else:\n",
+    "                m_type='Strain could not be classified'        \n",
+    "    except IndexError:\n",
+    "        strain_type='Strain could not be classified' \n",
+    "        return(strain_type)\n",
+    "    \n",
+    "    return(m_type)\n",
+    "\n",
+    "#check_mic('65','32-64','i')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "4d2ab1b1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# compare MIC value in pandas list\n",
+    "def sus_res_int(mic):\n",
+    "    #print(mic)\n",
+    "    o_mic = mic[0].replace(' ', '')\n",
+    "    s_mic = mic[1].replace(' ', '')\n",
+    "    r_mic = mic[2].replace(' ', '')\n",
+    "    i_mic = mic[3].replace(' ', '')\n",
+    "    try:\n",
+    "        if check_mic(o_mic,s_mic,'s')=='Susceptible':\n",
+    "            strain_type='Susceptible'\n",
+    "        elif check_mic(o_mic,r_mic,'r')=='Resistant':\n",
+    "            strain_type='Resistant'\n",
+    "        elif check_mic(o_mic,i_mic,'i')=='Intermediate':\n",
+    "            strain_type='Intermediate'                    \n",
+    "        else:\n",
+    "            strain_type='Strain could not be classified'\n",
+    "    except ValueError:\n",
+    "        strain_type='Strain could not be classified'            \n",
+    "    return(strain_type)\n",
+    "\n",
+    "#mic=['128','16/4','128/4','32/4-64/4']\n",
+    "#sus_res_int(mic)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "0e22ef0d",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "IndexError",
+     "evalue": "list index out of range",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mIndexError\u001b[0m                                Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-11-26f141926f14>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0minput_user\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0minput_clsi\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0moutput_table\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mIndexError\u001b[0m: list index out of range"
+     ]
+    }
+   ],
+   "source": [
+    "# for input argument\n",
+    "input_user = sys.argv[1]\n",
+    "input_clsi = sys.argv[2]\n",
+    "output_table = sys.argv[3]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "id": "21d5fe63",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\"\"input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'\n",
+    "#input_user='test-data/input2.csv'\n",
+    "input_clsi='test-data/clsi.csv'\n",
+    "output_profile='test-data/input2_profile.csv'\n",
+    "#output_table='test-data/input2_table.csv'\n",
+    "output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "id": "1e64b025",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# read user AST data with selected 3 columns\n",
+    "strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False)\n",
+    "#strain_mic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "id": "0d30ddc3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "clsi_bp=pd.read_csv(input_clsi,sep=',')\n",
+    "\n",
+    "#clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "id": "a818676d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#clsi_bp\n",
+    "#strain_mic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "id": "c2aae757",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# warn the user about duplicate (Strain name, Antibiotics) rows in the input file\n",
+    "input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()]\n",
+    "if (input_dups.shape[0] == 0):\n",
+    "    #print( \"No duplicates\")\n",
+    "    pass\n",
+    "else:\n",
+    "    with open(output_table, \"w\") as file_object:\n",
+    "    # Write the error header first; the duplicate rows are appended below\n",
+    "        file_object.write('S.No.,Strain name,Antibiotics,MIC\\nInput File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file\\n')\n",
+    "    input_dups.to_csv(output_table,na_rep='NA', mode='a')\n",
+    "    exit()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "c6b4c59b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# convert MIC to numbers sMIC, rMIC\n",
+    "clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))\n",
+    "clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))\n",
+    "clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "d0171f94",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "fe45b2dd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read only numbers in MIC values\n",
+    "#try:\n",
+    "strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\\/]','', x)))\n",
+    "#except TypeError:\n",
+    "#    print('Waring: Error in MIC value')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "ddbbe4d9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#strain_mic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "640508f1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# capitalize strain and antibiotic names and remove spaces so they compare consistently\n",
+    "strain_mic['Strain name']=strain_mic['Strain name'].str.capitalize().str.replace(\" \",\"\")\n",
+    "strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(\" \",\"\")\n",
+    "\n",
+    "clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(\" \",\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "b87426f4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#find duplicate values in input files\n",
+    "dups=strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated(keep=False)]\n",
+    "if dups.shape[0] != 0:\n",
+    "    print ('Please provide a single MIC value in input file for given duplicates combination of \\'Strain name and Antibiotics\\' to use the tool:-\\n',dups)\n",
+    "    #exit()\n",
+    "else:\n",
+    "    #compare CLSI Antibiotics only\n",
+    "    #result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner',  indicator=True)[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','_merge']]\n",
+    "    try:\n",
+    "        result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']]\n",
+    "    except KeyError:\n",
+    "        print('Waring: Error in input Values')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "91bfc94d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Strain name</th>\n",
+       "      <th>Antibiotics</th>\n",
+       "      <th>MIC</th>\n",
+       "      <th>o_mic</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [Strain name, Antibiotics, MIC, o_mic]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dups.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 132,
+   "id": "b171f205",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#compare MIC values and assign Susceptible and Resistant to Strain\n",
+    "#try:\n",
+    "result[['CLSI_profile']] = result[['o_mic','s_mic','r_mic','i_mic']].apply(sus_res_int,axis = 1)\n",
+    "#except ValueError:\n",
+    "#    print('Waring: Error in input MIC value')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 133,
+   "id": "3336fd92",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 134,
+   "id": "f0dacfd1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#result[['Strain name', 'Antibiotics', 'MIC','s_mic','r_mic','CLSI_profile']].to_csv(output_profile,sep=',', index=False, encoding='utf-8-sig')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 135,
+   "id": "3d8d03f7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#create a pivot table for ASIST\n",
+    "table=result[['Strain name', 'Antibiotics','CLSI_profile']].drop_duplicates()\n",
+    "result_table=pd.pivot_table(table, values ='CLSI_profile', index =['Strain name'],columns =['Antibiotics'], aggfunc = lambda x: ' '.join(x))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 136,
+   "id": "7d7223a3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#result_table"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 137,
+   "id": "8a41b2ef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#result_table.to_csv(output_table,na_rep='NA')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 138,
+   "id": "8c9e5f87",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# select and reorder the antibiotics for ASIST; names must equal the capitalized, space-free column labels\n",
+    "clsi_ab=['Amikacin','Tobramycin','Gentamycin','Netilmicin','Imipenem','Meropenem','Doripenem','Ciprofloxacin','Levofloxacin',\n",
+    "         'Piperacillin/tazobactam','Ticarcillin/clavulanicacid','Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime',\n",
+    "         'Trimethoprim/sulfamethoxazole','Ampicillin/sulbactam','Colistin','Polymyxinb','Tetracycline','Doxicycline',\n",
+    "         'Minocycline']\n",
+    "result_selected=result_table.filter(clsi_ab)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 139,
+   "id": "cdf43afb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#print(result_selected.shape, result_table.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 140,
+   "id": "c4c4df30",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#result_selected.insert(0,'Resistance_phenotype','')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 141,
+   "id": "9adb2703",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#rename headers\n",
+    "result_selected=result_selected.rename(columns = {'Ticarcillin/clavulanicacid':'Ticarcillin/clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 142,
+   "id": "50e6cf5f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#result_selected"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 144,
+   "id": "2833671c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "result_selected.to_csv(output_table,na_rep='NA')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool.sh	Wed Jun 30 07:13:29 2021 +0000
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Create the 'clsi_profile' tool skeleton with planemo tool_init, then lint it; disabled flags are plain comments below.
+planemo tool_init --id 'clsi_profile' --name 'CLSI Profile' \
+          --description 'MIC profile using CLSI MIC breakpoints' \
+          --requirement 'python' \
+          --example_command "clsi_profile_type2.py 'input.csv' 'clsi.csv' 'output.csv'" \
+          --example_input test-data/input.csv \
+          --example_input test-data/clsi.csv \
+          --example_output test-data/output.csv \
+          --test_case
+#          --version_command 'python3 -V'
+#          --help_from_command ''
+#          --doi ''
+
+planemo lint
--- a/tool_asist.sh	Wed Jun 30 06:59:29 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-planemo tool_init --id 'asist' --name 'ASIST' --version 1.0.1 \
-          --description 'Antimicrobial Susceptibility standards based phenotypes' \
-          --requirement 'pandas' \
-          --example_command "\"${__tool_directory__}/asist_dynamic.py\" 'asist_input.csv'  'asist_output.csv'" \
-          --example_input test-data/asist_input.csv \
-          --example_output test-data/asist_output.csv \
-          --test_case \
-	  --version_command 'python -c "import pandas; print(pandas.__version__)"' \
-          --doi 'https://ab-openlab.csir.res.in/asist'
-
-planemo lint asist.xml
--- a/tool_shed.sh	Wed Jun 30 06:59:29 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-#!/bin/bash
-# After reviewing .shed.yml, this configuration file and relevant shed artifacts can be quickly linted using the following command.
-planemo shed_lint --tools
-#planemo shed_create --shed_target toolshed 
-planemo shed_update --check_diff --shed_target toolshed