# HG changeset patch # User rakesh4osdd # Date 1625037209 0 # Node ID 7dcc0e93288b6412dc57d774b05fced560fe5e49 # Parent 116ebdf92e391e74713a12869f45bb09ecf3e318 "planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit f590c3b1d71a9b8f2030909fa488b4ac0c3caed8-dirty" diff -r 116ebdf92e39 -r 7dcc0e93288b LICENSE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE Wed Jun 30 07:13:29 2021 +0000 @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 RAKESH KUMAR + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff -r 116ebdf92e39 -r 7dcc0e93288b LICENSE.txt --- a/LICENSE.txt Wed Jun 30 06:59:29 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. -To view a copy of this license, visit http://creativecommons.org/licenses/by-nc-sa/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. \ No newline at end of file diff -r 116ebdf92e39 -r 7dcc0e93288b README.md --- a/README.md Wed Jun 30 06:59:29 2021 +0000 +++ b/README.md Wed Jun 30 07:13:29 2021 +0000 @@ -12,7 +12,7 @@ Strain_1,Phenotype_1,Resistant,Resistant,Resistant Strain_2,Phenotype_2,Resistant,Susceptible,Resistant - Link to the code : https://github.com/rakesh4osdd/clsi_profile , https://github.com/rakesh4osdd/asist . + Link to the code : https://github.com/rakesh4osdd/clsi_profile , https://github.com/rakesh4osdd/asist # ASIST tool suite diff -r 116ebdf92e39 -r 7dcc0e93288b asist_dynamic.ipynb --- a/asist_dynamic.ipynb Wed Jun 30 06:59:29 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,312 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1309, - "id": "27cfc66f", - "metadata": {}, - "outputs": [], - "source": [ - "#ASIST program for phenotype based on Antibiotics profile\n", - "# create a profile based on selected antibiotics only\n", - "# rakesh4osdd@gmail.com, 14-June-2021" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "75a352b7", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import sys\n", - "import os\n", - "from collections import Counter" - ] - }, - { - "cell_type": "code", - "execution_count": 162, - "id": "d66ec0d2", - "metadata": {}, - "outputs": [], - "source": [ - "#input_file=sys.argv[1]\n", - "#output_file=sys.argv[2]\n", - "input_file='test-data/strains_788_input_16k.csv'\n", - "output_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/strains_788_output_16k.csv'\n", - "#input_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/asist_example15.csv'\n", - "#output_file='/mnt/d/PhD_Work/Tina_Work/ASIST_Galaxy/ASIST/asist_example15_output.csv'" - ] - }, - { - "cell_type": "code", - "execution_count": 163, - "id": "bf24c946", - "metadata": {}, - "outputs": [], - "source": [ - "# strain_profile to phenotype condition\n", - "def s_phen(sus,res,intm,na,pb_sus):\n", - " if (sus>0 and res==0 and na>=0):\n", - " #print('Possible Susceptible')\n", - " phen='Possible Susceptible'\n", - " elif (sus>=0 and 3<=res<7 and na>=0 and pb_sus==0):\n", - " #print('Possible MDR')\n", - " phen='Possible MDR'\n", - " elif (sus>=0 and 7<=res<9 and na>=0 and pb_sus==0):\n", - " #print('Possible XDR')\n", - " phen='Possible XDR'\n", - " #special cases\n", - " elif (sus>=1 and res>0 and na>=0 and pb_sus==1):\n", - " #print('Possible XDR')\n", - " phen='Possible XDR'\n", - " #special cases\n", - " elif (sus>0 and res==9 and na>=0):\n", - " #print('Possible XDR')\n", - " phen='Possible XDR'\n", - " elif (sus==0 and res==9 and na>=0):\n", - " #print('Possible TDR')\n", - " phen='Possible TDR'\n", - " else:\n", - " #print('Strain could not be classified')\n", - " phen='Strain could not be classified ('+ str(intm)+' | ' + str(na) +')'\n", - " return(phen)\n", - "\n", - "#print(s_phen(1,9,0,0))" - ] - }, - { - "cell_type": "code", - "execution_count": 164, - "id": "8bad7d9d", - "metadata": {}, - "outputs": [], - "source": [ - "# define Antibiotic groups as per antibiotic of CLSI breakpoints MIC\n", - "#Aminoglycoside\n", - "cat1=['Amikacin','Tobramycin','Gentamycin','Netilmicin']\n", - "#Beta-lactams- Carbapenems\n", - "cat2=['Imipenem','Meropenam','Doripenem']\n", - "#Fluoroquinolone\n", - "cat3=['Ciprofloxacin','Levofloxacin']\n", - "#Beta-lactam inhibitor\n", - "cat4=['Piperacillin/tazobactam','Ticarcillin/clavulanicacid']\n", - "#Cephalosporin\n", - "cat5=['Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime']\n", - "#Sulfonamides\n", - "cat6=['Trimethoprim/sulfamethoxazole']\n", - "#Penicillins/beta-lactamase\n", - "cat7=['Ampicillin/sulbactam']\n", - "#Polymyxins\n", - "cat8=['Colistin','Polymyxinb']\n", - "#Tetracycline\n", - "cat9=['Tetracycline','Doxicycline','Minocycline']\n", - "\n", - "def s_profiler(pd_series):\n", - " #print(type(pd_series),'\\n', pd_series)\n", - " #create a dictionary of dataframe series\n", - " cats={'s1':cat1,'s2':cat2,'s3':cat3,'s4':cat4,'s5':cat5,'s6':cat6,'s7':cat7,'s8':cat8,'s9':cat9}\n", - " # find the antibiotics name in input series\n", - " for cat in cats:\n", - " #print(cats[cat])\n", - " cats[cat]=pd_series.filter(cats[cat])\n", - " #print(cats[cat])\n", - " #define res,sus,intm,na,pb_sus\n", - " res=0\n", - " sus=0\n", - " intm=0\n", - " na=0\n", - " pb_sus=0\n", - " # special case of 'Polymyxin b' for its value\n", - " if 'Polymyxinb' in pd_series:\n", - " ctp=cats['s8']['Polymyxinb'].strip().lower()\n", - " if ctp == 'susceptible':\n", - " pb_sus=1\n", - " #print((ctp,p_sus))\n", - " # check all categories\n", - " for cat in cats:\n", - " #ctp=cats['s8'].iloc[i:i+1].stack().value_counts().to_dict()\n", - " #print(ctp)\n", - " # Pandas series\n", - " ct=cats[cat].value_counts().to_dict()\n", - " #print(ct)\n", - " # remove whitespace and convert to lowercase words\n", - " ct = {k.strip().lower(): v for k, v in ct.items()}\n", - " #print(ct)\n", - " k=Counter(ct)\n", - " #j=Counter(ct)+Counter(j)\n", - " #print(j)\n", - " # category wise marking\n", - " if k['resistant']>=1:\n", - " res=res+1\n", - " if k['susceptible']>=1:\n", - " sus=sus+1\n", - " if k['intermediate']>=1:\n", - " intm=intm+1\n", - " if k['na']>=1:\n", - " na=na+1\n", - " #print(sus,res,intm,na,pb_sus)\n", - " #print(s_phen(sus,res,intm,na,pb_sus))\n", - " return(s_phen(sus,res,intm,na,pb_sus))" - ] - }, - { - "cell_type": "code", - "execution_count": 165, - "id": "7629fc10", - "metadata": {}, - "outputs": [], - "source": [ - "#input_file='input2.csv_table.csv'\n", - "#output_file=input_file+'_output.txt'\n", - "strain_profile=pd.read_csv(input_file, sep=',',na_filter=False,skipinitialspace = True)" - ] - }, - { - "cell_type": "code", - "execution_count": 166, - "id": "bed1abba", - "metadata": {}, - "outputs": [], - "source": [ - "old_strain_name=strain_profile.columns[0]\n", - "new_strain_name=old_strain_name.capitalize().strip().replace(' ', '')" - ] - }, - { - "cell_type": "code", - "execution_count": 167, - "id": "a64b5022", - "metadata": {}, - "outputs": [], - "source": [ - "# make header capitalization, remove leading,lagging, and multiple whitespace for comparision\n", - "strain_profile.columns=strain_profile.columns.str.capitalize().str.strip().str.replace('\\s+', '', regex=True)\n", - "#print(strain_profile.columns)\n", - "#strain_profile.head()\n", - "#strain_profile.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 168, - "id": "caac57d7", - "metadata": {}, - "outputs": [], - "source": [ - "# add new column in dataframe on second position\n", - "strain_profile.insert(1, 'Strain phenotype','')\n", - "#strain_profile.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 169, - "id": "eb4b0c4d", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "strain_profile['Strain phenotype'] = strain_profile.apply(lambda x: (s_profiler(x)), axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 170, - "id": "86441c0f", - "metadata": {}, - "outputs": [], - "source": [ - "#strain_profile.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 171, - "id": "75698be5", - "metadata": {}, - "outputs": [], - "source": [ - "#rename headers for old name\n", - "strain_profile=strain_profile.rename(columns = {new_strain_name:old_strain_name, 'Ticarcillin/clavulanicacid':'Ticarcillin/ clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} )" - ] - }, - { - "cell_type": "code", - "execution_count": 172, - "id": "c14a13eb", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "#strain_profile.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 173, - "id": "1b113050", - "metadata": {}, - "outputs": [], - "source": [ - "#strain_profile" - ] - }, - { - "cell_type": "code", - "execution_count": 174, - "id": "5ab72211", - "metadata": {}, - "outputs": [], - "source": [ - "strain_profile.to_csv(output_file,na_rep='NA',index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 175, - "id": "c17c84c4", - "metadata": {}, - "outputs": [], - "source": [ - "# Open a file with access mode 'a'\n", - "with open(output_file, \"a\") as file_object:\n", - " # Append 'hello' at the end of file\n", - " file_object.write(\"Note: \\n1. 'MDR': Multidrug-resistant, 'XDR': Extensively drug-resistant, 'TDR':totally drug resistant, NA': Data Not Available.\\n2. 'Strain could not be classified' numbers follow the format as ('Number of antibiotics categories count as Intermediate' | 'Number of antibiotics categories count as NA')\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e8e1fa8", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff -r 116ebdf92e39 -r 7dcc0e93288b clsi_profile_type2_linux.ipynb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clsi_profile_type2_linux.ipynb Wed Jun 30 07:13:29 2021 +0000 @@ -0,0 +1,534 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "id": "9aa0a6f7", + "metadata": {}, + "outputs": [], + "source": [ + "# ASIST module2 | map AST result to the CLSI breakporints with combination antibiotics\n", + "# By rakesh4osdd@gmail.com, 06-Jun-2021\n", + "import pandas as pd\n", + "import re\n", + "import sys" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9af8387e", + "metadata": {}, + "outputs": [], + "source": [ + "#print(pd.__version__, re.__version__)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "73d0783c", + "metadata": {}, + "outputs": [], + "source": [ + "# compare two MIC value strings\n", + "def check_mic(mic1,mic2,mic_type):\n", + " #print(mic1,mic2,mic_type)\n", + " try:\n", + " if '/' in mic1:\n", + " m1a = mic1.split('/')[0]\n", + " m1b = mic1.split('/')[1]\n", + " if float(m1a)==0 or float(m1b)==0:\n", + " strain_type='Strain could not be classified'\n", + " return(strain_type) \n", + " elif '/' in mic2:\n", + " m1a = mic1\n", + " if float(m1a)==0:\n", + " strain_type='Strain could not be classified'\n", + " return(strain_type) \n", + " m1b = '1'\n", + " elif float(mic1)==0:\n", + " strain_type='Strain could not be classified'\n", + " return(strain_type)\n", + " else:\n", + " m1a = mic1\n", + " \n", + " if '-' in mic2:\n", + " m2a = mic2.split('-')[0]\n", + " m2b = mic2.split('-')[1] \n", + " \n", + " except ValueError:\n", + " strain_type='Strain could not be classified' \n", + " return(strain_type)\n", + " try:\n", + " if '-' in mic2 and mic_type == 'i': # for intermediate only\n", + " if '/' in mic2:\n", + " m2a = mic2.split('-')[0].split('/')[0]\n", + " m2b = mic2.split('-')[0].split('/')[1]\n", + " m2aa = mic2.split('-')[1].split('/')[0]\n", + " m2bb = mic2.split('-')[1].split('/')[1]\n", + " if (float(m2aa)>=float(m1a)>=float(m2a) and float(m2bb)>=float(m1b)>=float(m2b)):\n", + " #print('intermediate')\n", + " m_type='Intermediate'\n", + " else:\n", + " #print('not define')\n", + " m_type='Strain could not be classified'\n", + " else:\n", + " m2a = mic2.split('-')[0]\n", + " m2b = mic2.split('-')[1] \n", + " if (float(m2b)>=float(m1a)>=float(m2a)):\n", + " #print('intermediate')\n", + " m_type='Intermediate'\n", + " else:\n", + " #print('not define')\n", + " m_type='Strain could not be classified' \n", + " #print (m1a,m1b,m2a,m2b,m2aa,m2bb)\n", + " elif '/' in mic2:\n", + " m2a = mic2.split('/')[0]\n", + " m2b = mic2.split('/')[1]\n", + " #print(m1a,m1b,m2a,m2b,mic_type)\n", + " if (mic_type=='s' and (float(m1a)<=float(m2a) and float(m1b)<=float(m2b))):\n", + " m_type='Susceptible'\n", + " elif (mic_type=='r' and (float(m1a)>=float(m2a) and float(m1b)>=float(m2b))):\n", + " m_type='Resistant'\n", + " elif (mic_type=='i' and (float(m1a)==float(m2a) and float(m1b)==float(m2b))):\n", + " m_type='Intermediate'\n", + " else:\n", + " m_type='Strain could not be classified'\n", + " elif '-' in mic2:\n", + " m_type='Strain could not be classified'\n", + " else:\n", + " m2a=mic2\n", + " if (mic_type=='s' and (float(m1a)<=float(m2a))):\n", + " m_type='Susceptible'\n", + " elif (mic_type=='r' and (float(m1a)>=float(m2a))):\n", + " m_type='Resistant'\n", + " elif (mic_type=='i' and (float(m1a)==float(m2a))):\n", + " m_type='Intermediate'\n", + " else:\n", + " m_type='Strain could not be classified' \n", + " except IndexError:\n", + " strain_type='Strain could not be classified' \n", + " return(strain_type)\n", + " \n", + " return(m_type)\n", + "\n", + "#check_mic('65','32-64','i')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "4d2ab1b1", + "metadata": {}, + "outputs": [], + "source": [ + "# compare MIC value in pandas list\n", + "def sus_res_int(mic):\n", + " #print(mic)\n", + " o_mic = mic[0].replace(' ', '')\n", + " s_mic = mic[1].replace(' ', '')\n", + " r_mic = mic[2].replace(' ', '')\n", + " i_mic = mic[3].replace(' ', '')\n", + " try:\n", + " if check_mic(o_mic,s_mic,'s')=='Susceptible':\n", + " strain_type='Susceptible'\n", + " elif check_mic(o_mic,r_mic,'r')=='Resistant':\n", + " strain_type='Resistant'\n", + " elif check_mic(o_mic,i_mic,'i')=='Intermediate':\n", + " strain_type='Intermediate' \n", + " else:\n", + " strain_type='Strain could not be classified'\n", + " except ValueError:\n", + " strain_type='Strain could not be classified' \n", + " return(strain_type)\n", + "\n", + "#mic=['128','16/4','128/4','32/4-64/4']\n", + "#sus_res_int(mic)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "0e22ef0d", + "metadata": {}, + "outputs": [ + { + "ename": "IndexError", + "evalue": "list index out of range", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0minput_user\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0minput_clsi\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0moutput_table\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m: list index out of range" + ] + } + ], + "source": [ + "# for input argument\n", + "input_user = sys.argv[1]\n", + "input_clsi = sys.argv[2]\n", + "output_table = sys.argv[3]" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "21d5fe63", + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'\n", + "#input_user='test-data/input2.csv'\n", + "input_clsi='test-data/clsi.csv'\n", + "output_profile='test-data/input2_profile.csv'\n", + "#output_table='test-data/input2_table.csv'\n", + "output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "1e64b025", + "metadata": {}, + "outputs": [], + "source": [ + "# read user AST data with selected 3 columns\n", + "strain_mic=pd.read_csv(input_user, sep=',', usecols =['Strain name', 'Antibiotics', 'MIC'],na_filter=False)\n", + "#strain_mic" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "0d30ddc3", + "metadata": {}, + "outputs": [], + "source": [ + "clsi_bp=pd.read_csv(input_clsi,sep=',')\n", + "\n", + "#clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "a818676d", + "metadata": {}, + "outputs": [], + "source": [ + "#clsi_bp\n", + "#strain_mic" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "c2aae757", + "metadata": {}, + "outputs": [], + "source": [ + "# warn user for duplicate files\n", + "input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()]\n", + "if (input_dups.shape[0] == 0):\n", + " #print( \"No duplicates\")\n", + " pass\n", + "else:\n", + " with open(output_table, \"w\") as file_object:\n", + " # Append 'hello' at the end of file\n", + " file_object.write('S.No.,Strain name,Antibiotics,MIC\\nInput File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file\\n')\n", + " input_dups.to_csv(output_table,na_rep='NA', mode='a')\n", + " exit()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "c6b4c59b", + "metadata": {}, + "outputs": [], + "source": [ + "# convert MIC to numbers sMIC, rMIC\n", + "clsi_bp['s_mic'] =clsi_bp[['Susceptible']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))\n", + "clsi_bp['r_mic'] =clsi_bp[['Resistant']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))\n", + "clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "d0171f94", + "metadata": {}, + "outputs": [], + "source": [ + "#clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\\/-]', '', x)))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "fe45b2dd", + "metadata": {}, + "outputs": [], + "source": [ + "# Read only numbers in MIC values\n", + "#try:\n", + "strain_mic['o_mic']=strain_mic[['MIC']].applymap(lambda x: (re.sub(r'[^0-9.\\/]','', x)))\n", + "#except TypeError:\n", + "# print('Waring: Error in MIC value')" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "ddbbe4d9", + "metadata": {}, + "outputs": [], + "source": [ + "#strain_mic" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "640508f1", + "metadata": {}, + "outputs": [], + "source": [ + "# capitalize each Antibiotic Name for comparision with removing whitespace\n", + "strain_mic['Strain name']=strain_mic['Strain name'].str.capitalize().str.replace(\" \",\"\")\n", + "strain_mic['Antibiotics']=strain_mic['Antibiotics'].str.capitalize().str.replace(\" \",\"\")\n", + "\n", + "clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(\" \",\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "b87426f4", + "metadata": {}, + "outputs": [], + "source": [ + "#find duplicate values in input files\n", + "dups=strain_mic[strain_mic[['Strain name', 'Antibiotics']].duplicated(keep=False)]\n", + "if dups.shape[0] != 0:\n", + " print ('Please provide a single MIC value in input file for given duplicates combination of \\'Strain name and Antibiotics\\' to use the tool:-\\n',dups)\n", + " #exit()\n", + "else:\n", + " #compare CLSI Antibiotics only\n", + " #result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner', indicator=True)[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','_merge']]\n", + " try:\n", + " result=pd.merge(strain_mic, clsi_bp, on='Antibiotics',how='inner')[['Strain name','Antibiotics', 'MIC', 'o_mic', 's_mic', 'r_mic','i_mic']]\n", + " except KeyError:\n", + " print('Waring: Error in input Values')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "91bfc94d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Strain nameAntibioticsMICo_mic
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [Strain name, Antibiotics, MIC, o_mic]\n", + "Index: []" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dups.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "id": "b171f205", + "metadata": {}, + "outputs": [], + "source": [ + "#compare MIC values and assign Susceptible and Resistant to Strain\n", + "#try:\n", + "result[['CLSI_profile']] = result[['o_mic','s_mic','r_mic','i_mic']].apply(sus_res_int,axis = 1)\n", + "#except ValueError:\n", + "# print('Waring: Error in input MIC value')" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "id": "3336fd92", + "metadata": {}, + "outputs": [], + "source": [ + "#result" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "id": "f0dacfd1", + "metadata": {}, + "outputs": [], + "source": [ + "#result[['Strain name', 'Antibiotics', 'MIC','s_mic','r_mic','CLSI_profile']].to_csv(output_profile,sep=',', index=False, encoding='utf-8-sig')" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "id": "3d8d03f7", + "metadata": {}, + "outputs": [], + "source": [ + "#create a pivot table for ASIST\n", + "table=result[['Strain name', 'Antibiotics','CLSI_profile']].drop_duplicates()\n", + "result_table=pd.pivot_table(table, values ='CLSI_profile', index =['Strain name'],columns =['Antibiotics'], aggfunc = lambda x: ' '.join(x))" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "id": "7d7223a3", + "metadata": {}, + "outputs": [], + "source": [ + "#result_table" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "id": "8a41b2ef", + "metadata": {}, + "outputs": [], + "source": [ + "#result_table.to_csv(output_table,na_rep='NA')" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "id": "8c9e5f87", + "metadata": {}, + "outputs": [], + "source": [ + "# reorder the Antibiotics for ASIST\n", + "clsi_ab=['Amikacin','Tobramycin','Gentamycin','Netilmicin','Imipenem','Meropenem','Doripenem','Ciprofloxacin','Levofloxacin',\n", + " 'Piperacillin/tazobactam','Ticarcillin/clavulanicacid','Cefotaxime','Ceftriaxone','Ceftazidime','Cefepime',\n", + " 'Trimethoprim/sulfamethoxazole','Ampicillin/sulbactam','Colistin','Polymyxinb','Tetracycline','Doxicycline ',\n", + " 'Minocycline']\n", + "result_selected=result_table.filter(clsi_ab)" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "id": "cdf43afb", + "metadata": {}, + "outputs": [], + "source": [ + "#print(result_selected.shape, result_table.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "id": "c4c4df30", + "metadata": {}, + "outputs": [], + "source": [ + "#result_selected.insert(0,'Resistance_phenotype','')" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "id": "9adb2703", + "metadata": {}, + "outputs": [], + "source": [ + "#rename headers\n", + "result_selected=result_selected.rename(columns = {'Ticarcillin/clavulanicacid':'Ticarcillin/clavulanic acid','Piperacillin/tazobactam':'Piperacillin/ tazobactam','Trimethoprim/sulfamethoxazole': 'Trimethoprim/ sulfamethoxazole','Ampicillin/sulbactam':'Ampicillin/ sulbactam', 'Polymyxinb': 'Polymyxin B'} )" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "id": "50e6cf5f", + "metadata": {}, + "outputs": [], + "source": [ + "#result_selected" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "id": "2833671c", + "metadata": {}, + "outputs": [], + "source": [ + "result_selected.to_csv(output_table,na_rep='NA')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff -r 116ebdf92e39 -r 7dcc0e93288b tool.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool.sh Wed Jun 30 07:13:29 2021 +0000 @@ -0,0 +1,15 @@ +#!/bin/bash + +planemo tool_init --id 'clsi_profile' --name 'CLSI Profile' \ + --description 'MIC profile using CLSI MIC breakpoints' \ + --requirement 'python' \ + --example_command "clsi_profile_type2.py 'input.csv' 'clsi.csv' 'output.csv'" \ + --example_input test-data/input.csv \ + --example_input test-data/clsi.csv \ + --example_output test-data/output.csv \ + --test_case \ +# --version_command 'python3 -V' \ +# --help_from_command '' \ +# --doi '' + +planemo lint diff -r 116ebdf92e39 -r 7dcc0e93288b tool_asist.sh --- a/tool_asist.sh Wed Jun 30 06:59:29 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -#!/bin/bash - -planemo tool_init --id 'asist' --name 'ASIST' --version 1.0.1 \ - --description 'Antimicrobial Susceptibility standards based phenotypes' \ - --requirement 'pandas' \ - --example_command "\"${__tool_directory__}/asist_dynamic.py\" 'asist_input.csv' 'asist_output.csv'" \ - --example_input test-data/asist_input.csv \ - --example_output test-data/asist_output.csv \ - --test_case \ - --version_command 'python -c "import pandas; print(pandas.__version__)"' \ - --doi 'https://ab-openlab.csir.res.in/asist' - -planemo lint asist.xml diff -r 116ebdf92e39 -r 7dcc0e93288b tool_shed.sh --- a/tool_shed.sh Wed Jun 30 06:59:29 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -#!/bin/bash -# After reviewing .shed.yml, this configuration file and relevant shed artifacts can be quickly linted using the following command. -planemo shed_lint --tools -#planemo shed_create --shed_target toolshed -planemo shed_update --check_diff --shed_target toolshed