changeset 5:3c27e5c2a8e9 draft

"planemo upload for repository https://github.com/rakesh4osdd/asist/tree/master commit d4b81c15919b4b87d215eaf1b792c8f251665647"
author rakesh4osdd
date Tue, 29 Jun 2021 12:58:49 +0000
parents 2a5861818faf
children b67a3c53cc69
files clsi_profile.py clsi_profile_type2_linux.ipynb
diffstat 2 files changed, 107 insertions(+), 74 deletions(-) [+]
line wrap: on
line diff
--- a/clsi_profile.py	Tue Jun 29 12:15:06 2021 +0000
+++ b/clsi_profile.py	Tue Jun 29 12:58:49 2021 +0000
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # coding: utf-8
 
-# In[115]:
+# In[7]:
 
 
 # ASIST module2 | map AST result to the CLSI breakporints with combination antibiotics
@@ -11,13 +11,13 @@
 import sys
 
 
-# In[116]:
+# In[8]:
 
 
 #print(pd.__version__, re.__version__)
 
 
-# In[117]:
+# In[9]:
 
 
 # compare two MIC value strings
@@ -105,7 +105,7 @@
 #check_mic('65','32-64','i')
 
 
-# In[118]:
+# In[10]:
 
 
 # compare MIC value in pandas list
@@ -132,7 +132,7 @@
 #sus_res_int(mic)
 
 
-# In[119]:
+# In[11]:
 
 
 # for input argument
@@ -141,18 +141,18 @@
 output_table = sys.argv[3]
 
 
-# In[3]:
+# In[49]:
 
 
-"""#input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'
-input_user='test-data/input2.csv'
+"""input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'
+#input_user='test-data/input2.csv'
 input_clsi='test-data/clsi.csv'
 output_profile='test-data/input2_profile.csv'
-output_table='test-data/input2_table.csv'
-#output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'"""
+#output_table='test-data/input2_table.csv'
+output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'"""
 
 
-# In[146]:
+# In[60]:
 
 
 # read user AST data with selected 3 columns
@@ -160,7 +160,7 @@
 #strain_mic
 
 
-# In[147]:
+# In[61]:
 
 
 clsi_bp=pd.read_csv(input_clsi,sep=',')
@@ -168,30 +168,30 @@
 #clsi_bp[clsi_bp[['Antibiotics', 'Susceptible']].duplicated()].shape
 
 
-# In[148]:
+# In[62]:
 
 
 #clsi_bp
 #strain_mic
 
 
-# In[149]:
+# In[64]:
 
 
+# warn user about duplicate 'Strain name'/'Antibiotics' rows in the input file
 input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()]
 if (input_dups.shape[0] == 0):
     #print( "No duplicates")
     pass
 else:
-    input_dups.to_csv(output_table,na_rep='NA')
-    with open(output_table, "a") as file_object:
+    with open(output_table, "w") as file_object:
     # Append 'hello' at the end of file
-        file_object.write('Input File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file')
+        file_object.write('S.No.,Strain name,Antibiotics,MIC\nInput File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file\n')
+    input_dups.to_csv(output_table,na_rep='NA', mode='a')
     exit()
-#input_dups.head()
 
 
-# In[125]:
+# In[17]:
 
 
 # convert MIC to numbers sMIC, rMIC
@@ -200,13 +200,13 @@
 clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x)))
 
 
-# In[126]:
+# In[18]:
 
 
 #clsi_bp['i_mic'] = clsi_bp[['Intermediate']].applymap(lambda x: (re.sub(r'[^0-9.\/-]', '', x)))
 
 
-# In[127]:
+# In[19]:
 
 
 # Read only numbers in MIC values
@@ -216,13 +216,13 @@
 #    print('Waring: Error in MIC value')
 
 
-# In[128]:
+# In[20]:
 
 
 #strain_mic
 
 
-# In[129]:
+# In[21]:
 
 
 # capitalize each Antibiotic Name for comparision with removing whitespace
@@ -232,7 +232,7 @@
 clsi_bp['Antibiotics']=clsi_bp['Antibiotics'].str.capitalize().str.replace(" ","")
 
 
-# In[130]:
+# In[22]:
 
 
 #find duplicate values in input files
@@ -249,10 +249,10 @@
         print('Waring: Error in input Values')
 
 
-# In[131]:
+# In[23]:
 
 
-#result
+dups.head()
 
 
 # In[132]:
--- a/clsi_profile_type2_linux.ipynb	Tue Jun 29 12:15:06 2021 +0000
+++ b/clsi_profile_type2_linux.ipynb	Tue Jun 29 12:58:49 2021 +0000
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 115,
+   "execution_count": 7,
    "id": "9aa0a6f7",
    "metadata": {},
    "outputs": [],
@@ -16,7 +16,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 116,
+   "execution_count": 8,
    "id": "9af8387e",
    "metadata": {},
    "outputs": [],
@@ -26,7 +26,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 117,
+   "execution_count": 9,
    "id": "73d0783c",
    "metadata": {},
    "outputs": [],
@@ -118,7 +118,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 118,
+   "execution_count": 10,
    "id": "4d2ab1b1",
    "metadata": {},
    "outputs": [],
@@ -149,19 +149,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 119,
+   "execution_count": 11,
    "id": "0e22ef0d",
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "'# for input argument\\ninput_user = sys.argv[1]\\ninput_clsi = sys.argv[2]\\noutput_table = sys.argv[3]'"
-      ]
-     },
-     "execution_count": 119,
-     "metadata": {},
-     "output_type": "execute_result"
+     "ename": "IndexError",
+     "evalue": "list index out of range",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mIndexError\u001b[0m                                Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-11-26f141926f14>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0minput_user\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0minput_clsi\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0moutput_table\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margv\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mIndexError\u001b[0m: list index out of range"
+     ]
     }
    ],
    "source": [
@@ -173,33 +174,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 49,
    "id": "21d5fe63",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "\"#input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'\\ninput_user='test-data/input2.csv'\\ninput_clsi='test-data/clsi.csv'\\noutput_profile='test-data/input2_profile.csv'\\noutput_table='test-data/input2_table.csv'\\n#output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'\""
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "\"\"\"#input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'\n",
-    "input_user='test-data/input2.csv'\n",
+    "\"\"\"input_user='~/Jupyterlab_notebook/ASIST_module/strain_profiles_16k.csv.csv'\n",
+    "#input_user='test-data/input2.csv'\n",
     "input_clsi='test-data/clsi.csv'\n",
     "output_profile='test-data/input2_profile.csv'\n",
-    "output_table='test-data/input2_table.csv'\n",
-    "#output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'\"\"\""
+    "#output_table='test-data/input2_table.csv'\n",
+    "output_table='/home/rakesh/Jupyterlab_notebook/ASIST_module/strain_profiles_16k_table.csv'\"\"\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 146,
+   "execution_count": 60,
    "id": "1e64b025",
    "metadata": {},
    "outputs": [],
@@ -211,7 +201,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 147,
+   "execution_count": 61,
    "id": "0d30ddc3",
    "metadata": {},
    "outputs": [],
@@ -223,7 +213,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 148,
+   "execution_count": 62,
    "id": "a818676d",
    "metadata": {},
    "outputs": [],
@@ -234,27 +224,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 149,
+   "execution_count": 64,
    "id": "c2aae757",
    "metadata": {},
    "outputs": [],
    "source": [
+    "# warn user about duplicate 'Strain name'/'Antibiotics' rows in the input file\n",
     "input_dups=strain_mic[strain_mic[['Strain name','Antibiotics']].duplicated()]\n",
     "if (input_dups.shape[0] == 0):\n",
     "    #print( \"No duplicates\")\n",
     "    pass\n",
     "else:\n",
-    "    input_dups.to_csv(output_table,na_rep='NA')\n",
-    "    with open(output_table, \"a\") as file_object:\n",
+    "    with open(output_table, \"w\") as file_object:\n",
     "    # Append 'hello' at the end of file\n",
-    "        file_object.write('Input File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file')\n",
-    "    exit()\n",
-    "#input_dups.head()"
+    "        file_object.write('S.No.,Strain name,Antibiotics,MIC\\nInput File Error: Please remove duplicate/mutiple MIC values for same combination of Strain name and Antibiotics from input file\\n')\n",
+    "    input_dups.to_csv(output_table,na_rep='NA', mode='a')\n",
+    "    exit()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 125,
+   "execution_count": 17,
    "id": "c6b4c59b",
    "metadata": {},
    "outputs": [],
@@ -267,7 +257,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 126,
+   "execution_count": 18,
    "id": "d0171f94",
    "metadata": {},
    "outputs": [],
@@ -277,7 +267,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 127,
+   "execution_count": 19,
    "id": "fe45b2dd",
    "metadata": {},
    "outputs": [],
@@ -291,7 +281,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 128,
+   "execution_count": 20,
    "id": "ddbbe4d9",
    "metadata": {},
    "outputs": [],
@@ -301,7 +291,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 129,
+   "execution_count": 21,
    "id": "640508f1",
    "metadata": {},
    "outputs": [],
@@ -315,7 +305,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 130,
+   "execution_count": 22,
    "id": "b87426f4",
    "metadata": {},
    "outputs": [],
@@ -336,12 +326,55 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 131,
+   "execution_count": 23,
    "id": "91bfc94d",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Strain name</th>\n",
+       "      <th>Antibiotics</th>\n",
+       "      <th>MIC</th>\n",
+       "      <th>o_mic</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [Strain name, Antibiotics, MIC, o_mic]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "#result"
+    "dups.head()"
    ]
   },
   {