Mercurial > repos > bgruening > biomodels_biomd0000001066

--- a/biomodels_BIOMD0000001066.xml	Tue Mar 12 17:03:04 2024 +0000
+++ b/biomodels_BIOMD0000001066.xml	Wed Mar 13 08:48:03 2024 +0000
@@ -41,36 +41,36 @@
         </configfile>
     </configfiles>
     <inputs>
-        <param name="input_file" type="data" label="Test data" format="tabular,csv,xlsx" multiple="false"/>
+	<param name="input_file" type="data" label="Test data" format="tabular,csv,xlsx" multiple="false" />
     </inputs>
     <outputs>
-        <data format="tabular" name="output_file" label="Predicted data"/>
+        <data format="tabular" name="output_file" label="Predicted data"></data>
     </outputs>
     <tests>
         <test>
-            <param name="input_file" value="test_data.tabular" ftype="tabular"/>
+            <param name="input_file" value="test_data.tabular" ftype="tabular" />
             <output name="output_file" file="pred_data.tabular">
                 <assert_contents>
-                    <has_n_columns n="17"/>
-                    <has_n_lines n="296"/>
+                    <has_n_columns n="17" />
+                    <has_n_lines n="296" />
                 </assert_contents>
             </output>
         </test>
         <test>
-            <param name="input_file" value="test_data.csv" ftype="csv"/>
+            <param name="input_file" value="test_data.csv" ftype="csv" />
             <output name="output_file" file="pred_data.tabular">
                 <assert_contents>
-                    <has_n_columns n="17"/>
-                    <has_n_lines n="296"/>
+                    <has_n_columns n="17" />
+                    <has_n_lines n="296" />
                 </assert_contents>
             </output>
         </test>
         <test>
-            <param name="input_file" value="test_data.xlsx" ftype="xlsx"/>
+            <param name="input_file" value="test_data.xlsx" ftype="xlsx" />
             <output name="output_file" file="pred_data.tabular">
                 <assert_contents>
-                    <has_n_columns n="17"/>
-                    <has_n_lines n="296"/>
+                    <has_n_columns n="17" />
+                    <has_n_lines n="296" />
                 </assert_contents>
             </output>
         </test>
--- a/biomodels_BIOMD0000001076.xml	Tue Mar 12 17:03:04 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,93 +0,0 @@
-<tool id="biomodels_BIOMD0000001076" name="Prediction of Breast Cancer (BC)" version="@VERSION@" profile="23.05">
-    <description>proteins involved in cancer immunotherapy using molecular descriptors and Multi Layer Perceptron (MLP) neural network</description>
-    <macros>
-        <token name="@VERSION@">1</token>
-    </macros>
-    <creator>
-        <organization name="EMBL's European Bioinformatics Institute (EMBL-EBI)" url="https://www.ebi.ac.uk/biomodels/" email="biomodels-net-support@lists.sf.net"/>
-        <person name="Anup Kumar" email="kumara@informatik.uni-freiburg.de"/>
-    </creator>
-    <requirements>
-        <container type="docker">docker.io/anupkumar/biomodels_biomd0000001076:@VERSION@</container>
-    </requirements>
-    <command><![CDATA[
-    cp /home/\$NB_USER/MLP_model3.onnx ./ &&
-    python '$biom_script_BIOMD0000001076'
-]]>
-    </command>
-    <configfiles>
-        <configfile name="biom_script_BIOMD0000001076">
-            <![CDATA[
-import os
-import numpy
-import onnxruntime as rt
-import pandas as pd
-
-#if $input_file.ext == "tabular":
-data_test = pd.read_csv('$input_file', sep="\t")
-#elif $input_file.ext == "csv":
-data_test = pd.read_csv('$input_file', sep=",")
-#end if
-
-Xdatap = data_test.iloc[:,:-1].values
-sess = rt.InferenceSession(os.getcwd() + "/MLP_model3.onnx")
-input_name = sess.get_inputs()[0].name
-label_name = sess.get_outputs()[0].name
-pred_onx = sess.run([label_name], {input_name: Xdatap.astype(numpy.float32)})[0]
-data_test["Predictions"] = pred_onx
-data_test.to_csv('$output_file', sep="\t", index=None)
-
-]]>
-        </configfile>
-    </configfiles>
-    <inputs>
-        <param name="input_file" type="data" label="Test data" format="tabular,csv" multiple="false"/>
-    </inputs>
-    <outputs>
-        <data format="tabular" name="output_file" label="Predicted data"/>
-    </outputs>
-    <tests>
-        <test>
-            <param name="input_file" value="test_data_BIOMD0000001076.tabular" ftype="tabular"/>
-            <output name="output_file" file="pred_data_BIOMD0000001076.tabular">
-                <assert_contents>
-                    <has_n_columns n="302"/>
-                    <has_n_lines n="401"/>
-                </assert_contents>
-            </output>
-        </test>
-        <test>
-            <param name="input_file" value="test_data_BIOMD0000001076.csv" ftype="csv"/>
-            <output name="output_file" file="pred_data_BIOMD0000001076.tabular">
-                <assert_contents>
-                    <has_n_columns n="302"/>
-                    <has_n_lines n="401"/>
-                </assert_contents>
-            </output>
-        </test>
-    </tests>
-    <help>
-        <![CDATA[
-**What it does**
-
-The tool makes prediction of breast cancer-related proteins: non-breast cancer related protein (0) or related to breast cancer protein (1). 300 features are used to train a Multi Layer Perceptron (artificial neural network) for such a prediction. The model is available via a Docker container. The original GitHub repository can be accessed at https://github.com/muntisa/neural-networks-for-breast-cancer-proteins.
-
-
-**Description**
-
-This study introduces a predictive classifier for breast cancer-related proteins, utilising a combination of protein sequence descriptors and machine learning techniques. The best-performing classifier is a Multi Layer Perceptron (artificial neural network) with 300 features, achieving an average Area Under the Receiver Operating Characteristics (AUROC) score of 0.984 through 3-fold cross-validation. Notably, the model identified top-ranked cancer immunotherapy proteins associated with breast cancer that should be studied for further biomarker discovery and therapeutic targeting. Please note that in this model, the output '0' means BC non-related protein and '1' means BC related protein.
-
-**Input file**
-
-Provide a tabular file (as tabular or csv) containing all the features as mentioned above.
-
-**Output file**
-
-Returns predicted data as a tabular file - the predictions are concatenated as a last column of the test data.
-
-        ]]>
-    </help>
-    <citations>
-        <citation type="doi">10.1038/s41598-020-65584-y</citation>
-    </citations>
-</tool>