diff deepmicro.xml @ 2:2d20b3a1babd draft default tip

planemo upload for repository https://github.com/paulzierep/DeepMicro commit 574cb8c241e18a15f006bf307235c7dd23f07c69
author iuc
date Tue, 23 Jul 2024 15:56:32 +0000
parents c58c1a99578b
children
line wrap: on
line diff
--- a/deepmicro.xml	Sat Jun 10 21:59:04 2023 +0000
+++ b/deepmicro.xml	Tue Jul 23 15:56:32 2024 +0000
@@ -18,9 +18,20 @@
             #for $params in $mode.parameter_set:
 
                 #if $params.rl_type.rl_type_choice == "--pca" or $params.rl_type.rl_type_choice == "--rp":
-                    DM.py -r 1 -cd features.csv '$params.rl_type.rl_type_choice' --save_rep --no_clf -t \${GALAXY_SLOTS:-8} &&
+                    DM.py -r 1 
+                    -cd features.csv 
+                    '$params.rl_type.rl_type_choice' 
+                    --save_rep 
+                    --no_clf 
+                    -t \${GALAXY_SLOTS:-8} &&
                 #else: 
-                    DM.py -r 1 -cd features.csv '$params.rl_type.rl_type_choice' -dm '$params.rl_type.dm' --save_rep --no_clf -t \${GALAXY_SLOTS:-8} &&
+                    DM.py -r 1 
+                    -cd features.csv 
+                    '$params.rl_type.rl_type_choice' 
+                    -dm '$params.rl_type.dm' 
+                    --save_rep 
+                    --no_clf 
+                    -t \${GALAXY_SLOTS:-8} &&
                 #end if
             #end for
 
@@ -30,14 +41,29 @@
 
             #for $params in $mode.parameter_set:
 
+                ## general args
+                DM.py
+                -r '$mode.repeat'
+                -cd features.csv 
+                -cl labels.csv 
+                --save_rep 
+                -m '$params.rl_type.classifier'  
+                --scoring '$mode.scoring'
+                -f '$mode.folds'
+                -t \${GALAXY_SLOTS:-8} 
+
                 ## only train classifier without encoding
                 #if $params.rl_type.rl_type_choice == "no_rl":
-                    DM.py -r 1 -cd features.csv -cl labels.csv --save_rep -m '$params.rl_type.classifier' -t \${GALAXY_SLOTS:-8} &&
+                    && echo "Only train Clf - no encoding!"
+                ## add rl type
                 #elif $params.rl_type.rl_type_choice == "--pca" or $params.rl_type.rl_type_choice == "--rp":
-                    DM.py -r 1 -cd features.csv -cl labels.csv '$params.rl_type.rl_type_choice' --save_rep -m '$params.rl_type.classifier' -t \${GALAXY_SLOTS:-8} &&
+                    '$params.rl_type.rl_type_choice' 
+                ## add rl type and dm options
                 #else: 
-                    DM.py -r 1 -cd features.csv -cl labels.csv '$params.rl_type.rl_type_choice' -dm '$params.rl_type.dm' --save_rep -m '$params.rl_type.classifier'  -t \${GALAXY_SLOTS:-8} &&
+                    '$params.rl_type.rl_type_choice' 
+                    -dm '$params.rl_type.dm' 
                 #end if
+                &&
             #end for
 
         #end if
@@ -59,9 +85,9 @@
                         <param name="rl_type_choice" type="select" label="Representation learning type" help="The type of representation learning" >
                             <option value="--pca">PCA</option>
                             <option value="--rp">Random Projection</option>
-                            <option value="--ae">Autoencoder or Deep Autoencoder</option>
-                            <option value="--vae">Variational Autoencoder</option>
-                            <option value="--cae">Convolutional Autoencoder</option>
+                            <option value="--ae">Shallow Autoencoder or Deep Autoencoder (SAE, DAE)</option>
+                            <option value="--vae">Variational Autoencoder (VAE)</option>
+                            <option value="--cae">Convolutional Autoencoder (CAE)</option>
                         </param>
                         <when value="--pca"/>
                         <when value="--rp"/>
@@ -79,14 +105,23 @@
             </when>
             <when value="e_and_c">
                 <param argument="--class_labels" type="data" format="tabular" label="Class labels" help="Dataset containing the class labels corresponding to the features"/>
+                <param name="scoring" type="select" label="Scoring function for the classifier" help="The classifier will be optimized for this scoring function." >
+                    <option value="roc_auc">ROC AUC</option>
+                    <option value="accuracy">Accuracy</option>
+                    <option value="f1">F1 Score</option>
+                    <option value="recall">Recall</option>
+                    <option value="precision">Precision</option>
+                </param>
+                <param name="folds" type="integer" value="5" label="Cross-validation folds" min="2" max="10" help="The number of folds for cross-validation in the training set"/>
+                <param name="repeat" type="integer" value="1" label="Repeat the experiment with different seed" min="1" max="5" help="Repeat the experiment with different seeds. Leads to a different train / test split each time."/>
                 <repeat name="parameter_set" title="Parameter Set">
                     <conditional name="rl_type">
                         <param name="rl_type_choice" type="select" label="Representation learning type" help="The type of representation learning. `Train on input` trains the classifier on the input features without representation learning" >
                             <option value="--pca">PCA</option>
                             <option value="--rp">Random Projection</option>
-                            <option value="--ae">Autoencoder or Deep Autoencoder</option>
-                            <option value="--vae">Variational Autoencoder</option>
-                            <option value="--cae">Convolutional Autoencoder</option>
+                            <option value="--ae">Shallow Autoencoder or Deep Autoencoder (SAE, DAE)</option>
+                            <option value="--vae">Variational Autoencoder (VAE)</option>
+                            <option value="--cae">Convolutional Autoencoder (CAE)</option>
                             <option value="no_rl">Train on input</option>
                         </param>
                         <when value="no_rl">
@@ -126,13 +161,19 @@
             <filter>mode["mode_type"] == "only_encoding"</filter>
             <discover_datasets directory="results" pattern="(?P&lt;designation&gt;.*)_rep\.csv" format="tabular" visible="false" />
         </collection>
+        <collection name="model" type="list" label="Keras Models">
+            <!-- Keras model files (*.h5) discovered in the working directory; only collected in encoding-only mode, like the encoded features.
+            TODO: change the tool to export features for the complete dataset also when classification is performed -->
+            <filter>mode["mode_type"] == "only_encoding"</filter>
+            <discover_datasets directory="." pattern="(?P&lt;designation&gt;.*)\.h5" format="data" visible="false" />
+        </collection>
     </outputs>
     <tests>
 
     <!-- only encoding -->
     <!-- test one parameter sets -->
 
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="2">
             <param name="mode_type" value="only_encoding" />
             <param name="features" value="UserDataExample.csv" />
             <param name="rl_type_choice" value="--ae" />
@@ -148,7 +189,7 @@
             </output_collection>
         </test>
 
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="2">
             <param name="mode_type" value="only_encoding" />
             <param name="features" value="UserDataExample.csv" />
             <param name="rl_type_choice" value="--pca" />
@@ -162,8 +203,27 @@
             </output_collection>
         </test>
 
+        <!-- only encoding -->
+
+        <test expect_num_outputs="2">
+            <param name="mode_type" value="only_encoding" />
+            <param name="features" value="UserDataExample.csv" />
+            <param name="rl_type_choice" value="--ae" />
+            <param name="dm" value="40" />
+
+            <output_collection name="encoded_features" type="list">
+                <!-- output is non-deterministic -->
+                <element name="AE[40]_features" ftype="tabular" >
+                    <assert_contents>
+                        <has_n_lines n="20"/>
+                        <!-- <has_n_columns n="40" sep="," /> -->
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+
         <!-- test multiple parameter sets -->
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="2">
             <param name="features" value="UserDataExample.csv" />
             <conditional name="mode">
                 <param name="mode_type" value="only_encoding" />
@@ -202,13 +262,16 @@
 
         <!-- encoding and clf -->
         <!-- test one parameter set -->
-
+        <!-- test additional parameters scoring / folds and repeat -->
         <test expect_num_outputs="1">
             <param name="features" value="UserDataExample.csv" />
             <param name="mode_type" value="e_and_c" />
             <param name="class_labels" value="UserLabelExample.csv" />
             <param name="rl_type_choice" value="--vae" />
             <param name="dm" value="40" />
+            <param name="scoring" value="roc_auc" />
+            <param name="folds" value="2" />
+            <param name="repeat" value="2" />
             <param name="classifier" value="rf" /> 
             <output ftype="tabular" name="results" >
                 <assert_contents>
@@ -218,6 +281,8 @@
 
         </test>
 
+
+        <!-- no rl -->
         <test expect_num_outputs="1">
             <param name="features" value="UserDataExample.csv" />
             <param name="mode_type" value="e_and_c" />