diff train_test_eval.xml @ 9:ead7adad8d0e draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
author bgruening
date Tue, 13 Apr 2021 18:45:35 +0000
parents 1b68acd5ac08
children 2eb5c017958d
line wrap: on
line diff
--- a/train_test_eval.xml	Fri Oct 02 08:43:15 2020 +0000
+++ b/train_test_eval.xml	Tue Apr 13 18:45:35 2021 +0000
@@ -1,11 +1,11 @@
-<tool id="sklearn_train_test_eval" name="Train, Test and Evaluation" version="@VERSION@">
+<tool id="sklearn_train_test_eval" name="Train, Test and Evaluation" version="@VERSION@" profile="20.05">
     <description>fit a model using part of dataset and evaluate using the rest</description>
     <macros>
         <import>main_macros.xml</import>
         <import>keras_macros.xml</import>
     </macros>
-    <expand macro="python_requirements"/>
-    <expand macro="macro_stdio"/>
+    <expand macro="python_requirements" />
+    <expand macro="macro_stdio" />
     <version_command>echo "@VERSION@"</version_command>
     <command detect_errors="aggressive">
         <![CDATA[
@@ -51,32 +51,32 @@
                 <option value="train_val_test">Train, Validate and Test</option>
             </param>
             <when value="train_test">
-                <expand macro="estimator_and_hyperparameter"/>
+                <expand macro="estimator_and_hyperparameter" />
                 <section name="test_split" title="Test holdout" expanded="false">
                     <expand macro="train_test_split_params">
-                        <expand macro="cv_groups"/>
+                        <expand macro="cv_groups" />
                     </expand>
                 </section>
                 <section name="metrics" title="Metrics for evaluation" expanded="false">
-                    <expand macro="scoring_selection"/>
+                    <expand macro="scoring_selection" />
                 </section>
             </when>
             <when value="train_val_test">
-                <expand macro="estimator_and_hyperparameter"/>
+                <expand macro="estimator_and_hyperparameter" />
                 <section name="test_split" title="Test holdout" expanded="false">
                     <expand macro="train_test_split_params">
-                        <expand macro="cv_groups"/>
+                        <expand macro="cv_groups" />
                     </expand>
                 </section>
                 <section name="val_split" title="Validation holdout (recommend using the same method for both validation and test)" expanded="false">
-                    <expand macro="train_test_split_params"/>
+                    <expand macro="train_test_split_params" />
                 </section>
                 <section name="metrics" title="Metrics for evaluation" expanded="false">
-                    <expand macro="scoring_selection"/>
+                    <expand macro="scoring_selection" />
                 </section>
             </when>
         </conditional>
-        <expand macro="sl_mixed_input_plus_sequence"/>
+        <expand macro="sl_mixed_input_plus_sequence" />
         <param name="save" type="select" label="Save the fitted model" help="For security reason, deep learning models will be saved into two datasets, model skeleton and weights.">
             <option value="nope" selected="true">Nope, save is unnecessary</option>
             <option value="save_estimator">Fitted whole estimator (excluding deep learning)</option>
@@ -84,7 +84,7 @@
         </param>
     </inputs>
     <outputs>
-        <data format="tabular" name="outfile_result"/>
+        <data format="tabular" name="outfile_result" />
         <data format="zip" name="outfile_object" label="Fitted estimator or estimator skeleton on ${on_string}">
             <filter>save != 'nope'</filter>
         </data>
@@ -95,175 +95,175 @@
     <tests>
         <test>
             <conditional name="experiment_schemes">
-                <param name="selected_exp_scheme" value="train_val_test"/>
-                <param name="infile_estimator" value="keras_model04" ftype="zip"/>
+                <param name="selected_exp_scheme" value="train_val_test" />
+                <param name="infile_estimator" value="keras_model04" ftype="zip" />
                 <section name="hyperparams_swapping">
-                    <param name="infile_params" value="keras_params04.tabular" ftype="tabular"/>
+                    <param name="infile_params" value="keras_params04.tabular" ftype="tabular" />
                     <repeat name="param_set">
-                        <param name="sp_value" value="999"/>
-                        <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed"/>
+                        <param name="sp_value" value="999" />
+                        <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed" />
                     </repeat>
                     <repeat name="param_set">
-                        <param name="sp_value" value="999"/>
-                        <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed"/>
+                        <param name="sp_value" value="999" />
+                        <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed" />
                     </repeat>
                     <repeat name="param_set">
-                        <param name="sp_value" value="0.1"/>
-                        <param name="sp_name" value="lr"/>
+                        <param name="sp_value" value="0.1" />
+                        <param name="sp_name" value="lr" />
                     </repeat>
                     <repeat name="param_set">
-                        <param name="sp_value" value="'adamax'"/>
-                        <param name="sp_name" value="optimizer"/>
+                        <param name="sp_value" value="'adamax'" />
+                        <param name="sp_name" value="optimizer" />
                     </repeat>
                 </section>
                 <section name="test_split">
                     <conditional name="split_algos">
-                        <param name="shuffle" value="simple"/>
-                        <param name="test_size" value="0.2"/>
-                        <param name="random_state" value="123"/>
+                        <param name="shuffle" value="simple" />
+                        <param name="test_size" value="0.2" />
+                        <param name="random_state" value="123" />
                     </conditional>
                 </section>
                 <section name="val_split">
                     <conditional name="split_algos">
-                        <param name="shuffle" value="simple"/>
-                        <param name="test_size" value="0.2"/>
-                        <param name="random_state" value="456"/>
+                        <param name="shuffle" value="simple" />
+                        <param name="test_size" value="0.2" />
+                        <param name="random_state" value="456" />
                     </conditional>
                 </section>
                 <section name="metrics">
                     <conditional name="scoring">
-                        <param name="primary_scoring" value="r2"/>
-                        <param name="secondary_scoring" value="neg_mean_absolute_error"/>
+                        <param name="primary_scoring" value="r2" />
+                        <param name="secondary_scoring" value="neg_mean_absolute_error" />
                     </conditional>
                 </section>
             </conditional>
-            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular" />
             <param name="header1" value="true" />
-            <param name="selected_column_selector_option" value="all_columns"/>
-            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
+            <param name="selected_column_selector_option" value="all_columns" />
+            <param name="infile2" value="regression_y.tabular" ftype="tabular" />
             <param name="header2" value="true" />
-            <param name="selected_column_selector_option2" value="all_columns"/>
-            <param name="save" value="save_weights"/>
+            <param name="selected_column_selector_option2" value="all_columns" />
+            <param name="save" value="save_weights" />
             <output name="outfile_result">
                 <assert_contents>
-                    <has_n_columns n="2"/>
-                    <has_text text="0.6626"/>
-                    <has_text text="5.598"/>
+                    <has_n_columns n="2" />
+                    <has_text text="0.6384" />
+                    <has_text text="-6.072" />
                 </assert_contents>
             </output>
-            <output name="outfile_object" file="train_test_eval_model01" compare="sim_size" delta="5"/>
-            <output name="outfile_weights" file="train_test_eval_weights01.h5" compare="sim_size" delta="5"/>
+            <output name="outfile_object" file="train_test_eval_model01" compare="sim_size" delta="5" />
+            <output name="outfile_weights" file="train_test_eval_weights01.h5" compare="sim_size" delta="5" />
         </test>
         <test>
             <conditional name="experiment_schemes">
-                <param name="selected_exp_scheme" value="train_val_test"/>
-                <param name="infile_estimator" value="keras_model04" ftype="zip"/>
+                <param name="selected_exp_scheme" value="train_val_test" />
+                <param name="infile_estimator" value="keras_model04" ftype="zip" />
                 <section name="hyperparams_swapping">
-                    <param name="infile_params" value="keras_params04.tabular" ftype="tabular"/>
+                    <param name="infile_params" value="keras_params04.tabular" ftype="tabular" />
                     <repeat name="param_set">
-                        <param name="sp_value" value="999"/>
-                        <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed"/>
+                        <param name="sp_value" value="999" />
+                        <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed" />
                     </repeat>
                     <repeat name="param_set">
-                        <param name="sp_value" value="999"/>
-                        <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed"/>
+                        <param name="sp_value" value="999" />
+                        <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed" />
                     </repeat>
                     <repeat name="param_set">
-                        <param name="sp_value" value="0.1"/>
-                        <param name="sp_name" value="lr"/>
+                        <param name="sp_value" value="0.1" />
+                        <param name="sp_name" value="lr" />
                     </repeat>
                     <repeat name="param_set">
-                        <param name="sp_value" value="'adamax'"/>
-                        <param name="sp_name" value="optimizer"/>
+                        <param name="sp_value" value="'adamax'" />
+                        <param name="sp_name" value="optimizer" />
                     </repeat>
                 </section>
                 <section name="test_split">
                     <conditional name="split_algos">
-                        <param name="shuffle" value="group"/>
-                        <param name="group_names" value="test"/>
+                        <param name="shuffle" value="group" />
+                        <param name="group_names" value="test" />
                         <section name="groups_selector">
-                            <param name="infile_g" value="regression_groups.tabular" ftype="tabular"/>
-                            <param name="header_g" value="true"/>
+                            <param name="infile_g" value="regression_groups.tabular" ftype="tabular" />
+                            <param name="header_g" value="true" />
                             <conditional name="column_selector_options_g">
-                                <param name="selected_column_selector_option_g" value="by_index_number"/>
-                                <param name="col_g" value="1"/>
+                                <param name="selected_column_selector_option_g" value="by_index_number" />
+                                <param name="col_g" value="1" />
                             </conditional>
                         </section>
                     </conditional>
                 </section>
                 <section name="val_split">
                     <conditional name="split_algos">
-                        <param name="shuffle" value="group"/>
-                        <param name="group_names" value="validation"/>
+                        <param name="shuffle" value="group" />
+                        <param name="group_names" value="validation" />
                     </conditional>
                 </section>
                 <section name="metrics">
                     <conditional name="scoring">
-                        <param name="primary_scoring" value="r2"/>
-                        <param name="secondary_scoring" value="neg_mean_absolute_error"/>
+                        <param name="primary_scoring" value="r2" />
+                        <param name="secondary_scoring" value="neg_mean_absolute_error" />
                     </conditional>
                 </section>
             </conditional>
-            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular" />
             <param name="header1" value="true" />
-            <param name="selected_column_selector_option" value="all_columns"/>
-            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
+            <param name="selected_column_selector_option" value="all_columns" />
+            <param name="infile2" value="regression_y.tabular" ftype="tabular" />
             <param name="header2" value="true" />
-            <param name="selected_column_selector_option2" value="all_columns"/>
-            <param name="save" value="save_weights"/>
-            <output name="outfile_result" >
+            <param name="selected_column_selector_option2" value="all_columns" />
+            <param name="save" value="save_weights" />
+            <output name="outfile_result">
                 <assert_contents>
-                    <has_n_columns n="2"/>
-                    <has_text text="0.667"/>
-                    <has_text text="-5.586"/>
+                    <has_n_columns n="2" />
+                    <has_text text="0.627" />
+                    <has_text text="-6.012" />
                 </assert_contents>
             </output>
-            <output name="outfile_weights" file="train_test_eval_weights02.h5" compare="sim_size" delta="5"/>
+            <output name="outfile_weights" file="train_test_eval_weights02.h5" compare="sim_size" delta="5" />
         </test>
         <test>
             <conditional name="experiment_schemes">
-                <param name="selected_exp_scheme" value="train_test"/>
-                <param name="infile_estimator" value="pipeline10" ftype="zip"/>
+                <param name="selected_exp_scheme" value="train_test" />
+                <param name="infile_estimator" value="pipeline10" ftype="zip" />
                 <section name="hyperparams_swapping">
-                    <param name="infile_params" value="get_params10.tabular" ftype="tabular"/>
+                    <param name="infile_params" value="get_params10.tabular" ftype="tabular" />
                     <repeat name="param_set">
-                        <param name="sp_value" value="10"/>
-                        <param name="sp_name" value="adaboostregressor__random_state"/>
+                        <param name="sp_value" value="10" />
+                        <param name="sp_name" value="adaboostregressor__random_state" />
                     </repeat>
                     <repeat name="param_set">
-                        <param name="sp_value" value=": sklearn_tree.ExtraTreeRegressor(random_state=0)"/>
-                        <param name="sp_name" value="adaboostregressor__base_estimator"/>
+                        <param name="sp_value" value=": sklearn_tree.ExtraTreeRegressor(random_state=0)" />
+                        <param name="sp_name" value="adaboostregressor__base_estimator" />
                     </repeat>
                 </section>
                 <section name="test_split">
                     <conditional name="split_algos">
-                        <param name="shuffle" value="simple"/>
-                        <param name="test_size" value="0.2"/>
-                        <param name="random_state" value="123"/>
+                        <param name="shuffle" value="simple" />
+                        <param name="test_size" value="0.2" />
+                        <param name="random_state" value="123" />
                     </conditional>
                 </section>
                 <section name="val_split">
                     <conditional name="split_algos">
-                        <param name="shuffle" value="simple"/>
-                        <param name="test_size" value="0.2"/>
-                        <param name="random_state" value="456"/>
+                        <param name="shuffle" value="simple" />
+                        <param name="test_size" value="0.2" />
+                        <param name="random_state" value="456" />
                     </conditional>
                 </section>
                 <section name="metrics">
                     <conditional name="scoring">
-                        <param name="primary_scoring" value="r2"/>
-                        <param name="secondary_scoring" value="neg_mean_absolute_error"/>
+                        <param name="primary_scoring" value="r2" />
+                        <param name="secondary_scoring" value="neg_mean_absolute_error" />
                     </conditional>
                 </section>
             </conditional>
-            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular" />
             <param name="header1" value="true" />
-            <param name="selected_column_selector_option" value="all_columns"/>
-            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
+            <param name="selected_column_selector_option" value="all_columns" />
+            <param name="infile2" value="regression_y.tabular" ftype="tabular" />
             <param name="header2" value="true" />
-            <param name="selected_column_selector_option2" value="all_columns"/>
-            <param name="save" value="nope"/>
-            <output name="outfile_result" file="train_test_eval03.tabular"/>
+            <param name="selected_column_selector_option2" value="all_columns" />
+            <param name="save" value="nope" />
+            <output name="outfile_result" file="train_test_eval03.tabular" />
         </test>
     </tests>
     <help>
@@ -283,8 +283,8 @@
         ]]>
     </help>
     <expand macro="sklearn_citation">
-        <expand macro="skrebate_citation"/>
-        <expand macro="xgboost_citation"/>
-        <expand macro="keras_citation"/>
+        <expand macro="skrebate_citation" />
+        <expand macro="xgboost_citation" />
+        <expand macro="keras_citation" />
     </expand>
 </tool>