comparison train_test_eval.xml @ 9:ead7adad8d0e draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
author bgruening
date Tue, 13 Apr 2021 18:45:35 +0000
parents 1b68acd5ac08
children 2eb5c017958d
comparison
equal deleted inserted replaced
8:e03a58b31c12 9:ead7adad8d0e
1 <tool id="sklearn_train_test_eval" name="Train, Test and Evaluation" version="@VERSION@"> 1 <tool id="sklearn_train_test_eval" name="Train, Test and Evaluation" version="@VERSION@" profile="20.05">
2 <description>fit a model using part of dataset and evaluate using the rest</description> 2 <description>fit a model using part of dataset and evaluate using the rest</description>
3 <macros> 3 <macros>
4 <import>main_macros.xml</import> 4 <import>main_macros.xml</import>
5 <import>keras_macros.xml</import> 5 <import>keras_macros.xml</import>
6 </macros> 6 </macros>
7 <expand macro="python_requirements"/> 7 <expand macro="python_requirements" />
8 <expand macro="macro_stdio"/> 8 <expand macro="macro_stdio" />
9 <version_command>echo "@VERSION@"</version_command> 9 <version_command>echo "@VERSION@"</version_command>
10 <command detect_errors="aggressive"> 10 <command detect_errors="aggressive">
11 <![CDATA[ 11 <![CDATA[
12 export HDF5_USE_FILE_LOCKING='FALSE'; 12 export HDF5_USE_FILE_LOCKING='FALSE';
13 #if $input_options.selected_input == 'refseq_and_interval' 13 #if $input_options.selected_input == 'refseq_and_interval'
49 <param name="selected_exp_scheme" type="select" label="Select a scheme"> 49 <param name="selected_exp_scheme" type="select" label="Select a scheme">
50 <option value="train_test" selected="true">Train and Test</option> 50 <option value="train_test" selected="true">Train and Test</option>
51 <option value="train_val_test">Train, Validate and Test</option> 51 <option value="train_val_test">Train, Validate and Test</option>
52 </param> 52 </param>
53 <when value="train_test"> 53 <when value="train_test">
54 <expand macro="estimator_and_hyperparameter"/> 54 <expand macro="estimator_and_hyperparameter" />
55 <section name="test_split" title="Test holdout" expanded="false"> 55 <section name="test_split" title="Test holdout" expanded="false">
56 <expand macro="train_test_split_params"> 56 <expand macro="train_test_split_params">
57 <expand macro="cv_groups"/> 57 <expand macro="cv_groups" />
58 </expand> 58 </expand>
59 </section> 59 </section>
60 <section name="metrics" title="Metrics for evaluation" expanded="false"> 60 <section name="metrics" title="Metrics for evaluation" expanded="false">
61 <expand macro="scoring_selection"/> 61 <expand macro="scoring_selection" />
62 </section> 62 </section>
63 </when> 63 </when>
64 <when value="train_val_test"> 64 <when value="train_val_test">
65 <expand macro="estimator_and_hyperparameter"/> 65 <expand macro="estimator_and_hyperparameter" />
66 <section name="test_split" title="Test holdout" expanded="false"> 66 <section name="test_split" title="Test holdout" expanded="false">
67 <expand macro="train_test_split_params"> 67 <expand macro="train_test_split_params">
68 <expand macro="cv_groups"/> 68 <expand macro="cv_groups" />
69 </expand> 69 </expand>
70 </section> 70 </section>
71 <section name="val_split" title="Validation holdout (recommend using the same method for both validation and test)" expanded="false"> 71 <section name="val_split" title="Validation holdout (recommend using the same method for both validation and test)" expanded="false">
72 <expand macro="train_test_split_params"/> 72 <expand macro="train_test_split_params" />
73 </section> 73 </section>
74 <section name="metrics" title="Metrics for evaluation" expanded="false"> 74 <section name="metrics" title="Metrics for evaluation" expanded="false">
75 <expand macro="scoring_selection"/> 75 <expand macro="scoring_selection" />
76 </section> 76 </section>
77 </when> 77 </when>
78 </conditional> 78 </conditional>
79 <expand macro="sl_mixed_input_plus_sequence"/> 79 <expand macro="sl_mixed_input_plus_sequence" />
80 <param name="save" type="select" label="Save the fitted model" help="For security reason, deep learning models will be saved into two datasets, model skeleton and weights."> 80 <param name="save" type="select" label="Save the fitted model" help="For security reason, deep learning models will be saved into two datasets, model skeleton and weights.">
81 <option value="nope" selected="true">Nope, save is unnecessary</option> 81 <option value="nope" selected="true">Nope, save is unnecessary</option>
82 <option value="save_estimator">Fitted whole estimator (excluding deep learning)</option> 82 <option value="save_estimator">Fitted whole estimator (excluding deep learning)</option>
83 <option value="save_weights">Model skeleton and weights, for deep learning exclusively</option> 83 <option value="save_weights">Model skeleton and weights, for deep learning exclusively</option>
84 </param> 84 </param>
85 </inputs> 85 </inputs>
86 <outputs> 86 <outputs>
87 <data format="tabular" name="outfile_result"/> 87 <data format="tabular" name="outfile_result" />
88 <data format="zip" name="outfile_object" label="Fitted estimator or estimator skeleton on ${on_string}"> 88 <data format="zip" name="outfile_object" label="Fitted estimator or estimator skeleton on ${on_string}">
89 <filter>save != 'nope'</filter> 89 <filter>save != 'nope'</filter>
90 </data> 90 </data>
91 <data format="h5" name="outfile_weights" label="Weights trained on ${on_string}"> 91 <data format="h5" name="outfile_weights" label="Weights trained on ${on_string}">
92 <filter>save == 'save_weights'</filter> 92 <filter>save == 'save_weights'</filter>
93 </data> 93 </data>
94 </outputs> 94 </outputs>
95 <tests> 95 <tests>
96 <test> 96 <test>
97 <conditional name="experiment_schemes"> 97 <conditional name="experiment_schemes">
98 <param name="selected_exp_scheme" value="train_val_test"/> 98 <param name="selected_exp_scheme" value="train_val_test" />
99 <param name="infile_estimator" value="keras_model04" ftype="zip"/> 99 <param name="infile_estimator" value="keras_model04" ftype="zip" />
100 <section name="hyperparams_swapping"> 100 <section name="hyperparams_swapping">
101 <param name="infile_params" value="keras_params04.tabular" ftype="tabular"/> 101 <param name="infile_params" value="keras_params04.tabular" ftype="tabular" />
102 <repeat name="param_set"> 102 <repeat name="param_set">
103 <param name="sp_value" value="999"/> 103 <param name="sp_value" value="999" />
104 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed"/> 104 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed" />
105 </repeat> 105 </repeat>
106 <repeat name="param_set"> 106 <repeat name="param_set">
107 <param name="sp_value" value="999"/> 107 <param name="sp_value" value="999" />
108 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed"/> 108 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed" />
109 </repeat> 109 </repeat>
110 <repeat name="param_set"> 110 <repeat name="param_set">
111 <param name="sp_value" value="0.1"/> 111 <param name="sp_value" value="0.1" />
112 <param name="sp_name" value="lr"/> 112 <param name="sp_name" value="lr" />
113 </repeat> 113 </repeat>
114 <repeat name="param_set"> 114 <repeat name="param_set">
115 <param name="sp_value" value="'adamax'"/> 115 <param name="sp_value" value="'adamax'" />
116 <param name="sp_name" value="optimizer"/> 116 <param name="sp_name" value="optimizer" />
117 </repeat> 117 </repeat>
118 </section> 118 </section>
119 <section name="test_split"> 119 <section name="test_split">
120 <conditional name="split_algos"> 120 <conditional name="split_algos">
121 <param name="shuffle" value="simple"/> 121 <param name="shuffle" value="simple" />
122 <param name="test_size" value="0.2"/> 122 <param name="test_size" value="0.2" />
123 <param name="random_state" value="123"/> 123 <param name="random_state" value="123" />
124 </conditional> 124 </conditional>
125 </section> 125 </section>
126 <section name="val_split"> 126 <section name="val_split">
127 <conditional name="split_algos"> 127 <conditional name="split_algos">
128 <param name="shuffle" value="simple"/> 128 <param name="shuffle" value="simple" />
129 <param name="test_size" value="0.2"/> 129 <param name="test_size" value="0.2" />
130 <param name="random_state" value="456"/> 130 <param name="random_state" value="456" />
131 </conditional> 131 </conditional>
132 </section> 132 </section>
133 <section name="metrics"> 133 <section name="metrics">
134 <conditional name="scoring"> 134 <conditional name="scoring">
135 <param name="primary_scoring" value="r2"/> 135 <param name="primary_scoring" value="r2" />
136 <param name="secondary_scoring" value="neg_mean_absolute_error"/> 136 <param name="secondary_scoring" value="neg_mean_absolute_error" />
137 </conditional> 137 </conditional>
138 </section> 138 </section>
139 </conditional> 139 </conditional>
140 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 140 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
141 <param name="header1" value="true" /> 141 <param name="header1" value="true" />
142 <param name="selected_column_selector_option" value="all_columns"/> 142 <param name="selected_column_selector_option" value="all_columns" />
143 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 143 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
144 <param name="header2" value="true" /> 144 <param name="header2" value="true" />
145 <param name="selected_column_selector_option2" value="all_columns"/> 145 <param name="selected_column_selector_option2" value="all_columns" />
146 <param name="save" value="save_weights"/> 146 <param name="save" value="save_weights" />
147 <output name="outfile_result"> 147 <output name="outfile_result">
148 <assert_contents> 148 <assert_contents>
149 <has_n_columns n="2"/> 149 <has_n_columns n="2" />
150 <has_text text="0.6626"/> 150 <has_text text="0.6384" />
151 <has_text text="5.598"/> 151 <has_text text="-6.072" />
152 </assert_contents> 152 </assert_contents>
153 </output> 153 </output>
154 <output name="outfile_object" file="train_test_eval_model01" compare="sim_size" delta="5"/> 154 <output name="outfile_object" file="train_test_eval_model01" compare="sim_size" delta="5" />
155 <output name="outfile_weights" file="train_test_eval_weights01.h5" compare="sim_size" delta="5"/> 155 <output name="outfile_weights" file="train_test_eval_weights01.h5" compare="sim_size" delta="5" />
156 </test> 156 </test>
157 <test> 157 <test>
158 <conditional name="experiment_schemes"> 158 <conditional name="experiment_schemes">
159 <param name="selected_exp_scheme" value="train_val_test"/> 159 <param name="selected_exp_scheme" value="train_val_test" />
160 <param name="infile_estimator" value="keras_model04" ftype="zip"/> 160 <param name="infile_estimator" value="keras_model04" ftype="zip" />
161 <section name="hyperparams_swapping"> 161 <section name="hyperparams_swapping">
162 <param name="infile_params" value="keras_params04.tabular" ftype="tabular"/> 162 <param name="infile_params" value="keras_params04.tabular" ftype="tabular" />
163 <repeat name="param_set"> 163 <repeat name="param_set">
164 <param name="sp_value" value="999"/> 164 <param name="sp_value" value="999" />
165 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed"/> 165 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed" />
166 </repeat> 166 </repeat>
167 <repeat name="param_set"> 167 <repeat name="param_set">
168 <param name="sp_value" value="999"/> 168 <param name="sp_value" value="999" />
169 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed"/> 169 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed" />
170 </repeat> 170 </repeat>
171 <repeat name="param_set"> 171 <repeat name="param_set">
172 <param name="sp_value" value="0.1"/> 172 <param name="sp_value" value="0.1" />
173 <param name="sp_name" value="lr"/> 173 <param name="sp_name" value="lr" />
174 </repeat> 174 </repeat>
175 <repeat name="param_set"> 175 <repeat name="param_set">
176 <param name="sp_value" value="'adamax'"/> 176 <param name="sp_value" value="'adamax'" />
177 <param name="sp_name" value="optimizer"/> 177 <param name="sp_name" value="optimizer" />
178 </repeat> 178 </repeat>
179 </section> 179 </section>
180 <section name="test_split"> 180 <section name="test_split">
181 <conditional name="split_algos"> 181 <conditional name="split_algos">
182 <param name="shuffle" value="group"/> 182 <param name="shuffle" value="group" />
183 <param name="group_names" value="test"/> 183 <param name="group_names" value="test" />
184 <section name="groups_selector"> 184 <section name="groups_selector">
185 <param name="infile_g" value="regression_groups.tabular" ftype="tabular"/> 185 <param name="infile_g" value="regression_groups.tabular" ftype="tabular" />
186 <param name="header_g" value="true"/> 186 <param name="header_g" value="true" />
187 <conditional name="column_selector_options_g"> 187 <conditional name="column_selector_options_g">
188 <param name="selected_column_selector_option_g" value="by_index_number"/> 188 <param name="selected_column_selector_option_g" value="by_index_number" />
189 <param name="col_g" value="1"/> 189 <param name="col_g" value="1" />
190 </conditional> 190 </conditional>
191 </section> 191 </section>
192 </conditional> 192 </conditional>
193 </section> 193 </section>
194 <section name="val_split"> 194 <section name="val_split">
195 <conditional name="split_algos"> 195 <conditional name="split_algos">
196 <param name="shuffle" value="group"/> 196 <param name="shuffle" value="group" />
197 <param name="group_names" value="validation"/> 197 <param name="group_names" value="validation" />
198 </conditional> 198 </conditional>
199 </section> 199 </section>
200 <section name="metrics"> 200 <section name="metrics">
201 <conditional name="scoring"> 201 <conditional name="scoring">
202 <param name="primary_scoring" value="r2"/> 202 <param name="primary_scoring" value="r2" />
203 <param name="secondary_scoring" value="neg_mean_absolute_error"/> 203 <param name="secondary_scoring" value="neg_mean_absolute_error" />
204 </conditional> 204 </conditional>
205 </section> 205 </section>
206 </conditional> 206 </conditional>
207 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 207 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
208 <param name="header1" value="true" /> 208 <param name="header1" value="true" />
209 <param name="selected_column_selector_option" value="all_columns"/> 209 <param name="selected_column_selector_option" value="all_columns" />
210 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 210 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
211 <param name="header2" value="true" /> 211 <param name="header2" value="true" />
212 <param name="selected_column_selector_option2" value="all_columns"/> 212 <param name="selected_column_selector_option2" value="all_columns" />
213 <param name="save" value="save_weights"/> 213 <param name="save" value="save_weights" />
214 <output name="outfile_result" > 214 <output name="outfile_result">
215 <assert_contents> 215 <assert_contents>
216 <has_n_columns n="2"/> 216 <has_n_columns n="2" />
217 <has_text text="0.667"/> 217 <has_text text="0.627" />
218 <has_text text="-5.586"/> 218 <has_text text="-6.012" />
219 </assert_contents> 219 </assert_contents>
220 </output> 220 </output>
221 <output name="outfile_weights" file="train_test_eval_weights02.h5" compare="sim_size" delta="5"/> 221 <output name="outfile_weights" file="train_test_eval_weights02.h5" compare="sim_size" delta="5" />
222 </test> 222 </test>
223 <test> 223 <test>
224 <conditional name="experiment_schemes"> 224 <conditional name="experiment_schemes">
225 <param name="selected_exp_scheme" value="train_test"/> 225 <param name="selected_exp_scheme" value="train_test" />
226 <param name="infile_estimator" value="pipeline10" ftype="zip"/> 226 <param name="infile_estimator" value="pipeline10" ftype="zip" />
227 <section name="hyperparams_swapping"> 227 <section name="hyperparams_swapping">
228 <param name="infile_params" value="get_params10.tabular" ftype="tabular"/> 228 <param name="infile_params" value="get_params10.tabular" ftype="tabular" />
229 <repeat name="param_set"> 229 <repeat name="param_set">
230 <param name="sp_value" value="10"/> 230 <param name="sp_value" value="10" />
231 <param name="sp_name" value="adaboostregressor__random_state"/> 231 <param name="sp_name" value="adaboostregressor__random_state" />
232 </repeat> 232 </repeat>
233 <repeat name="param_set"> 233 <repeat name="param_set">
234 <param name="sp_value" value=": sklearn_tree.ExtraTreeRegressor(random_state=0)"/> 234 <param name="sp_value" value=": sklearn_tree.ExtraTreeRegressor(random_state=0)" />
235 <param name="sp_name" value="adaboostregressor__base_estimator"/> 235 <param name="sp_name" value="adaboostregressor__base_estimator" />
236 </repeat> 236 </repeat>
237 </section> 237 </section>
238 <section name="test_split"> 238 <section name="test_split">
239 <conditional name="split_algos"> 239 <conditional name="split_algos">
240 <param name="shuffle" value="simple"/> 240 <param name="shuffle" value="simple" />
241 <param name="test_size" value="0.2"/> 241 <param name="test_size" value="0.2" />
242 <param name="random_state" value="123"/> 242 <param name="random_state" value="123" />
243 </conditional> 243 </conditional>
244 </section> 244 </section>
245 <section name="val_split"> 245 <section name="val_split">
246 <conditional name="split_algos"> 246 <conditional name="split_algos">
247 <param name="shuffle" value="simple"/> 247 <param name="shuffle" value="simple" />
248 <param name="test_size" value="0.2"/> 248 <param name="test_size" value="0.2" />
249 <param name="random_state" value="456"/> 249 <param name="random_state" value="456" />
250 </conditional> 250 </conditional>
251 </section> 251 </section>
252 <section name="metrics"> 252 <section name="metrics">
253 <conditional name="scoring"> 253 <conditional name="scoring">
254 <param name="primary_scoring" value="r2"/> 254 <param name="primary_scoring" value="r2" />
255 <param name="secondary_scoring" value="neg_mean_absolute_error"/> 255 <param name="secondary_scoring" value="neg_mean_absolute_error" />
256 </conditional> 256 </conditional>
257 </section> 257 </section>
258 </conditional> 258 </conditional>
259 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 259 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
260 <param name="header1" value="true" /> 260 <param name="header1" value="true" />
261 <param name="selected_column_selector_option" value="all_columns"/> 261 <param name="selected_column_selector_option" value="all_columns" />
262 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 262 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
263 <param name="header2" value="true" /> 263 <param name="header2" value="true" />
264 <param name="selected_column_selector_option2" value="all_columns"/> 264 <param name="selected_column_selector_option2" value="all_columns" />
265 <param name="save" value="nope"/> 265 <param name="save" value="nope" />
266 <output name="outfile_result" file="train_test_eval03.tabular"/> 266 <output name="outfile_result" file="train_test_eval03.tabular" />
267 </test> 267 </test>
268 </tests> 268 </tests>
269 <help> 269 <help>
270 <![CDATA[ 270 <![CDATA[
271 **What it does** 271 **What it does**
281 Performance scores. 281 Performance scores.
282 282
283 ]]> 283 ]]>
284 </help> 284 </help>
285 <expand macro="sklearn_citation"> 285 <expand macro="sklearn_citation">
286 <expand macro="skrebate_citation"/> 286 <expand macro="skrebate_citation" />
287 <expand macro="xgboost_citation"/> 287 <expand macro="xgboost_citation" />
288 <expand macro="keras_citation"/> 288 <expand macro="keras_citation" />
289 </expand> 289 </expand>
290 </tool> 290 </tool>