Mercurial > repos > bgruening > sklearn_train_test_eval
comparison train_test_eval.xml @ 9:ead7adad8d0e draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
author | bgruening |
---|---|
date | Tue, 13 Apr 2021 18:45:35 +0000 |
parents | 1b68acd5ac08 |
children | 2eb5c017958d |
comparison

Legend (row status): equal, deleted, inserted, replaced

8:e03a58b31c12 | 9:ead7adad8d0e |
---|---|
1 <tool id="sklearn_train_test_eval" name="Train, Test and Evaluation" version="@VERSION@"> | 1 <tool id="sklearn_train_test_eval" name="Train, Test and Evaluation" version="@VERSION@" profile="20.05"> |
2 <description>fit a model using part of dataset and evaluate using the rest</description> | 2 <description>fit a model using part of dataset and evaluate using the rest</description> |
3 <macros> | 3 <macros> |
4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
5 <import>keras_macros.xml</import> | 5 <import>keras_macros.xml</import> |
6 </macros> | 6 </macros> |
7 <expand macro="python_requirements"/> | 7 <expand macro="python_requirements" /> |
8 <expand macro="macro_stdio"/> | 8 <expand macro="macro_stdio" /> |
9 <version_command>echo "@VERSION@"</version_command> | 9 <version_command>echo "@VERSION@"</version_command> |
10 <command detect_errors="aggressive"> | 10 <command detect_errors="aggressive"> |
11 <![CDATA[ | 11 <![CDATA[ |
12 export HDF5_USE_FILE_LOCKING='FALSE'; | 12 export HDF5_USE_FILE_LOCKING='FALSE'; |
13 #if $input_options.selected_input == 'refseq_and_interval' | 13 #if $input_options.selected_input == 'refseq_and_interval' |
49 <param name="selected_exp_scheme" type="select" label="Select a scheme"> | 49 <param name="selected_exp_scheme" type="select" label="Select a scheme"> |
50 <option value="train_test" selected="true">Train and Test</option> | 50 <option value="train_test" selected="true">Train and Test</option> |
51 <option value="train_val_test">Train, Validate and Test</option> | 51 <option value="train_val_test">Train, Validate and Test</option> |
52 </param> | 52 </param> |
53 <when value="train_test"> | 53 <when value="train_test"> |
54 <expand macro="estimator_and_hyperparameter"/> | 54 <expand macro="estimator_and_hyperparameter" /> |
55 <section name="test_split" title="Test holdout" expanded="false"> | 55 <section name="test_split" title="Test holdout" expanded="false"> |
56 <expand macro="train_test_split_params"> | 56 <expand macro="train_test_split_params"> |
57 <expand macro="cv_groups"/> | 57 <expand macro="cv_groups" /> |
58 </expand> | 58 </expand> |
59 </section> | 59 </section> |
60 <section name="metrics" title="Metrics for evaluation" expanded="false"> | 60 <section name="metrics" title="Metrics for evaluation" expanded="false"> |
61 <expand macro="scoring_selection"/> | 61 <expand macro="scoring_selection" /> |
62 </section> | 62 </section> |
63 </when> | 63 </when> |
64 <when value="train_val_test"> | 64 <when value="train_val_test"> |
65 <expand macro="estimator_and_hyperparameter"/> | 65 <expand macro="estimator_and_hyperparameter" /> |
66 <section name="test_split" title="Test holdout" expanded="false"> | 66 <section name="test_split" title="Test holdout" expanded="false"> |
67 <expand macro="train_test_split_params"> | 67 <expand macro="train_test_split_params"> |
68 <expand macro="cv_groups"/> | 68 <expand macro="cv_groups" /> |
69 </expand> | 69 </expand> |
70 </section> | 70 </section> |
71 <section name="val_split" title="Validation holdout (recommend using the same method for both validation and test)" expanded="false"> | 71 <section name="val_split" title="Validation holdout (recommend using the same method for both validation and test)" expanded="false"> |
72 <expand macro="train_test_split_params"/> | 72 <expand macro="train_test_split_params" /> |
73 </section> | 73 </section> |
74 <section name="metrics" title="Metrics for evaluation" expanded="false"> | 74 <section name="metrics" title="Metrics for evaluation" expanded="false"> |
75 <expand macro="scoring_selection"/> | 75 <expand macro="scoring_selection" /> |
76 </section> | 76 </section> |
77 </when> | 77 </when> |
78 </conditional> | 78 </conditional> |
79 <expand macro="sl_mixed_input_plus_sequence"/> | 79 <expand macro="sl_mixed_input_plus_sequence" /> |
80 <param name="save" type="select" label="Save the fitted model" help="For security reason, deep learning models will be saved into two datasets, model skeleton and weights."> | 80 <param name="save" type="select" label="Save the fitted model" help="For security reason, deep learning models will be saved into two datasets, model skeleton and weights."> |
81 <option value="nope" selected="true">Nope, save is unnecessary</option> | 81 <option value="nope" selected="true">Nope, save is unnecessary</option> |
82 <option value="save_estimator">Fitted whole estimator (excluding deep learning)</option> | 82 <option value="save_estimator">Fitted whole estimator (excluding deep learning)</option> |
83 <option value="save_weights">Model skeleton and weights, for deep learning exclusively</option> | 83 <option value="save_weights">Model skeleton and weights, for deep learning exclusively</option> |
84 </param> | 84 </param> |
85 </inputs> | 85 </inputs> |
86 <outputs> | 86 <outputs> |
87 <data format="tabular" name="outfile_result"/> | 87 <data format="tabular" name="outfile_result" /> |
88 <data format="zip" name="outfile_object" label="Fitted estimator or estimator skeleton on ${on_string}"> | 88 <data format="zip" name="outfile_object" label="Fitted estimator or estimator skeleton on ${on_string}"> |
89 <filter>save != 'nope'</filter> | 89 <filter>save != 'nope'</filter> |
90 </data> | 90 </data> |
91 <data format="h5" name="outfile_weights" label="Weights trained on ${on_string}"> | 91 <data format="h5" name="outfile_weights" label="Weights trained on ${on_string}"> |
92 <filter>save == 'save_weights'</filter> | 92 <filter>save == 'save_weights'</filter> |
93 </data> | 93 </data> |
94 </outputs> | 94 </outputs> |
95 <tests> | 95 <tests> |
96 <test> | 96 <test> |
97 <conditional name="experiment_schemes"> | 97 <conditional name="experiment_schemes"> |
98 <param name="selected_exp_scheme" value="train_val_test"/> | 98 <param name="selected_exp_scheme" value="train_val_test" /> |
99 <param name="infile_estimator" value="keras_model04" ftype="zip"/> | 99 <param name="infile_estimator" value="keras_model04" ftype="zip" /> |
100 <section name="hyperparams_swapping"> | 100 <section name="hyperparams_swapping"> |
101 <param name="infile_params" value="keras_params04.tabular" ftype="tabular"/> | 101 <param name="infile_params" value="keras_params04.tabular" ftype="tabular" /> |
102 <repeat name="param_set"> | 102 <repeat name="param_set"> |
103 <param name="sp_value" value="999"/> | 103 <param name="sp_value" value="999" /> |
104 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed"/> | 104 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed" /> |
105 </repeat> | 105 </repeat> |
106 <repeat name="param_set"> | 106 <repeat name="param_set"> |
107 <param name="sp_value" value="999"/> | 107 <param name="sp_value" value="999" /> |
108 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed"/> | 108 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed" /> |
109 </repeat> | 109 </repeat> |
110 <repeat name="param_set"> | 110 <repeat name="param_set"> |
111 <param name="sp_value" value="0.1"/> | 111 <param name="sp_value" value="0.1" /> |
112 <param name="sp_name" value="lr"/> | 112 <param name="sp_name" value="lr" /> |
113 </repeat> | 113 </repeat> |
114 <repeat name="param_set"> | 114 <repeat name="param_set"> |
115 <param name="sp_value" value="'adamax'"/> | 115 <param name="sp_value" value="'adamax'" /> |
116 <param name="sp_name" value="optimizer"/> | 116 <param name="sp_name" value="optimizer" /> |
117 </repeat> | 117 </repeat> |
118 </section> | 118 </section> |
119 <section name="test_split"> | 119 <section name="test_split"> |
120 <conditional name="split_algos"> | 120 <conditional name="split_algos"> |
121 <param name="shuffle" value="simple"/> | 121 <param name="shuffle" value="simple" /> |
122 <param name="test_size" value="0.2"/> | 122 <param name="test_size" value="0.2" /> |
123 <param name="random_state" value="123"/> | 123 <param name="random_state" value="123" /> |
124 </conditional> | 124 </conditional> |
125 </section> | 125 </section> |
126 <section name="val_split"> | 126 <section name="val_split"> |
127 <conditional name="split_algos"> | 127 <conditional name="split_algos"> |
128 <param name="shuffle" value="simple"/> | 128 <param name="shuffle" value="simple" /> |
129 <param name="test_size" value="0.2"/> | 129 <param name="test_size" value="0.2" /> |
130 <param name="random_state" value="456"/> | 130 <param name="random_state" value="456" /> |
131 </conditional> | 131 </conditional> |
132 </section> | 132 </section> |
133 <section name="metrics"> | 133 <section name="metrics"> |
134 <conditional name="scoring"> | 134 <conditional name="scoring"> |
135 <param name="primary_scoring" value="r2"/> | 135 <param name="primary_scoring" value="r2" /> |
136 <param name="secondary_scoring" value="neg_mean_absolute_error"/> | 136 <param name="secondary_scoring" value="neg_mean_absolute_error" /> |
137 </conditional> | 137 </conditional> |
138 </section> | 138 </section> |
139 </conditional> | 139 </conditional> |
140 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 140 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
141 <param name="header1" value="true" /> | 141 <param name="header1" value="true" /> |
142 <param name="selected_column_selector_option" value="all_columns"/> | 142 <param name="selected_column_selector_option" value="all_columns" /> |
143 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 143 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
144 <param name="header2" value="true" /> | 144 <param name="header2" value="true" /> |
145 <param name="selected_column_selector_option2" value="all_columns"/> | 145 <param name="selected_column_selector_option2" value="all_columns" /> |
146 <param name="save" value="save_weights"/> | 146 <param name="save" value="save_weights" /> |
147 <output name="outfile_result"> | 147 <output name="outfile_result"> |
148 <assert_contents> | 148 <assert_contents> |
149 <has_n_columns n="2"/> | 149 <has_n_columns n="2" /> |
150 <has_text text="0.6626"/> | 150 <has_text text="0.6384" /> |
151 <has_text text="5.598"/> | 151 <has_text text="-6.072" /> |
152 </assert_contents> | 152 </assert_contents> |
153 </output> | 153 </output> |
154 <output name="outfile_object" file="train_test_eval_model01" compare="sim_size" delta="5"/> | 154 <output name="outfile_object" file="train_test_eval_model01" compare="sim_size" delta="5" /> |
155 <output name="outfile_weights" file="train_test_eval_weights01.h5" compare="sim_size" delta="5"/> | 155 <output name="outfile_weights" file="train_test_eval_weights01.h5" compare="sim_size" delta="5" /> |
156 </test> | 156 </test> |
157 <test> | 157 <test> |
158 <conditional name="experiment_schemes"> | 158 <conditional name="experiment_schemes"> |
159 <param name="selected_exp_scheme" value="train_val_test"/> | 159 <param name="selected_exp_scheme" value="train_val_test" /> |
160 <param name="infile_estimator" value="keras_model04" ftype="zip"/> | 160 <param name="infile_estimator" value="keras_model04" ftype="zip" /> |
161 <section name="hyperparams_swapping"> | 161 <section name="hyperparams_swapping"> |
162 <param name="infile_params" value="keras_params04.tabular" ftype="tabular"/> | 162 <param name="infile_params" value="keras_params04.tabular" ftype="tabular" /> |
163 <repeat name="param_set"> | 163 <repeat name="param_set"> |
164 <param name="sp_value" value="999"/> | 164 <param name="sp_value" value="999" /> |
165 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed"/> | 165 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed" /> |
166 </repeat> | 166 </repeat> |
167 <repeat name="param_set"> | 167 <repeat name="param_set"> |
168 <param name="sp_value" value="999"/> | 168 <param name="sp_value" value="999" /> |
169 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed"/> | 169 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed" /> |
170 </repeat> | 170 </repeat> |
171 <repeat name="param_set"> | 171 <repeat name="param_set"> |
172 <param name="sp_value" value="0.1"/> | 172 <param name="sp_value" value="0.1" /> |
173 <param name="sp_name" value="lr"/> | 173 <param name="sp_name" value="lr" /> |
174 </repeat> | 174 </repeat> |
175 <repeat name="param_set"> | 175 <repeat name="param_set"> |
176 <param name="sp_value" value="'adamax'"/> | 176 <param name="sp_value" value="'adamax'" /> |
177 <param name="sp_name" value="optimizer"/> | 177 <param name="sp_name" value="optimizer" /> |
178 </repeat> | 178 </repeat> |
179 </section> | 179 </section> |
180 <section name="test_split"> | 180 <section name="test_split"> |
181 <conditional name="split_algos"> | 181 <conditional name="split_algos"> |
182 <param name="shuffle" value="group"/> | 182 <param name="shuffle" value="group" /> |
183 <param name="group_names" value="test"/> | 183 <param name="group_names" value="test" /> |
184 <section name="groups_selector"> | 184 <section name="groups_selector"> |
185 <param name="infile_g" value="regression_groups.tabular" ftype="tabular"/> | 185 <param name="infile_g" value="regression_groups.tabular" ftype="tabular" /> |
186 <param name="header_g" value="true"/> | 186 <param name="header_g" value="true" /> |
187 <conditional name="column_selector_options_g"> | 187 <conditional name="column_selector_options_g"> |
188 <param name="selected_column_selector_option_g" value="by_index_number"/> | 188 <param name="selected_column_selector_option_g" value="by_index_number" /> |
189 <param name="col_g" value="1"/> | 189 <param name="col_g" value="1" /> |
190 </conditional> | 190 </conditional> |
191 </section> | 191 </section> |
192 </conditional> | 192 </conditional> |
193 </section> | 193 </section> |
194 <section name="val_split"> | 194 <section name="val_split"> |
195 <conditional name="split_algos"> | 195 <conditional name="split_algos"> |
196 <param name="shuffle" value="group"/> | 196 <param name="shuffle" value="group" /> |
197 <param name="group_names" value="validation"/> | 197 <param name="group_names" value="validation" /> |
198 </conditional> | 198 </conditional> |
199 </section> | 199 </section> |
200 <section name="metrics"> | 200 <section name="metrics"> |
201 <conditional name="scoring"> | 201 <conditional name="scoring"> |
202 <param name="primary_scoring" value="r2"/> | 202 <param name="primary_scoring" value="r2" /> |
203 <param name="secondary_scoring" value="neg_mean_absolute_error"/> | 203 <param name="secondary_scoring" value="neg_mean_absolute_error" /> |
204 </conditional> | 204 </conditional> |
205 </section> | 205 </section> |
206 </conditional> | 206 </conditional> |
207 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 207 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
208 <param name="header1" value="true" /> | 208 <param name="header1" value="true" /> |
209 <param name="selected_column_selector_option" value="all_columns"/> | 209 <param name="selected_column_selector_option" value="all_columns" /> |
210 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 210 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
211 <param name="header2" value="true" /> | 211 <param name="header2" value="true" /> |
212 <param name="selected_column_selector_option2" value="all_columns"/> | 212 <param name="selected_column_selector_option2" value="all_columns" /> |
213 <param name="save" value="save_weights"/> | 213 <param name="save" value="save_weights" /> |
214 <output name="outfile_result" > | 214 <output name="outfile_result"> |
215 <assert_contents> | 215 <assert_contents> |
216 <has_n_columns n="2"/> | 216 <has_n_columns n="2" /> |
217 <has_text text="0.667"/> | 217 <has_text text="0.627" /> |
218 <has_text text="-5.586"/> | 218 <has_text text="-6.012" /> |
219 </assert_contents> | 219 </assert_contents> |
220 </output> | 220 </output> |
221 <output name="outfile_weights" file="train_test_eval_weights02.h5" compare="sim_size" delta="5"/> | 221 <output name="outfile_weights" file="train_test_eval_weights02.h5" compare="sim_size" delta="5" /> |
222 </test> | 222 </test> |
223 <test> | 223 <test> |
224 <conditional name="experiment_schemes"> | 224 <conditional name="experiment_schemes"> |
225 <param name="selected_exp_scheme" value="train_test"/> | 225 <param name="selected_exp_scheme" value="train_test" /> |
226 <param name="infile_estimator" value="pipeline10" ftype="zip"/> | 226 <param name="infile_estimator" value="pipeline10" ftype="zip" /> |
227 <section name="hyperparams_swapping"> | 227 <section name="hyperparams_swapping"> |
228 <param name="infile_params" value="get_params10.tabular" ftype="tabular"/> | 228 <param name="infile_params" value="get_params10.tabular" ftype="tabular" /> |
229 <repeat name="param_set"> | 229 <repeat name="param_set"> |
230 <param name="sp_value" value="10"/> | 230 <param name="sp_value" value="10" /> |
231 <param name="sp_name" value="adaboostregressor__random_state"/> | 231 <param name="sp_name" value="adaboostregressor__random_state" /> |
232 </repeat> | 232 </repeat> |
233 <repeat name="param_set"> | 233 <repeat name="param_set"> |
234 <param name="sp_value" value=": sklearn_tree.ExtraTreeRegressor(random_state=0)"/> | 234 <param name="sp_value" value=": sklearn_tree.ExtraTreeRegressor(random_state=0)" /> |
235 <param name="sp_name" value="adaboostregressor__base_estimator"/> | 235 <param name="sp_name" value="adaboostregressor__base_estimator" /> |
236 </repeat> | 236 </repeat> |
237 </section> | 237 </section> |
238 <section name="test_split"> | 238 <section name="test_split"> |
239 <conditional name="split_algos"> | 239 <conditional name="split_algos"> |
240 <param name="shuffle" value="simple"/> | 240 <param name="shuffle" value="simple" /> |
241 <param name="test_size" value="0.2"/> | 241 <param name="test_size" value="0.2" /> |
242 <param name="random_state" value="123"/> | 242 <param name="random_state" value="123" /> |
243 </conditional> | 243 </conditional> |
244 </section> | 244 </section> |
245 <section name="val_split"> | 245 <section name="val_split"> |
246 <conditional name="split_algos"> | 246 <conditional name="split_algos"> |
247 <param name="shuffle" value="simple"/> | 247 <param name="shuffle" value="simple" /> |
248 <param name="test_size" value="0.2"/> | 248 <param name="test_size" value="0.2" /> |
249 <param name="random_state" value="456"/> | 249 <param name="random_state" value="456" /> |
250 </conditional> | 250 </conditional> |
251 </section> | 251 </section> |
252 <section name="metrics"> | 252 <section name="metrics"> |
253 <conditional name="scoring"> | 253 <conditional name="scoring"> |
254 <param name="primary_scoring" value="r2"/> | 254 <param name="primary_scoring" value="r2" /> |
255 <param name="secondary_scoring" value="neg_mean_absolute_error"/> | 255 <param name="secondary_scoring" value="neg_mean_absolute_error" /> |
256 </conditional> | 256 </conditional> |
257 </section> | 257 </section> |
258 </conditional> | 258 </conditional> |
259 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 259 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
260 <param name="header1" value="true" /> | 260 <param name="header1" value="true" /> |
261 <param name="selected_column_selector_option" value="all_columns"/> | 261 <param name="selected_column_selector_option" value="all_columns" /> |
262 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 262 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
263 <param name="header2" value="true" /> | 263 <param name="header2" value="true" /> |
264 <param name="selected_column_selector_option2" value="all_columns"/> | 264 <param name="selected_column_selector_option2" value="all_columns" /> |
265 <param name="save" value="nope"/> | 265 <param name="save" value="nope" /> |
266 <output name="outfile_result" file="train_test_eval03.tabular"/> | 266 <output name="outfile_result" file="train_test_eval03.tabular" /> |
267 </test> | 267 </test> |
268 </tests> | 268 </tests> |
269 <help> | 269 <help> |
270 <![CDATA[ | 270 <![CDATA[ |
271 **What it does** | 271 **What it does** |
281 Performance scores. | 281 Performance scores. |
282 | 282 |
283 ]]> | 283 ]]> |
284 </help> | 284 </help> |
285 <expand macro="sklearn_citation"> | 285 <expand macro="sklearn_citation"> |
286 <expand macro="skrebate_citation"/> | 286 <expand macro="skrebate_citation" /> |
287 <expand macro="xgboost_citation"/> | 287 <expand macro="xgboost_citation" /> |
288 <expand macro="keras_citation"/> | 288 <expand macro="keras_citation" /> |
289 </expand> | 289 </expand> |
290 </tool> | 290 </tool> |