comparison train_test_eval.xml @ 0:68aaa903052a draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author bgruening
date Fri, 09 Aug 2019 07:09:06 -0400
parents
children e23cfe4be9d4
comparison
equal deleted inserted replaced
-1:000000000000 0:68aaa903052a
1 <tool id="sklearn_train_test_eval" name="Train, Test and Evaluation" version="@VERSION@">
2 <description>fit a model using part of dataset and evaluate using the rest</description>
3 <macros>
4 <import>main_macros.xml</import>
5 <import>keras_macros.xml</import>
6 </macros>
7 <expand macro="python_requirements"/>
8 <expand macro="macro_stdio"/>
9 <version_command>echo "@VERSION@"</version_command>
<!--
  Command template (Cheetah).
  * Sets HDF5_USE_FILE_LOCKING to 'FALSE' before anything else runs.
  * For the 'refseq_and_interval' input mode, the target file is first
    compressed with bgzip and indexed with tabix (BED preset); the resulting
    .gz in the working directory is what gets passed as the targets option.
  * train_test_eval.py always receives the inputs config file, the estimator
    and infile2; the remaining data options depend on the selected input mode.
  * The script writes its result to tmp_outfile_result in the working
    directory. Its stdout is redirected into $outfile_result, which is then
    overwritten by copying tmp_outfile_result over it.
  * The outfile_object option is passed unless save is 'nope'; the
    outfile_weights option only when save is 'save_weights'; the groups
    option only when the test split uses the 'group' shuffle.
  NOTE(review): target_file.element_identifier is referenced without the
  input_options prefix that the dataset path on the same line uses; confirm
  that it resolves in this scope.
-->
10 <command detect_errors="aggressive">
11 <![CDATA[
12 export HDF5_USE_FILE_LOCKING='FALSE';
13 #if $input_options.selected_input == 'refseq_and_interval'
14 bgzip -c '$input_options.target_file' > '${target_file.element_identifier}.gz' &&
15 tabix -p bed '${target_file.element_identifier}.gz' &&
16 #end if
17 python '$__tool_directory__/train_test_eval.py'
18 --inputs '$inputs'
19 --estimator '$experiment_schemes.infile_estimator'
20 #if $input_options.selected_input == 'seq_fasta'
21 --fasta_path '$input_options.fasta_path'
22 #elif $input_options.selected_input == 'refseq_and_interval'
23 --ref_seq '$input_options.ref_genome_file'
24 --interval '$input_options.interval_file'
25 --targets "`pwd`/${target_file.element_identifier}.gz"
26 #else
27 --infile1 '$input_options.infile1'
28 #end if
29 --infile2 '$input_options.infile2'
30 --outfile_result "`pwd`/tmp_outfile_result"
31 #if $save != 'nope'
32 --outfile_object '$outfile_object'
33 #end if
34 #if $save == 'save_weights'
35 --outfile_weights '$outfile_weights'
36 #end if
37 #if $experiment_schemes.test_split.split_algos.shuffle == 'group'
38 --groups '$experiment_schemes.test_split.split_algos.groups_selector.infile_g'
39 #end if
40 >'$outfile_result' && cp tmp_outfile_result '$outfile_result';
41
42 ]]>
43 </command>
<!--
  Declares the "inputs" config file. The command template passes its path to
  train_test_eval.py through the inputs option ('$inputs').
  NOTE(review): presumably Galaxy fills this file with the tool's parameter
  values; confirm the exact format against the Galaxy tool schema.
-->
44 <configfiles>
45 <inputs name="inputs" />
46 </configfiles>
<!--
  Tool form.
  * experiment_schemes: selects between "train_test" (default) and
    "train_val_test". Both branches expand the estimator_and_hyperparameter
    macro, a "test_split" section (train_test_split_params with cv_groups
    nested inside it) and a "metrics" section (scoring_selection).
    Only "train_val_test" adds a "val_split" section, and that section expands
    train_test_split_params without cv_groups.
  * sl_mixed_input: macro defined outside this file.
    NOTE(review): presumably this provides the input_options conditional that
    the command template reads; confirm in the macro file.
  * save: "nope" (default), "save_estimator" or "save_weights"; the command
    template and the output filters both branch on this value.
-->
47 <inputs>
48 <conditional name="experiment_schemes">
49 <param name="selected_exp_scheme" type="select" label="Select a scheme">
50 <option value="train_test" selected="true">Train and Test</option>
51 <option value="train_val_test">Train, Validate and Test</option>
52 </param>
53 <when value="train_test">
54 <expand macro="estimator_and_hyperparameter"/>
55 <section name="test_split" title="Test holdout" expanded="false">
56 <expand macro="train_test_split_params">
57 <expand macro="cv_groups"/>
58 </expand>
59 </section>
60 <section name="metrics" title="Metrics for evaluation" expanded="false">
61 <expand macro="scoring_selection"/>
62 </section>
63 </when>
64 <when value="train_val_test">
65 <expand macro="estimator_and_hyperparameter"/>
66 <section name="test_split" title="Test holdout" expanded="false">
67 <expand macro="train_test_split_params">
68 <expand macro="cv_groups"/>
69 </expand>
70 </section>
71 <section name="val_split" title="Validation holdout (recommend using the same method for both validation and test)" expanded="false">
72 <expand macro="train_test_split_params"/>
73 </section>
74 <section name="metrics" title="Metrics for evaluation" expanded="false">
75 <expand macro="scoring_selection"/>
76 </section>
77 </when>
78 </conditional>
79 <expand macro="sl_mixed_input"/>
80 <param name="save" type="select" label="Save the fitted model" help="For security reason, deep learning models will be saved into two datasets, model skeleton and weights.">
81 <option value="nope" selected="true">Nope, save is unnecessary</option>
82 <option value="save_estimator">Fitted whole estimator (excluding deep learning)</option>
83 <option value="save_weights">Model skeleton and weights, for deep learning exclusively</option>
84 </param>
85 </inputs>
<!--
  Output datasets.
  * outfile_result  : tabular result, always produced.
  * outfile_object  : zip, produced unless "save" is 'nope'.
  * outfile_weights : h5, produced only when "save" is 'save_weights'.
-->
86 <outputs>
87 <data name="outfile_result" format="tabular"/>
88 <data name="outfile_object" format="zip" label="Fitted estimator or estimator skeleton on ${on_string}">
89 <filter>save != 'nope'</filter>
90 </data>
91 <data name="outfile_weights" format="h5" label="Weights trained on ${on_string}">
92 <filter>save == 'save_weights'</filter>
93 </data>
94 </outputs>
95 <tests>
<!--
  Test 1: "train_val_test" scheme using the keras_model04 estimator fixture.
  Four hyperparameters are swapped (two initializer seeds, lr, optimizer).
  Both the test and the validation holdouts use the "simple" shuffle with
  test_size 0.2 (random_state 123 and 456 respectively). Scoring is r2 with
  neg_mean_absolute_error as the secondary metric. "save" is 'save_weights',
  so all three outputs are checked: the result table by column count and
  contained text, the object and the weights by approximate size (delta 5).
-->
96 <test>
97 <conditional name="experiment_schemes">
98 <param name="selected_exp_scheme" value="train_val_test"/>
99 <param name="infile_estimator" value="keras_model04" ftype="zip"/>
100 <section name="hyperparams_swapping">
101 <param name="infile_params" value="keras_params04.tabular" ftype="tabular"/>
102 <repeat name="param_set">
103 <param name="sp_value" value="999"/>
104 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed"/>
105 </repeat>
106 <repeat name="param_set">
107 <param name="sp_value" value="999"/>
108 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed"/>
109 </repeat>
110 <repeat name="param_set">
111 <param name="sp_value" value="0.1"/>
112 <param name="sp_name" value="lr"/>
113 </repeat>
114 <repeat name="param_set">
115 <param name="sp_value" value="'adamax'"/>
116 <param name="sp_name" value="optimizer"/>
117 </repeat>
118 </section>
119 <section name="test_split">
120 <conditional name="split_algos">
121 <param name="shuffle" value="simple"/>
122 <param name="test_size" value="0.2"/>
123 <param name="random_state" value="123"/>
124 </conditional>
125 </section>
126 <section name="val_split">
127 <conditional name="split_algos">
128 <param name="shuffle" value="simple"/>
129 <param name="test_size" value="0.2"/>
130 <param name="random_state" value="456"/>
131 </conditional>
132 </section>
133 <section name="metrics">
134 <conditional name="scoring">
135 <param name="primary_scoring" value="r2"/>
136 <param name="secondary_scoring" value="neg_mean_absolute_error"/>
137 </conditional>
138 </section>
139 </conditional>
140 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
141 <param name="header1" value="true" />
142 <param name="selected_column_selector_option" value="all_columns"/>
143 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
144 <param name="header2" value="true" />
145 <param name="selected_column_selector_option2" value="all_columns"/>
146 <param name="save" value="save_weights"/>
147 <output name="outfile_result">
148 <assert_contents>
149 <has_n_columns n="2"/>
150 <has_text text="0.68419"/>
151 <has_text text="5.2990"/>
152 </assert_contents>
153 </output>
154 <output name="outfile_object" file="train_test_eval_model01" compare="sim_size" delta="5"/>
155 <output name="outfile_weights" file="train_test_eval_weights01.h5" compare="sim_size" delta="5"/>
156 </test>
<!--
  Test 2: "train_val_test" scheme with group-based holdouts.
  The test holdout uses the "group" shuffle with group_names "test" and reads
  the groups from column 1 of regression_groups.tabular (header present).
  The validation holdout also uses the "group" shuffle, with group_names
  "validation"; it sets no groups_selector, matching the tool form where only
  test_split expands the cv_groups macro.
  Hyperparameter swaps, scoring and data inputs are the same as in test 1.
  Only the result table and the weights output are checked here.
-->
157 <test>
158 <conditional name="experiment_schemes">
159 <param name="selected_exp_scheme" value="train_val_test"/>
160 <param name="infile_estimator" value="keras_model04" ftype="zip"/>
161 <section name="hyperparams_swapping">
162 <param name="infile_params" value="keras_params04.tabular" ftype="tabular"/>
163 <repeat name="param_set">
164 <param name="sp_value" value="999"/>
165 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed"/>
166 </repeat>
167 <repeat name="param_set">
168 <param name="sp_value" value="999"/>
169 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed"/>
170 </repeat>
171 <repeat name="param_set">
172 <param name="sp_value" value="0.1"/>
173 <param name="sp_name" value="lr"/>
174 </repeat>
175 <repeat name="param_set">
176 <param name="sp_value" value="'adamax'"/>
177 <param name="sp_name" value="optimizer"/>
178 </repeat>
179 </section>
180 <section name="test_split">
181 <conditional name="split_algos">
182 <param name="shuffle" value="group"/>
183 <param name="group_names" value="test"/>
184 <section name="groups_selector">
185 <param name="infile_g" value="regression_groups.tabular" ftype="tabular"/>
186 <param name="header_g" value="true"/>
187 <conditional name="column_selector_options_g">
188 <param name="selected_column_selector_option_g" value="by_index_number"/>
189 <param name="col_g" value="1"/>
190 </conditional>
191 </section>
192 </conditional>
193 </section>
194 <section name="val_split">
195 <conditional name="split_algos">
196 <param name="shuffle" value="group"/>
197 <param name="group_names" value="validation"/>
198 </conditional>
199 </section>
200 <section name="metrics">
201 <conditional name="scoring">
202 <param name="primary_scoring" value="r2"/>
203 <param name="secondary_scoring" value="neg_mean_absolute_error"/>
204 </conditional>
205 </section>
206 </conditional>
207 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
208 <param name="header1" value="true" />
209 <param name="selected_column_selector_option" value="all_columns"/>
210 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
211 <param name="header2" value="true" />
212 <param name="selected_column_selector_option2" value="all_columns"/>
213 <param name="save" value="save_weights"/>
214 <output name="outfile_result" >
215 <assert_contents>
216 <has_n_columns n="2"/>
217 <has_text text="0.69617"/>
218 <has_text text="5.2261"/>
219 </assert_contents>
220 </output>
221 <output name="outfile_weights" file="train_test_eval_weights02.h5" compare="sim_size" delta="5"/>
222 </test>
<!--
  Test 3: "train_test" scheme with the pipeline10 estimator fixture.
  Two hyperparameters are swapped (the AdaBoost random_state and its base
  estimator). The test holdout uses the "simple" shuffle with test_size 0.2
  and random_state 123; scoring is r2 with neg_mean_absolute_error as the
  secondary metric. "save" is 'nope', so only the result table is produced
  and it is compared against train_test_eval03.tabular.
  The "train_test" branch of the tool form has no val_split section, so this
  test sets none.
-->
223 <test>
224 <conditional name="experiment_schemes">
225 <param name="selected_exp_scheme" value="train_test"/>
226 <param name="infile_estimator" value="pipeline10" ftype="zip"/>
227 <section name="hyperparams_swapping">
228 <param name="infile_params" value="get_params10.tabular" ftype="tabular"/>
229 <repeat name="param_set">
230 <param name="sp_value" value="10"/>
231 <param name="sp_name" value="adaboostregressor__random_state"/>
232 </repeat>
233 <repeat name="param_set">
234 <param name="sp_value" value=": sklearn_tree.ExtraTreeRegressor(random_state=0)"/>
235 <param name="sp_name" value="adaboostregressor__base_estimator"/>
236 </repeat>
237 </section>
238 <section name="test_split">
239 <conditional name="split_algos">
240 <param name="shuffle" value="simple"/>
241 <param name="test_size" value="0.2"/>
242 <param name="random_state" value="123"/>
243 </conditional>
244 </section>
252 <section name="metrics">
253 <conditional name="scoring">
254 <param name="primary_scoring" value="r2"/>
255 <param name="secondary_scoring" value="neg_mean_absolute_error"/>
256 </conditional>
257 </section>
258 </conditional>
259 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
260 <param name="header1" value="true" />
261 <param name="selected_column_selector_option" value="all_columns"/>
262 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
263 <param name="header2" value="true" />
264 <param name="selected_column_selector_option2" value="all_columns"/>
265 <param name="save" value="nope"/>
266 <output name="outfile_result" file="train_test_eval03.tabular"/>
267 </test>
268 </tests>
269 <help>
270 <![CDATA[
271 **What it does**
272
273 Given an estimator and a dataset, this tool fits the estimator on part of the dataset and evaluates the performance of the fitted estimator on the rest of the dataset. It has two modes: train-test and train-val-test.
274
275 - train-test: the dataset is split into train and test portions. The estimator is trained on the train portion, and its performance is evaluated on the test portion.
276
277 - train-val-test: the dataset is split into three portions: train, val and test. Validation happens alongside the training process, which is often useful in **deep learning**.
278
279 **Output**
280
281 Performance scores.
282
283 ]]>
284 </help>
<!--
  Citations come from macros defined outside this file. The skrebate, xgboost
  and keras citation expansions are nested inside the sklearn_citation
  expansion.
-->
285 <expand macro="sklearn_citation">
286 <expand macro="skrebate_citation"/>
287 <expand macro="xgboost_citation"/>
288 <expand macro="keras_citation"/>
289 </expand>
290 </tool>