comparison keras_train_and_eval.xml @ 0:03f61bb3ca43 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5b2ac730ec6d3b762faa9034eddd19ad1b347476"
author bgruening
date Mon, 16 Dec 2019 05:36:53 -0500
parents
children ccd6269fad60
comparison
equal deleted inserted replaced
-1:000000000000 0:03f61bb3ca43
1 <tool id="keras_train_and_eval" name="Deep learning training and evaluation" version="@VERSION@">
2 <description>conduct deep training and evaluation either implicitly or explicitly</description>
3 <macros>
4 <import>main_macros.xml</import>
5 <import>keras_macros.xml</import>
6 </macros>
7 <expand macro="python_requirements"/>
8 <expand macro="macro_stdio"/>
9 <version_command>echo "@VERSION@"</version_command>
<!--
Shell command template (Cheetah syntax inside CDATA).
1. Exports HDF5_USE_FILE_LOCKING='FALSE' before anything else runs.
2. Only in 'refseq_and_interval' input mode: bgzip-compresses the target file,
   tabix-indexes it with the bed preset, and copies the reference genome; all
   three results are written to relative paths named after the datasets'
   element identifiers and are later passed to the script as `pwd`-based paths.
3. Runs keras_train_and_eval.py with the inputs config file and the estimator,
   plus input arguments chosen by the selected input mode (fasta path, or
   reference genome / intervals / targets, or infile1). infile2 is always passed.
4. The estimator/weights outputs and the y_true/y_preds outputs are passed only
   when the matching value is present in the 'save' selection; the groups file
   is passed only when the test split shuffle mode is 'group'.
5. The script's stdout is first redirected into $outfile_result, that file is
   copied to ../tool_stdout, and finally tmp_outfile_result (the path given to
   the script as its result file) is copied over $outfile_result.
   NOTE(review): presumably this keeps the training log visible as the job
   stdout while the dataset receives only the scores; confirm against the
   script and the Galaxy job directory layout.
-->
10 <command>
11 <![CDATA[
12 export HDF5_USE_FILE_LOCKING='FALSE';
13 #if $input_options.selected_input == 'refseq_and_interval'
14 bgzip -c '$input_options.target_file' > '${target_file.element_identifier}.gz' &&
15 tabix -p bed '${target_file.element_identifier}.gz' &&
16 cp '$input_options.ref_genome_file' '${ref_genome_file.element_identifier}' &&
17 #end if
18 python '$__tool_directory__/keras_train_and_eval.py'
19 --inputs '$inputs'
20 --estimator '$experiment_schemes.infile_estimator'
21 #if $input_options.selected_input == 'seq_fasta'
22 --fasta_path '$input_options.fasta_path'
23 #elif $input_options.selected_input == 'refseq_and_interval'
24 --ref_seq "`pwd`/${ref_genome_file.element_identifier}"
25 --interval '$input_options.interval_file'
26 --targets "`pwd`/${target_file.element_identifier}.gz"
27 #else
28 --infile1 '$input_options.infile1'
29 #end if
30 --infile2 '$input_options.infile2'
31 --outfile_result "`pwd`/tmp_outfile_result"
32 #if $save and 'save_estimator' in str($save)
33 --outfile_object '$outfile_object'
34 --outfile_weights '$outfile_weights'
35 #end if
36 #if $save and 'save_prediction' in str($save)
37 --outfile_y_true '$outfile_y_true'
38 --outfile_y_preds '$outfile_y_preds'
39 #end if
40 #if $experiment_schemes.test_split.split_algos.shuffle == 'group'
41 --groups '$experiment_schemes.test_split.split_algos.groups_selector.infile_g'
42 #end if
43 >'$outfile_result' && cp '$outfile_result' "`pwd`/../tool_stdout"
44 && cp tmp_outfile_result '$outfile_result';
45
46 ]]>
47 </command>
<!-- Declares the config file named "inputs"; the command template hands its
     path to the script through the $inputs variable.
     NOTE(review): per the Galaxy tool schema this file should hold a dump of
     the submitted tool parameters; confirm the format the script expects. -->
48 <configfiles>
49 <inputs name="inputs" />
50 </configfiles>
<!-- Tool form.
     experiment_schemes: selects between the two experiment schemes; each case
     expands the estimator/hyperparameter macro, its holdout section(s) and the
     scoring selection.
     Note that in the 'train_val' case the single holdout section is still
     named test_split (titled "Validation holdout"); the command template reads
     experiment_schemes.test_split.split_algos.shuffle for both schemes.
     Only test_split nests the cv_groups macro; val_split expands the split
     parameters without it.
     save: optional multi-select controlling which extra outputs are produced
     (see the filters in the outputs section). -->
51 <inputs>
52 <conditional name="experiment_schemes">
53 <param name="selected_exp_scheme" type="select" label="Select a scheme">
54 <option value="train_val" selected="true">Train and Validate</option>
55 <option value="train_val_test">Train, Validate and Evaluate</option>
56 </param>
57 <when value="train_val">
58 <expand macro="estimator_and_hyperparameter"/>
59 <section name="test_split" title="Validation holdout" expanded="false">
60 <expand macro="train_test_split_params">
61 <expand macro="cv_groups"/>
62 </expand>
63 </section>
64 <section name="metrics" title="Metrics for evaluation" expanded="false">
65 <expand macro="scoring_selection"/>
66 </section>
67 </when>
68 <when value="train_val_test">
69 <expand macro="estimator_and_hyperparameter"/>
70 <section name="test_split" title="Test holdout" expanded="false">
71 <expand macro="train_test_split_params">
72 <expand macro="cv_groups"/>
73 </expand>
74 </section>
75 <section name="val_split" title="Validation holdout (recommend using the same splitting method as for test holdout)" expanded="false">
76 <expand macro="train_test_split_params"/>
77 </section>
78 <section name="metrics" title="Metrics for evaluation" expanded="false">
79 <expand macro="scoring_selection"/>
80 </section>
81 </when>
82 </conditional>
83 <expand macro="sl_mixed_input_plus_sequence"/>
84 <param name="save" type="select" multiple="true" display="checkboxes" label="Save the fitted model" optional="true" help="Evaluation scores will be output by default.">
85 <option value="save_estimator" selected="true">Fitted estimator in skeleton and weights, separately</option>
86 <option value="save_prediction">True labels and prediction results from evaluation for downstream analysis</option>
87 </param>
88 </inputs>
<!-- Output datasets.
     outfile_result has no filter and is therefore always created.
     outfile_object / outfile_weights are created only when the 'save'
     selection contains save_estimator; outfile_y_true / outfile_y_preds only
     when it contains save_prediction. The same two conditions guard the
     matching arguments in the command template. -->
89 <outputs>
90 <data format="tabular" name="outfile_result"/>
91 <data format="zip" name="outfile_object" label="Fitted estimator or estimator skeleton on ${on_string}">
92 <filter>str(save) and 'save_estimator' in str(save)</filter>
93 </data>
94 <data format="h5" name="outfile_weights" label="Weights trained on ${on_string}">
95 <filter>str(save) and 'save_estimator' in str(save)</filter>
96 </data>
97 <data format="tabular" name="outfile_y_true" label="True labels/target values on ${on_string}">
98 <filter>str(save) and 'save_prediction' in str(save)</filter>
99 </data>
100 <data format="tabular" name="outfile_y_preds" label="All predictions on ${on_string}">
101 <filter>str(save) and 'save_prediction' in str(save)</filter>
102 </data>
103 </outputs>
104 <tests>
<!-- Test 1: train_val_test scheme on keras_model04 with simple shuffle splits
     (test_size 0.2 for both holdouts; random_state 123 for test, 456 for
     validation). Four hyperparameters are swapped: the kernel initializer
     seeds of the two Dense layers, lr and optimizer. Only the estimator is
     saved. Asserts a two-column result containing 0.6626 and 5.5986, and
     compares the saved model and weights by size (sim_size, delta 5). -->
105 <test>
106 <conditional name="experiment_schemes">
107 <param name="selected_exp_scheme" value="train_val_test"/>
108 <param name="infile_estimator" value="keras_model04" ftype="zip"/>
109 <section name="hyperparams_swapping">
110 <param name="infile_params" value="keras_params04.tabular" ftype="tabular"/>
111 <repeat name="param_set">
112 <param name="sp_value" value="999"/>
113 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed"/>
114 </repeat>
115 <repeat name="param_set">
116 <param name="sp_value" value="999"/>
117 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed"/>
118 </repeat>
119 <repeat name="param_set">
120 <param name="sp_value" value="0.1"/>
121 <param name="sp_name" value="lr"/>
122 </repeat>
123 <repeat name="param_set">
124 <param name="sp_value" value="'adamax'"/>
125 <param name="sp_name" value="optimizer"/>
126 </repeat>
127 </section>
128 <section name="test_split">
129 <conditional name="split_algos">
130 <param name="shuffle" value="simple"/>
131 <param name="test_size" value="0.2"/>
132 <param name="random_state" value="123"/>
133 </conditional>
134 </section>
135 <section name="val_split">
136 <conditional name="split_algos">
137 <param name="shuffle" value="simple"/>
138 <param name="test_size" value="0.2"/>
139 <param name="random_state" value="456"/>
140 </conditional>
141 </section>
142 <section name="metrics">
143 <conditional name="scoring">
144 <param name="primary_scoring" value="r2"/>
145 <param name="secondary_scoring" value="neg_mean_absolute_error"/>
146 </conditional>
147 </section>
148 </conditional>
149 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
150 <param name="header1" value="true" />
151 <param name="selected_column_selector_option" value="all_columns"/>
152 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
153 <param name="header2" value="true" />
154 <param name="selected_column_selector_option2" value="all_columns"/>
155 <param name="save" value="save_estimator"/>
156 <output name="outfile_result">
157 <assert_contents>
158 <has_n_columns n="2"/>
159 <has_text text="0.6626"/>
160 <has_text text="5.5986"/>
161 </assert_contents>
162 </output>
163 <output name="outfile_object" file="train_test_eval_model01" compare="sim_size" delta="5"/>
164 <output name="outfile_weights" file="train_test_eval_weights01.h5" compare="sim_size" delta="5"/>
165 </test>
<!-- Test 2: same model and hyperparameter swaps as the first test, but both
     holdouts use the 'group' shuffle mode: the test split takes group name
     "test" and the validation split takes group name "validation". Group
     labels come from column 1 of regression_groups.tabular (with header),
     supplied through the test_split groups_selector only. Both save options
     are selected. Asserts a two-column result containing 0.667 and 5.586,
     compares the weights by size and y_true against a reference file; the
     outfile_object and outfile_y_preds outputs are not asserted here. -->
166 <test>
167 <conditional name="experiment_schemes">
168 <param name="selected_exp_scheme" value="train_val_test"/>
169 <param name="infile_estimator" value="keras_model04" ftype="zip"/>
170 <section name="hyperparams_swapping">
171 <param name="infile_params" value="keras_params04.tabular" ftype="tabular"/>
172 <repeat name="param_set">
173 <param name="sp_value" value="999"/>
174 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed"/>
175 </repeat>
176 <repeat name="param_set">
177 <param name="sp_value" value="999"/>
178 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed"/>
179 </repeat>
180 <repeat name="param_set">
181 <param name="sp_value" value="0.1"/>
182 <param name="sp_name" value="lr"/>
183 </repeat>
184 <repeat name="param_set">
185 <param name="sp_value" value="'adamax'"/>
186 <param name="sp_name" value="optimizer"/>
187 </repeat>
188 </section>
189 <section name="test_split">
190 <conditional name="split_algos">
191 <param name="shuffle" value="group"/>
192 <param name="group_names" value="test"/>
193 <section name="groups_selector">
194 <param name="infile_g" value="regression_groups.tabular" ftype="tabular"/>
195 <param name="header_g" value="true"/>
196 <conditional name="column_selector_options_g">
197 <param name="selected_column_selector_option_g" value="by_index_number"/>
198 <param name="col_g" value="1"/>
199 </conditional>
200 </section>
201 </conditional>
202 </section>
203 <section name="val_split">
204 <conditional name="split_algos">
205 <param name="shuffle" value="group"/>
206 <param name="group_names" value="validation"/>
207 </conditional>
208 </section>
209 <section name="metrics">
210 <conditional name="scoring">
211 <param name="primary_scoring" value="r2"/>
212 <param name="secondary_scoring" value="neg_mean_absolute_error"/>
213 </conditional>
214 </section>
215 </conditional>
216 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
217 <param name="header1" value="true" />
218 <param name="selected_column_selector_option" value="all_columns"/>
219 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
220 <param name="header2" value="true" />
221 <param name="selected_column_selector_option2" value="all_columns"/>
222 <param name="save" value="save_estimator,save_prediction"/>
223 <output name="outfile_result" >
224 <assert_contents>
225 <has_n_columns n="2"/>
226 <has_text text="0.667"/>
227 <has_text text="5.586"/>
228 </assert_contents>
229 </output>
230 <output name="outfile_weights" file="train_test_eval_weights02.h5" compare="sim_size" delta="5"/>
231 <output name="outfile_y_true" file="keras_train_eval_y_true02.tabular" ftype="tabular"/>
232 </test>
<!-- Test 3: train_val scheme on the pipeline10 estimator, swapping the
     adaboostregressor random_state and base_estimator parameters, with a
     simple shuffle holdout (test_size 0.2, random_state 123). Nothing is
     saved (empty 'save' value), so only the result table is produced; it is
     compared against train_test_eval03.tabular.
     The 'train_val' case of experiment_schemes defines only the test_split
     and metrics sections, so this test supplies no val_split parameters. -->
233 <test>
234 <conditional name="experiment_schemes">
235 <param name="selected_exp_scheme" value="train_val"/>
236 <param name="infile_estimator" value="pipeline10" ftype="zip"/>
237 <section name="hyperparams_swapping">
238 <param name="infile_params" value="get_params10.tabular" ftype="tabular"/>
239 <repeat name="param_set">
240 <param name="sp_value" value="10"/>
241 <param name="sp_name" value="adaboostregressor__random_state"/>
242 </repeat>
243 <repeat name="param_set">
244 <param name="sp_value" value=": sklearn_tree.ExtraTreeRegressor(random_state=0)"/>
245 <param name="sp_name" value="adaboostregressor__base_estimator"/>
246 </repeat>
247 </section>
248 <section name="test_split">
249 <conditional name="split_algos">
250 <param name="shuffle" value="simple"/>
251 <param name="test_size" value="0.2"/>
252 <param name="random_state" value="123"/>
253 </conditional>
254 </section>
262 <section name="metrics">
263 <conditional name="scoring">
264 <param name="primary_scoring" value="r2"/>
265 <param name="secondary_scoring" value="neg_mean_absolute_error"/>
266 </conditional>
267 </section>
268 </conditional>
269 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
270 <param name="header1" value="true" />
271 <param name="selected_column_selector_option" value="all_columns"/>
272 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
273 <param name="header2" value="true" />
274 <param name="selected_column_selector_option2" value="all_columns"/>
275 <param name="save" value=""/>
276 <output name="outfile_result" file="train_test_eval03.tabular"/>
277 </test>
278 </tests>
279 <help>
280 <![CDATA[
281 **What it does**
282
283 Given a pre-built keras deep learning model and labeled training dataset, this tool works in two modes.
284
285 - Train and Validate: the training dataset is split into train and validation portions. The model is fitted on the train portion while its performance is validated on the validation portion repeatedly as training progresses. Finally, a fitted model (skeleton + weights) and its validation performance scores are output.
286
287
288 - Train, Validate and Evaluate: the training dataset is split into three portions: train, val and test. The same `Train and Validate` procedure runs on the train and val portions. The test portion is held out exclusively for testing (evaluation). As a result, a fitted model (skeleton + weights) and test performance scores are output.
289
290 In both modes, besides the performance scores, the true labels and predicted values can also be output; these can be used to generate plots in other tools, for example the machine learning visualization extensions.
291
292 Note that since all training and model parameters are accessible and changeable in the `Hyperparameter Swapping` section, the training and evaluation processes are transparent and fully controllable.
293
294 **Input**
295
296 - tabular
297 - sparse
298 - `sequences in a fasta file` to work with DNA, RNA and Proteins with corresponding fasta data generator
299 - `reference genome and intervals` works exclusively with `GenomicIntervalBatchGenerator`.
300
301 **Output**
302
303 - performance scores from evaluation
304 - fitted estimator skeleton and weights
305 - true labels or values and predicted values from the evaluation
306
307 ]]>
308 </help>
<!-- Expands the sklearn_citation macro with the keras_citation macro nested
     inside it as its child content. -->
309 <expand macro="sklearn_citation">
310 <expand macro="keras_citation"/>
311 </expand>
312 </tool>