comparison keras_train_and_eval.xml @ 10:b3093f953091 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author bgruening
date Wed, 09 Aug 2023 13:30:51 +0000
parents 3866911c93ae
children 818f9b69d8a0
comparison
equal deleted inserted replaced
9:e1317b5502fa 10:b3093f953091
1 <tool id="keras_train_and_eval" name="Deep learning training and evaluation" version="@VERSION@" profile="20.05"> 1 <tool id="keras_train_and_eval" name="Deep learning training and evaluation" version="@VERSION@" profile="@PROFILE@">
2 <description>conduct deep training and evaluation either implicitly or explicitly</description> 2 <description>conduct deep training and evaluation either implicitly or explicitly</description>
3 <macros> 3 <macros>
4 <import>main_macros.xml</import> 4 <import>main_macros.xml</import>
5 <import>keras_macros.xml</import> 5 <import>keras_macros.xml</import>
6 </macros> 6 </macros>
17 #end if 17 #end if
18 python '$__tool_directory__/keras_train_and_eval.py' 18 python '$__tool_directory__/keras_train_and_eval.py'
19 --inputs '$inputs' 19 --inputs '$inputs'
20 --estimator '$experiment_schemes.infile_estimator' 20 --estimator '$experiment_schemes.infile_estimator'
21 #if $input_options.selected_input == 'seq_fasta' 21 #if $input_options.selected_input == 'seq_fasta'
22 --fasta_path '$input_options.fasta_path' 22 --fasta_path '$input_options.fasta_path'
23 #elif $input_options.selected_input == 'refseq_and_interval' 23 #elif $input_options.selected_input == 'refseq_and_interval'
24 --ref_seq "`pwd`/${ref_genome_file.element_identifier}" 24 --ref_seq "`pwd`/${ref_genome_file.element_identifier}"
25 --interval '$input_options.interval_file' 25 --interval '$input_options.interval_file'
26 --targets "`pwd`/${target_file.element_identifier}.gz" 26 --targets "`pwd`/${target_file.element_identifier}.gz"
27 #else 27 #else
28 --infile1 '$input_options.infile1' 28 --infile1 '$input_options.infile1'
29 #end if 29 #end if
30 --infile2 '$input_options.infile2' 30 --infile2 '$input_options.infile2'
31 --outfile_result '$outfile_result' 31 --outfile_result '$outfile_result'
32 #if $save and 'save_estimator' in str($save) 32 #if $save and 'save_estimator' in str($save)
33 --outfile_object '$outfile_object' 33 --outfile_object '$outfile_object'
34 --outfile_weights '$outfile_weights'
35 #end if 34 #end if
36 #if $save and 'save_prediction' in str($save) 35 #if $save and 'save_prediction' in str($save)
37 --outfile_y_true '$outfile_y_true' 36 --outfile_y_true '$outfile_y_true'
38 --outfile_y_preds '$outfile_y_preds' 37 --outfile_y_preds '$outfile_y_preds'
39 #end if 38 #end if
40 #if $experiment_schemes.test_split.split_algos.shuffle == 'group' 39 #if $experiment_schemes.test_split.split_algos.shuffle == 'group'
41 --groups '$experiment_schemes.test_split.split_algos.groups_selector.infile_g' 40 --groups '$experiment_schemes.test_split.split_algos.groups_selector.infile_g'
42 #end if 41 #end if
42
43 ]]> 43 ]]>
44 </command> 44 </command>
45 <configfiles> 45 <configfiles>
46 <inputs name="inputs" /> 46 <inputs name="inputs" />
47 </configfiles> 47 </configfiles>
57 <expand macro="train_test_split_params"> 57 <expand macro="train_test_split_params">
58 <expand macro="cv_groups" /> 58 <expand macro="cv_groups" />
59 </expand> 59 </expand>
60 </section> 60 </section>
61 <section name="metrics" title="Metrics for evaluation" expanded="false"> 61 <section name="metrics" title="Metrics for evaluation" expanded="false">
62 <expand macro="scoring_selection" /> 62 <expand macro="scoring_selection" help="" />
63 </section> 63 </section>
64 </when> 64 </when>
65 <when value="train_val_test"> 65 <when value="train_val_test">
66 <expand macro="estimator_and_hyperparameter" /> 66 <expand macro="estimator_and_hyperparameter" />
67 <section name="test_split" title="Test holdout" expanded="false"> 67 <section name="test_split" title="Test holdout" expanded="false">
70 </expand> 70 </expand>
71 </section> 71 </section>
72 <section name="val_split" title="Validation holdout (recommend using the same splitting method as for test holdout)" expanded="false"> 72 <section name="val_split" title="Validation holdout (recommend using the same splitting method as for test holdout)" expanded="false">
73 <expand macro="train_test_split_params" /> 73 <expand macro="train_test_split_params" />
74 </section> 74 </section>
75 <section name="metrics" title="Metrics for evaluation" expanded="false"> 75 <section name="metrics" title="Metrics from scikit-learn" expanded="false">
76 <expand macro="scoring_selection" /> 76 <expand macro="scoring_selection" help="" />
77 </section> 77 </section>
78 </when> 78 </when>
79 </conditional> 79 </conditional>
80 <expand macro="sl_mixed_input_plus_sequence" /> 80 <expand macro="sl_mixed_input_plus_sequence" />
81 <param name="save" type="select" multiple='true' display="checkboxes" label="Save the fitted model" optional="true" help="Evaluation scores will be output by default."> 81 <param name="save" type="select" multiple='true' display="checkboxes" label="Save the fitted model" optional="true" help="Evaluation scores will be output by default.">
82 <option value="save_estimator" selected="true">Fitted estimator in skeleton and weights, separately</option> 82 <option value="save_estimator" selected="true">Fitted estimator</option>
83 <option value="save_prediction">True labels and prediction results from evaluation for downstream analysis</option> 83 <option value="save_prediction">True labels and prediction results from evaluation for downstream analysis</option>
84 </param> 84 </param>
85 </inputs> 85 </inputs>
86 <outputs> 86 <outputs>
87 <data format="tabular" name="outfile_result" /> 87 <data format="tabular" name="outfile_result" />
88 <data format="zip" name="outfile_object" label="Fitted estimator or estimator skeleton on ${on_string}"> 88 <data format="h5mlm" name="outfile_object" label="Fitted estimator or estimator skeleton on ${on_string}">
89 <filter>str(save) and 'save_estimator' in str(save)</filter>
90 </data>
91 <data format="h5" name="outfile_weights" label="Weights trained on ${on_string}">
92 <filter>str(save) and 'save_estimator' in str(save)</filter> 89 <filter>str(save) and 'save_estimator' in str(save)</filter>
93 </data> 90 </data>
94 <data format="tabular" name="outfile_y_true" label="True labels/target values on ${on_string}"> 91 <data format="tabular" name="outfile_y_true" label="True labels/target values on ${on_string}">
95 <filter>str(save) and 'save_prediction' in str(save)</filter> 92 <filter>str(save) and 'save_prediction' in str(save)</filter>
96 </data> 93 </data>
100 </outputs> 97 </outputs>
101 <tests> 98 <tests>
102 <test> 99 <test>
103 <conditional name="experiment_schemes"> 100 <conditional name="experiment_schemes">
104 <param name="selected_exp_scheme" value="train_val_test" /> 101 <param name="selected_exp_scheme" value="train_val_test" />
105 <param name="infile_estimator" value="keras_model04" ftype="zip" /> 102 <param name="infile_estimator" value="keras_model04" ftype="h5mlm" />
106 <section name="hyperparams_swapping"> 103 <section name="hyperparams_swapping">
107 <param name="infile_params" value="keras_params04.tabular" ftype="tabular" />
108 <repeat name="param_set"> 104 <repeat name="param_set">
109 <param name="sp_value" value="999" /> 105 <param name="sp_value" value="999" />
110 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed" /> 106 <param name="sp_name" value="layers_1_Dense__config__kernel_initializer__config__seed" />
111 </repeat> 107 </repeat>
112 <repeat name="param_set"> 108 <repeat name="param_set">
113 <param name="sp_value" value="999" /> 109 <param name="sp_value" value="999" />
114 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed" /> 110 <param name="sp_name" value="layers_3_Dense__config__kernel_initializer__config__seed" />
115 </repeat> 111 </repeat>
116 <repeat name="param_set"> 112 <repeat name="param_set">
117 <param name="sp_value" value="0.1" /> 113 <param name="sp_value" value="0.1" />
118 <param name="sp_name" value="lr" /> 114 <param name="sp_name" value="learning_rate" />
119 </repeat> 115 </repeat>
120 <repeat name="param_set"> 116 <repeat name="param_set">
121 <param name="sp_value" value="'adamax'" /> 117 <param name="sp_value" value="'adamax'" />
122 <param name="sp_name" value="optimizer" /> 118 <param name="sp_name" value="optimizer" />
123 </repeat> 119 </repeat>
150 <param name="header2" value="true" /> 146 <param name="header2" value="true" />
151 <param name="selected_column_selector_option2" value="all_columns" /> 147 <param name="selected_column_selector_option2" value="all_columns" />
152 <param name="save" value="save_estimator" /> 148 <param name="save" value="save_estimator" />
153 <output name="outfile_result"> 149 <output name="outfile_result">
154 <assert_contents> 150 <assert_contents>
155 <has_n_columns n="2" /> 151 <has_n_columns n="4" />
156 <has_text text="0.638" /> 152 <has_text text="0.794" />
157 <has_text text="-6.072" /> 153 <has_text text="-4.62" />
158 </assert_contents> 154 </assert_contents>
159 </output> 155 </output>
160 <output name="outfile_object" file="train_test_eval_model01" compare="sim_size" delta="50" /> 156 <output name="outfile_object" file="train_test_eval_model01" compare="sim_size" delta="5" />
161 <output name="outfile_weights" file="train_test_eval_weights01.h5" compare="sim_size" delta="50" />
162 </test> 157 </test>
163 <test> 158 <test>
164 <conditional name="experiment_schemes"> 159 <conditional name="experiment_schemes">
165 <param name="selected_exp_scheme" value="train_val_test" /> 160 <param name="selected_exp_scheme" value="train_val_test" />
166 <param name="infile_estimator" value="keras_model04" ftype="zip" /> 161 <param name="infile_estimator" value="keras_model04" ftype="h5mlm" />
167 <section name="hyperparams_swapping"> 162 <section name="hyperparams_swapping">
168 <param name="infile_params" value="keras_params04.tabular" ftype="tabular" />
169 <repeat name="param_set"> 163 <repeat name="param_set">
170 <param name="sp_value" value="999" /> 164 <param name="sp_value" value="999" />
171 <param name="sp_name" value="layers_0_Dense__config__kernel_initializer__config__seed" /> 165 <param name="sp_name" value="layers_1_Dense__config__kernel_initializer__config__seed" />
172 </repeat> 166 </repeat>
173 <repeat name="param_set"> 167 <repeat name="param_set">
174 <param name="sp_value" value="999" /> 168 <param name="sp_value" value="999" />
175 <param name="sp_name" value="layers_2_Dense__config__kernel_initializer__config__seed" /> 169 <param name="sp_name" value="layers_3_Dense__config__kernel_initializer__config__seed" />
176 </repeat> 170 </repeat>
177 <repeat name="param_set"> 171 <repeat name="param_set">
178 <param name="sp_value" value="0.1" /> 172 <param name="sp_value" value="0.1" />
179 <param name="sp_name" value="lr" /> 173 <param name="sp_name" value="learning_rate" />
180 </repeat> 174 </repeat>
181 <repeat name="param_set"> 175 <repeat name="param_set">
182 <param name="sp_value" value="'adamax'" /> 176 <param name="sp_value" value="'adamax'" />
183 <param name="sp_name" value="optimizer" /> 177 <param name="sp_name" value="optimizer" />
184 </repeat> 178 </repeat>
215 <param name="selected_column_selector_option" value="all_columns" /> 209 <param name="selected_column_selector_option" value="all_columns" />
216 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> 210 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
217 <param name="header2" value="true" /> 211 <param name="header2" value="true" />
218 <param name="selected_column_selector_option2" value="all_columns" /> 212 <param name="selected_column_selector_option2" value="all_columns" />
219 <param name="save" value="save_estimator,save_prediction" /> 213 <param name="save" value="save_estimator,save_prediction" />
220 <output name="outfile_result"> 214 <output name="outfile_result" >
221 <assert_contents> 215 <assert_contents>
222 <has_n_columns n="2" /> 216 <has_n_columns n="4" />
223 <has_text text="0.627" /> 217 <has_text text="0.779" />
224 <has_text text="-6.012" /> 218 <has_text text="-4.5" />
225 </assert_contents> 219 </assert_contents>
226 </output> 220 </output>
227 <output name="outfile_weights" file="train_test_eval_weights02.h5" compare="sim_size" delta="50" /> 221 <output name="outfile_object" file="train_test_eval_model02" compare="sim_size" delta="5" />
228 <output name="outfile_y_true" file="keras_train_eval_y_true02.tabular" ftype="tabular" /> 222 <output name="outfile_y_true" file="keras_train_eval_y_true02.tabular" ftype="tabular" />
229 </test> 223 </test>
230 <test> 224 <test>
231 <conditional name="experiment_schemes"> 225 <conditional name="experiment_schemes">
232 <param name="selected_exp_scheme" value="train_val" /> 226 <param name="selected_exp_scheme" value="train_val" />
233 <param name="infile_estimator" value="pipeline10" ftype="zip" /> 227 <param name="infile_estimator" value="pipeline10" ftype="h5mlm" />
234 <section name="hyperparams_swapping"> 228 <section name="hyperparams_swapping">
235 <param name="infile_params" value="get_params10.tabular" ftype="tabular" />
236 <repeat name="param_set"> 229 <repeat name="param_set">
237 <param name="sp_value" value="10" /> 230 <param name="sp_value" value="10" />
238 <param name="sp_name" value="adaboostregressor__random_state" /> 231 <param name="sp_name" value="adaboostregressor__random_state" />
239 </repeat> 232 </repeat>
240 <repeat name="param_set"> 233 <repeat name="param_set">
277 <![CDATA[ 270 <![CDATA[
278 **What it does** 271 **What it does**
279 272
280 Given a pre-built keras deep learning model and labeled training dataset, this tool works in two modes. 273 Given a pre-built keras deep learning model and labeled training dataset, this tool works in two modes.
281 274
282 - Train and Validate: training dataset is split into train and validation portions. The model fits on the train portion, in the meantime performances are validated on the validation portion multiple times along with the training progressing. Finally, a fitted model (skeleton + weights) and its validation performance scores are outputted. 275 - Train and Validate: the input dataset is split into training and validation portions. The model is fitted on the training portion, in the meantime performances are evaluated on the validation portion multiple times while the training is progressing. Finally, a fitted model and its validation performance scores are outputted.
283 276
284 277
285 - Train, Validate and and Evaluate: training dataset is split into three portions, train, val and test. The same `Train and Validate` happens on the train and val portions. The test portion is hold out exclusively for testing (evaluation). As a result, a fitted model (skeleton + weights) and test performance scores are outputted. 278 - Train, Validate and Evaluate: the input dataset is split into three portions, training, validation and testing. The same `Train and Validate` described above is performed on the training and validation portions. The testing portion is used exclusively for testing (evaluation). As a result, a fitted model and test performance scores are outputted.
286 279
287 In both modes, besides the performance scores, the true labels and predicted values are able to be ouputted, which could be used in generating plots in other tools, machine learning visualization extensions, for example. 280 In both modes, besides the performance scores, the true labels and predicted values are outputted, which could be used in generating plots in other tools, machine learning visualization extensions, for example.
288 281
289 Note that since all training and model parameters are accessible and changeable in the `Hyperparameter Swapping` section, the training and evaluation processes are transparent and fully controllable. 282 Note that since all training and model parameters are accessible and changeable in the `Hyperparameter Swapping` section, the training and evaluation processes are flexible and transparent.
283
284 For metrics, there are two sets of metrics for deep learning training and evaluation, one from the keras model builder and the other from scikit-learn. Keras metrics, if selected, are always evaluated, while the sklearn metrics could be ignored when `default` is the selection. Please be aware that not every sklearn metric works with deep learning models at the moment. Feel free to file a ticket if an issue is found; contributions via PRs are always welcome.
290 285
291 **Input** 286 **Input**
292 287
293 - tabular 288 - tabular
294 - sparse 289 - sparse
295 - `sequnences in a fasta file` to work with DNA, RNA and Proteins with corresponding fasta data generator 290 - `sequences in a fasta file` to work with DNA, RNA and proteins with corresponding fasta data generator
296 - `reference genome and intervals` exclusively work with `GenomicIntervalBatchGenerator`. 291 - `reference genome and intervals` exclusively work with `GenomicIntervalBatchGenerator`.
297 292
298 **Output** 293 **Output**
299 294
300 - performance scores from evaluation 295 - performance scores from evaluation
301 - fitted estimator skeleton and weights 296 - fitted estimator
302 - true labels or values and predicted values from the evaluation 297 - true labels or values and predicted values from the evaluation
303 298
304 ]]> 299 ]]>
305 </help> 300 </help>
306 <expand macro="sklearn_citation"> 301 <expand macro="sklearn_citation">