comparison search_model_validation.xml @ 25:fc99237f3392 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author bgruening
date Wed, 09 Aug 2023 13:04:12 +0000
parents 301e07345c93
children
comparison
equal deleted inserted replaced
24:301e07345c93 25:fc99237f3392
1 <tool id="sklearn_searchcv" name="Hyperparameter Search" version="@VERSION@" profile="20.05"> 1 <tool id="sklearn_searchcv" name="Hyperparameter Search" version="@VERSION@" profile="@PROFILE@">
2 <description>performs hyperparameter optimization using various SearchCVs</description> 2 <description>performs hyperparameter optimization using various SearchCVs</description>
3 <macros> 3 <macros>
4 <import>main_macros.xml</import> 4 <import>main_macros.xml</import>
5 <macro name="search_cv_estimator"> 5 <macro name="search_cv_estimator">
6 <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/> 6 <param name="infile_estimator" type="data" format="h5mlm" label="Choose the dataset containing pipeline/estimator object" />
7 <param name="is_deep_learning" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Is the estimator a deep learning model?"/> 7 <param name="is_deep_learning" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Is the estimator a deep learning model?" />
8 <section name="search_params_builder" title="Search parameters Builder" expanded="true"> 8 <section name="search_params_builder" title="Search parameters Builder" expanded="true">
9 <param name="infile_params" type="data" format="tabular" optional="true" label="Choose the dataset containing parameter names" help="This dataset could be the output of `get_params` in the `Estimator Attributes` tool."/> 9 <repeat name="param_set" min="1" max="30" title="Parameter settings for search:">
10 <repeat name="param_set" min="1" max="30" title="Parameter settings for search:"> 10 <param name="sp_name" type="select" optional="true" label="Choose a parameter name (with current value)">
11 <param name="sp_name" type="select" optional="true" label="Choose a parameter name (with current value)"> 11 <options from_dataset="infile_estimator" meta_file_key="hyper_params" startswith="@">
12 <options from_dataset="infile_params"> 12 <column name="name" index="2" />
13 <column name="name" index="2"/> 13 <column name="value" index="1" />
14 <column name="value" index="1"/> 14 <filter type="unique_value" name="unique_param" column="1" />
15 <filter type="unique_value" name="unique_param" column="1"/> 15 </options>
16 </options> 16 </param>
17 </param> 17 <param name="sp_list" type="text" value="" optional="true" label="Search list" help="list or array-like, for example: [1, 10, 100, 1000], [True, False] and ['auto', 'sqrt', None]. See `help` section for more examples">
18 <param name="sp_list" type="text" value="" optional="true" label="Search list" help="list or array-like, for example: [1, 10, 100, 1000], [True, False] and ['auto', 'sqrt', None]. See `help` section for more examples"> 18 <sanitizer>
19 <sanitizer> 19 <valid initial="default">
20 <valid initial="default"> 20 <add value="&apos;" />
21 <add value="&apos;"/> 21 <add value="&quot;" />
22 <add value="&quot;"/> 22 <add value="[" />
23 <add value="["/> 23 <add value="]" />
24 <add value="]"/> 24 </valid>
25 </valid> 25 </sanitizer>
26 </sanitizer> 26 </param>
27 </param> 27 </repeat>
28 </repeat>
29 </section> 28 </section>
30 </macro> 29 </macro>
31 </macros> 30 </macros>
32 <expand macro="python_requirements"/> 31 <expand macro="python_requirements" />
33 <expand macro="macro_stdio"/> 32 <expand macro="macro_stdio" />
34 <version_command>echo "@VERSION@"</version_command> 33 <version_command>echo "@VERSION@"</version_command>
35 <command><![CDATA[ 34 <command>
35 <![CDATA[
36 export HDF5_USE_FILE_LOCKING='FALSE'; 36 export HDF5_USE_FILE_LOCKING='FALSE';
37 #if $input_options.selected_input == 'refseq_and_interval' 37 #if $input_options.selected_input == 'refseq_and_interval'
38 bgzip -c '$input_options.target_file' > '${target_file.element_identifier}.gz' && 38 bgzip -c '$input_options.target_file' > '${target_file.element_identifier}.gz' &&
39 tabix -p bed '${target_file.element_identifier}.gz' && 39 tabix -p bed '${target_file.element_identifier}.gz' &&
40 #end if 40 #end if
41 python '$__tool_directory__/search_model_validation.py' 41 python '$__tool_directory__/search_model_validation.py'
42 --inputs '$inputs' 42 --inputs '$inputs'
43 --estimator '$search_schemes.infile_estimator' 43 --estimator '$infile_estimator'
44 #if $input_options.selected_input == 'seq_fasta' 44 #if $input_options.selected_input == 'seq_fasta'
45 --fasta_path '$input_options.fasta_path' 45 --fasta_path '$input_options.fasta_path'
46 #elif $input_options.selected_input == 'refseq_and_interval' 46 #elif $input_options.selected_input == 'refseq_and_interval'
47 --ref_seq '$input_options.ref_genome_file' 47 --ref_seq '$input_options.ref_genome_file'
48 --interval '$input_options.interval_file' 48 --interval '$input_options.interval_file'
49 --targets "`pwd`/${target_file.element_identifier}.gz" 49 --targets "`pwd`/${target_file.element_identifier}.gz"
50 #else 50 #else
51 --infile1 '$input_options.infile1' 51 --infile1 '$input_options.infile1'
52 #end if 52 #end if
53 --infile2 '$input_options.infile2' 53 --infile2 '$input_options.infile2'
54 #if $save != 'save_no_fit' 54 #if $save != 'save_no_fit'
55 --outfile_result '$outfile_result' 55 --outfile_result '$outfile_result'
56 #end if 56 #end if
57 #if $save == 'save_estimator' 57 #if $save == 'save_estimator'
58 --outfile_object '$outfile_object' 58 --outfile_object '$outfile_object'
59 #end if 59 #end if
60 #if $save == 'save_no_fit' 60 #if $save == 'save_no_fit'
61 --outfile_object '$outfile_object_no_fit' 61 --outfile_object '$outfile_object_no_fit'
62 #end if 62 #end if
63 #if $search_schemes.is_deep_learning == 'booltrue' and $save == 'save_estimator' and $outer_split.split_mode == 'nested_cv' 63 #if $options.cv_selector.selected_cv in ['GroupKFold', 'GroupShuffleSplit', 'LeaveOneGroupOut', 'LeavePGroupsOut']
64 --outfile_weights '$outfile_weights' 64 --groups '$options.cv_selector.groups_selector.infile_g'
65 #end if 65 #end if
66 #if $search_schemes.options.cv_selector.selected_cv in ['GroupKFold', 'GroupShuffleSplit', 'LeaveOneGroupOut', 'LeavePGroupsOut'] 66
67 --groups '$search_schemes.options.cv_selector.groups_selector.infile_g' 67 ]]>
68 #end if
69 ]]>
70 </command> 68 </command>
71 <configfiles> 69 <configfiles>
72 <inputs name="inputs" /> 70 <inputs name="inputs" />
73 </configfiles> 71 </configfiles>
74 <inputs> 72 <inputs>
75 <conditional name="search_schemes"> 73 <conditional name="search_algos">
76 <param name="selected_search_scheme" type="select" label="Select a model selection search scheme"> 74 <param name="selected_search_algo" type="select" label="Select a hyperparameter search algorithm">
77 <option value="GridSearchCV" selected="true">GridSearchCV - Exhaustive search over specified parameter values for an estimator </option> 75 <option value="GridSearchCV" selected="true">GridSearchCV - Exhaustive search over specified parameter values for an estimator </option>
78 <option value="RandomizedSearchCV">RandomizedSearchCV - Randomized search on hyper parameters for an estimator</option> 76 <option value="RandomizedSearchCV">RandomizedSearchCV - Randomized search on hyper parameters for an estimator</option>
77 <option value="skopt.BayesSearchCV">BayesSearchCV - Bayesian optimization over hyper parameters</option>
79 </param> 78 </param>
80 <when value="GridSearchCV"> 79 <when value="GridSearchCV">
81 <expand macro="search_cv_estimator"/>
82 <section name="options" title="Advanced Options for SearchCV" expanded="false">
83 <expand macro="search_cv_options"/>
84 </section>
85 </when> 80 </when>
86 <when value="RandomizedSearchCV"> 81 <when value="RandomizedSearchCV">
87 <expand macro="search_cv_estimator"/> 82 <param argument="n_iter" type="integer" value="10" label="Number of parameter settings that are sampled" />
88 <section name="options" title="Advanced Options for SearchCV" expanded="false"> 83 <expand macro="random_state" />
89 <expand macro="search_cv_options"/> 84 </when>
90 <param argument="n_iter" type="integer" value="10" label="Number of parameter settings that are sampled"/> 85 <when value="skopt.BayesSearchCV">
91 <expand macro="random_state"/> 86 <param argument="optimizer_kwargs" type="text" value="" optional="true" help="dict, optional Dict of arguments passed to :class:Optimizer. For example, {'base_estimator': 'RF'} would use a Random Forest surrogate instead of the default Gaussian Process.">
92 </section> 87 <sanitizer>
88 <valid initial="default">
89 <add value="{" />
90 <add value="}" />
91 </valid>
92 </sanitizer>
93 </param>
94 <param argument="n_iter" type="integer" value="50" label="Number of parameter settings that are sampled" />
95 <param argument="n_points" type="integer" value="1" label="Number of parameter settings to sample in parallel" help="Maximum parallel equals n_points times cv jobs." />
96 <expand macro="random_state" />
93 </when> 97 </when>
94 </conditional> 98 </conditional>
95 <expand macro="sl_mixed_input_plus_sequence"/> 99 <expand macro="search_cv_estimator" />
100 <section name="options" title="Advanced Options for SearchCV" expanded="false">
101 <expand macro="search_cv_options" />
102 </section>
103 <expand macro="sl_mixed_input_plus_sequence" />
96 <conditional name="outer_split"> 104 <conditional name="outer_split">
97 <param name="split_mode" type="select" label="Whether to hold a portion of samples for test exclusively?" help="Nested CV or train_test_split"> 105 <param name="split_mode" type="select" label="Whether to hold a portion of samples for test exclusively, nested CV?">
98 <option value="no" selected="true">Nope</option> 106 <option value="no" selected="true">Nope</option>
99 <option value="nested_cv">Yes - do nested CV</option> 107 <option value="nested_cv">Yes - do nested CV</option>
100 </param> 108 </param>
101 <when value='no'/> 109 <when value='no' />
102 <when value="nested_cv"> 110 <when value="nested_cv">
103 <expand macro="cv_reduced" label="Select the outer cv splitter"/> 111 <expand macro="cv_reduced" label="Select the outer cv splitter" />
104 </when> 112 </when>
105 </conditional> 113 </conditional>
106 <param name="save" type="select" label="Save best estimator?" help="For a non-deep learning model, save will output fitted best_estimator_ (refit must be true) or a list of cv_results_ from each outer split in nested CV mode. For a deep learning model, by checking the boolean option below the model input, the outputs are two parts, model skeleton and weights. Save Deep learning model for nested CV is not supported."> 114 <param name="save" type="select" label="Save best estimator?" help="For a non-deep learning model, save will output fitted best_estimator_ or a list of cv_results_ from each outer split if in nested CV mode. For a deep learning model, by checking the boolean option below the model input, the outputs are two parts, model skeleton and weights. Save Deep learning model for nested CV is not supported.">
107 <option value="nope">Nope, save is unnecessary</option> 115 <option value="nope">Nope, save is unnecessary</option>
108 <option value="save_estimator" selected="true">Fitted best estimator or Detailed cv_results_ from nested CV</option> 116 <option value="save_estimator" selected="true">Fitted best estimator or Detailed cv_results_ from nested CV</option>
109 <option value="save_no_fit">SearchCV object without fitting</option> 117 <option value="save_no_fit">SearchCV object without fitting</option>
110 </param> 118 </param>
111 </inputs> 119 </inputs>
112 <outputs> 120 <outputs>
113 <data format="tabular" name="outfile_result"> 121 <data format="tabular" name="outfile_result">
114 <filter>save != 'save_no_fit'</filter> 122 <filter>save != 'save_no_fit'</filter>
115 </data> 123 </data>
116 <data format="zip" name="outfile_object" label="Fitted estimator or estimator skeleton on ${on_string}"> 124 <data format="h5mlm" name="outfile_object" label="Fitted best estimator on ${on_string}">
117 <filter>save == 'save_estimator' and outer_split['split_mode'] == 'no'</filter> 125 <filter>save == 'save_estimator' and outer_split['split_mode'] == 'no'</filter>
118 </data> 126 </data>
119 <data format="h5" name="outfile_weights" label="Weights trained on ${on_string}">
120 <filter>search_schemes['is_deep_learning'] and save == 'save_estimator' and outer_split['split_mode'] == 'no'</filter>
121 </data>
122 <collection type="list" name="outfile_in_splits" label="cv_results_ from splits on ${on_string}"> 127 <collection type="list" name="outfile_in_splits" label="cv_results_ from splits on ${on_string}">
123 <filter>not search_schemes['is_deep_learning'] and save == 'save_estimator' and outer_split['split_mode'] == 'nested_cv'</filter> 128 <filter>not is_deep_learning and save == 'save_estimator' and outer_split['split_mode'] == 'nested_cv'</filter>
124 <discover_datasets format="tabular" pattern="__name__" directory="cv_results_in_folds"/> 129 <discover_datasets format="tabular" pattern="__name__" directory="cv_results_in_folds" />
125 </collection> 130 </collection>
126 <data format="zip" name="outfile_object_no_fit" label="Unfitted SearchCV on ${on_string}"> 131 <data format="h5mlm" name="outfile_object_no_fit" label="Unfitted SearchCV on ${on_string}">
127 <filter>save == 'save_no_fit'</filter> 132 <filter>save == 'save_no_fit'</filter>
128 </data> 133 </data>
129 </outputs> 134 </outputs>
130 <tests> 135 <tests>
131 <test> 136 <test>
132 <param name="selected_search_scheme" value="GridSearchCV"/> 137 <param name="selected_search_algo" value="GridSearchCV" />
133 <param name="infile_estimator" value="pipeline01" ftype="zip"/> 138 <param name="infile_estimator" value="pipeline01" ftype="h5mlm" />
134 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> 139 <repeat name="param_set">
135 <repeat name="param_set"> 140 <param name="sp_list" value="[1, 10, 100, 1000]" />
136 <param name="sp_list" value="[1, 10, 100, 1000]"/> 141 <param name="sp_name" value="svr__C" />
137 <param name="sp_name" value="svr__C"/> 142 </repeat>
138 </repeat> 143 <repeat name="param_set">
139 <repeat name="param_set"> 144 <param name="sp_list" value="['all', 3, 5, 7, 9]" />
140 <param name="sp_list" value="[-1, 3, 5, 7, 9]"/> 145 <param name="sp_name" value="selectkbest__k" />
141 <param name="sp_name" value="selectkbest__k"/> 146 </repeat>
142 </repeat> 147 <param name="error_score" value="false" />
143 <param name="error_score" value="false"/> 148 <param name="n_splits" value="3" />
144 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 149 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
145 <param name="header1" value="true" /> 150 <param name="header1" value="true" />
146 <param name="selected_column_selector_option" value="all_columns"/> 151 <param name="selected_column_selector_option" value="all_columns" />
147 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 152 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
148 <param name="header2" value="true" /> 153 <param name="header2" value="true" />
149 <param name="selected_column_selector_option2" value="all_columns"/> 154 <param name="selected_column_selector_option2" value="all_columns" />
150 <output name="outfile_result"> 155 <output name="outfile_result">
151 <assert_contents> 156 <assert_contents>
152 <has_n_columns n="13"/> 157 <has_n_columns n="13" />
153 <has_text text="0.7938837807353147"/> 158 <has_text text="0.7938837807" />
154 <has_text text="{'selectkbest__k': 9, 'svr__C': 1}"/> 159 <has_text text="{'selectkbest__k': 9, 'svr__C': 1}" />
155 </assert_contents> 160 </assert_contents>
156 </output> 161 </output>
157 </test> 162 </test>
158 <test expect_failure="true"> 163 <test expect_failure="true">
159 <param name="selected_search_scheme" value="GridSearchCV"/> 164 <param name="selected_search_algo" value="GridSearchCV" />
160 <param name="infile_estimator" value="pipeline01" ftype="zip"/> 165 <param name="infile_estimator" value="pipeline01" ftype="h5mlm" />
161 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> 166 <repeat name="param_set">
162 <repeat name="param_set"> 167 <param name="sp_list" value="[1, 10, 100, 1000]" />
163 <param name="sp_list" value="[1, 10, 100, 1000]"/> 168 <param name="sp_name" value="svr__C" />
164 <param name="sp_name" value="svr__C"/> 169 </repeat>
165 </repeat> 170 <repeat name="param_set">
166 <repeat name="param_set"> 171 <param name="sp_list" value="[-1, 3, 5, 7, 9]" />
167 <param name="sp_list" value="[-1, 3, 5, 7, 9]"/> 172 <param name="sp_name" value="selectkbest__k" />
168 <param name="sp_name" value="selectkbest__k"/> 173 </repeat>
169 </repeat> 174 <param name="error_score" value="true" />
170 <param name="error_score" value="true"/> 175 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
171 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 176 <param name="header1" value="true" />
172 <param name="header1" value="true" /> 177 <param name="selected_column_selector_option" value="all_columns" />
173 <param name="selected_column_selector_option" value="all_columns"/> 178 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
174 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 179 <param name="header2" value="true" />
175 <param name="header2" value="true" /> 180 <param name="selected_column_selector_option2" value="all_columns" />
176 <param name="selected_column_selector_option2" value="all_columns"/> 181 </test>
177 </test> 182 <test>
178 <test> 183 <param name="selected_search_algo" value="RandomizedSearchCV" />
179 <param name="selected_search_scheme" value="RandomizedSearchCV"/> 184 <param name="infile_estimator" value="pipeline01" ftype="h5mlm" />
180 <param name="infile_estimator" value="pipeline01" ftype="zip"/> 185 <repeat name="param_set">
181 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> 186 <param name="sp_list" value="[1, 10, 100, 1000]" />
182 <repeat name="param_set"> 187 <param name="sp_name" value="svr__C" />
183 <param name="sp_list" value="[1, 10, 100, 1000]"/> 188 </repeat>
184 <param name="sp_name" value="svr__C"/> 189 <repeat name="param_set">
185 </repeat> 190 <param name="sp_list" value="['linear', 'poly', 'rbf', 'sigmoid']" />
186 <repeat name="param_set"> 191 <param name="sp_name" value="svr__kernel" />
187 <param name="sp_list" value="['linear', 'poly', 'rbf', 'sigmoid']"/> 192 </repeat>
188 <param name="sp_name" value="svr__kernel"/> 193 <repeat name="param_set">
189 </repeat> 194 <param name="sp_list" value="[3, 5, 7, 9]" />
190 <repeat name="param_set"> 195 <param name="sp_name" value="selectkbest__k" />
191 <param name="sp_list" value="[3, 5, 7, 9]"/> 196 </repeat>
192 <param name="sp_name" value="selectkbest__k"/> 197 <repeat name="param_set">
193 </repeat> 198 <param name="sp_list" value="[True, False]" />
194 <repeat name="param_set"> 199 <param name="sp_name" value="robustscaler__with_centering" />
195 <param name="sp_list" value="[True, False]"/> 200 </repeat>
196 <param name="sp_name" value="robustscaler__with_centering"/> 201 <section name="options">
197 </repeat> 202 <conditional name="cv_selector">
198 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 203 <param name="selected_cv" value="default" />
199 <param name="header1" value="true" /> 204 <param name="n_splits" value="3" />
200 <param name="selected_column_selector_option" value="all_columns"/> 205 </conditional>
201 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 206 </section>
202 <param name="header2" value="true" /> 207 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
203 <param name="selected_column_selector_option2" value="all_columns"/> 208 <param name="header1" value="true" />
209 <param name="selected_column_selector_option" value="all_columns" />
210 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
211 <param name="header2" value="true" />
212 <param name="selected_column_selector_option2" value="all_columns" />
204 <output name="outfile_result" > 213 <output name="outfile_result" >
205 <assert_contents> 214 <assert_contents>
206 <has_n_columns n="15" /> 215 <has_n_columns n="15" />
207 <has_text text="param_robustscaler__with_centering"/> 216 <has_text text="param_robustscaler__with_centering" />
208 </assert_contents> 217 </assert_contents>
209 </output> 218 </output>
210 </test> 219 </test>
211 <test> 220 <test>
212 <param name="selected_search_scheme" value="RandomizedSearchCV"/> 221 <param name="selected_search_algo" value="RandomizedSearchCV" />
213 <param name="infile_estimator" value="pipeline03" ftype="zip"/> 222 <param name="infile_estimator" value="pipeline03" ftype="h5mlm" />
214 <param name="infile_params" value="get_params03.tabular" ftype="tabular"/> 223 <repeat name="param_set">
215 <repeat name="param_set"> 224 <param name="sp_list" value="np_arange(50, 1001, 50)" />
216 <param name="sp_list" value="np_arange(50, 1001, 50)"/> 225 <param name="sp_name" value="xgbclassifier__n_estimators" />
217 <param name="sp_name" value="xgbclassifier__n_estimators"/> 226 </repeat>
218 </repeat> 227 <repeat name="param_set">
219 <repeat name="param_set"> 228 <param name="sp_list" value="scipy_stats_randint(1, 51)" />
220 <param name="sp_list" value="scipy_stats_randint(1, 51)"/> 229 <param name="sp_name" value="xgbclassifier__max_depth" />
221 <param name="sp_name" value="xgbclassifier__max_depth"/> 230 </repeat>
222 </repeat> 231 <repeat name="param_set">
223 <repeat name="param_set"> 232 <param name="sp_list" value="scipy_stats_uniform(0., 1.)" />
224 <param name="sp_list" value="scipy_stats_uniform(0., 1.)"/> 233 <param name="sp_name" value="xgbclassifier__gamma" />
225 <param name="sp_name" value="xgbclassifier__gamma"/> 234 </repeat>
226 </repeat> 235 <repeat name="param_set">
227 <repeat name="param_set"> 236 <param name="sp_list" value="[324089]" />
228 <param name="sp_list" value="[324089]"/> 237 <param name="sp_name" value="xgbclassifier__random_state" />
229 <param name="sp_name" value="xgbclassifier__random_state"/> 238 </repeat>
230 </repeat> 239 <param name="n_splits" value="3" />
231 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 240 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
232 <param name="header1" value="true" /> 241 <param name="header1" value="true" />
233 <param name="selected_column_selector_option" value="all_columns"/> 242 <param name="selected_column_selector_option" value="all_columns" />
234 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 243 <param name="infile2" value="classifier_y.tabular" ftype="tabular" />
235 <param name="header2" value="true" /> 244 <param name="header2" value="true" />
236 <param name="selected_column_selector_option2" value="all_columns"/> 245 <param name="selected_column_selector_option2" value="all_columns" />
237 <output name="outfile_result" > 246 <output name="outfile_result" >
238 <assert_contents> 247 <assert_contents>
239 <has_n_columns n="15" /> 248 <has_n_columns n="15" />
240 <has_text text="param_xgbclassifier__max_depth"/> 249 <has_text text="param_xgbclassifier__max_depth" />
241 </assert_contents> 250 </assert_contents>
242 </output> 251 </output>
243 </test> 252 </test>
244 <test> 253 <test>
245 <param name="selected_search_scheme" value="GridSearchCV"/> 254 <param name="selected_search_algo" value="GridSearchCV" />
246 <param name="infile_estimator" value="pipeline04" ftype="zip"/> 255 <param name="infile_estimator" value="pipeline04" ftype="h5mlm" />
247 <param name="infile_params" value="get_params04.tabular" ftype="tabular"/> 256 <repeat name="param_set">
248 <repeat name="param_set"> 257 <param name="sp_list" value="list(range(100, 1001, 100))" />
249 <param name="sp_list" value="list(range(100, 1001, 100))"/> 258 <param name="sp_name" value="linearsvc__random_state" />
250 <param name="sp_name" value="linearsvc__random_state"/> 259 </repeat>
251 </repeat> 260 <repeat name="param_set">
252 <repeat name="param_set"> 261 <param name="sp_list" value=": [sklearn_ensemble.ExtraTreesClassifier(n_estimators=100, random_state=324089)]" />
253 <param name="sp_list" value=": [sklearn_ensemble.ExtraTreesClassifier(n_estimators=100, random_state=324089)]"/> 262 <param name="sp_name" value="selectfrommodel__estimator" />
254 <param name="sp_name" value="selectfrommodel__estimator"/> 263 </repeat>
255 </repeat> 264 <param name="n_splits" value="3" />
256 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 265 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
257 <param name="header1" value="true" /> 266 <param name="header1" value="true" />
258 <param name="selected_column_selector_option" value="all_columns"/> 267 <param name="selected_column_selector_option" value="all_columns" />
259 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 268 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
260 <param name="header2" value="true" /> 269 <param name="header2" value="true" />
261 <param name="selected_column_selector_option2" value="all_columns"/> 270 <param name="selected_column_selector_option2" value="all_columns" />
262 <output name="outfile_result"> 271 <output name="outfile_result">
263 <assert_contents> 272 <assert_contents>
264 <has_n_columns n="13"/> 273 <has_n_columns n="13" />
265 <has_text text="0.05363984674329502"/> 274 <has_text text="0.05363984674329502" />
266 </assert_contents> 275 </assert_contents>
267 </output> 276 </output>
268 </test> 277 </test>
269 <test> 278 <test>
270 <param name="selected_search_scheme" value="GridSearchCV"/> 279 <param name="selected_search_algo" value="GridSearchCV" />
271 <param name="infile_estimator" value="pipeline01" ftype="zip"/> 280 <param name="infile_estimator" value="pipeline01" ftype="h5mlm" />
272 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> 281 <repeat name="param_set">
273 <repeat name="param_set"> 282 <param name="sp_list" value="[1, 10, 100, 1000]" />
274 <param name="sp_list" value="[1, 10, 100, 1000]"/> 283 <param name="sp_name" value="svr__C" />
275 <param name="sp_name" value="svr__C"/> 284 </repeat>
276 </repeat> 285 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
277 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 286 <param name="header1" value="true" />
278 <param name="header1" value="true" /> 287 <param name="selected_column_selector_option" value="all_columns" />
279 <param name="selected_column_selector_option" value="all_columns"/> 288 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
280 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 289 <param name="header2" value="true" />
281 <param name="header2" value="true" /> 290 <param name="selected_column_selector_option2" value="all_columns" />
282 <param name="selected_column_selector_option2" value="all_columns"/> 291 <param name="save" value="save_estimator" />
283 <param name="save" value="save_estimator"/> 292 <output name="outfile_object" file="searchCV01" compare="sim_size" delta="10" />
284 <output name="outfile_object" file="searchCV01" compare="sim_size" delta="10"/> 293 </test>
285 </test> 294 <test>
286 <test> 295 <param name="selected_search_algo" value="GridSearchCV" />
287 <param name="selected_search_scheme" value="GridSearchCV"/> 296 <param name="infile_estimator" value="pipeline06" ftype="h5mlm" />
288 <param name="infile_estimator" value="pipeline06" ftype="zip"/> 297 <repeat name="param_set">
289 <param name="infile_params" value="get_params06.tabular" ftype="tabular"/> 298 <param name="sp_list" value="[10, 50, 200, 1000]" />
290 <repeat name="param_set"> 299 <param name="sp_name" value="adaboostregressor__n_estimators" />
291 <param name="sp_list" value="[10, 50, 200, 1000]"/> 300 </repeat>
292 <param name="sp_name" value="adaboostregressor__n_estimators"/> 301 <repeat name="param_set">
293 </repeat> 302 <param name="sp_list" value="[324089]" />
294 <repeat name="param_set"> 303 <param name="sp_name" value="adaboostregressor__random_state" />
295 <param name="sp_list" value="[324089]"/> 304 </repeat>
296 <param name="sp_name" value="adaboostregressor__random_state"/> 305 <param name="n_splits" value="3" />
297 </repeat> 306 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
298 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 307 <param name="header1" value="true" />
299 <param name="header1" value="true" /> 308 <param name="selected_column_selector_option" value="all_columns" />
300 <param name="selected_column_selector_option" value="all_columns"/> 309 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
301 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 310 <param name="header2" value="true" />
302 <param name="header2" value="true" /> 311 <param name="selected_column_selector_option2" value="all_columns" />
303 <param name="selected_column_selector_option2" value="all_columns"/> 312 <output name="outfile_result">
304 <output name="outfile_result"> 313 <assert_contents>
305 <assert_contents> 314 <has_n_columns n="13" />
306 <has_n_columns n="13"/>
307 <has_text_matching expression=".+0.7772355090078996" /> 315 <has_text_matching expression=".+0.7772355090078996" />
308 </assert_contents> 316 </assert_contents>
309 </output> 317 </output>
310 </test> 318 </test>
311 <test> 319 <test>
312 <param name="selected_search_scheme" value="GridSearchCV"/> 320 <param name="selected_search_algo" value="GridSearchCV" />
313 <param name="infile_estimator" value="pipeline07" ftype="zip"/> 321 <param name="infile_estimator" value="pipeline07" ftype="h5mlm" />
314 <param name="infile_params" value="get_params07.tabular" ftype="tabular"/> 322 <repeat name="param_set">
315 <repeat name="param_set"> 323 <param name="sp_list" value="[10, 50, 100, 200]" />
316 <param name="sp_list" value="[10, 50, 100, 200]"/> 324 <param name="sp_name" value="adaboostclassifier__n_estimators" />
317 <param name="sp_name" value="adaboostclassifier__n_estimators"/> 325 </repeat>
318 </repeat> 326 <repeat name="param_set">
319 <repeat name="param_set"> 327 <param name="sp_list" value="[324089]" />
320 <param name="sp_list" value="[324089]"/> 328 <param name="sp_name" value="adaboostclassifier__random_state" />
321 <param name="sp_name" value="adaboostclassifier__random_state"/> 329 </repeat>
322 </repeat> 330 <repeat name="param_set">
323 <repeat name="param_set"> 331 <param name="sp_list" value="[1.0, 2.0]" />
324 <param name="sp_list" value="[1.0, 2.0]"/> 332 <param name="sp_name" value="rbfsampler__gamma" />
325 <param name="sp_name" value="rbfsampler__gamma"/> 333 </repeat>
326 </repeat> 334 <param name='selected_cv' value="default" />
327 <param name='selected_cv' value="default"/> 335 <param name="n_splits" value="3" />
328 <param name="n_splits" value="3"/> 336 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
329 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 337 <param name="header1" value="true" />
330 <param name="header1" value="true" /> 338 <param name="selected_column_selector_option" value="all_columns" />
331 <param name="selected_column_selector_option" value="all_columns"/> 339 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
332 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 340 <param name="header2" value="true" />
333 <param name="header2" value="true" /> 341 <param name="selected_column_selector_option2" value="all_columns" />
334 <param name="selected_column_selector_option2" value="all_columns"/> 342 <output name="outfile_result">
335 <output name="outfile_result"> 343 <assert_contents>
336 <assert_contents> 344 <has_n_columns n="14" />
337 <has_n_columns n="14"/>
338 <has_text_matching expression=".+0.05747126436781609[^/d]" /> 345 <has_text_matching expression=".+0.05747126436781609[^/d]" />
339 </assert_contents> 346 </assert_contents>
340 </output> 347 </output>
341 </test> 348 </test>
342 <test> 349 <test>
343 <param name="selected_search_scheme" value="GridSearchCV"/> 350 <param name="selected_search_algo" value="GridSearchCV" />
344 <param name="infile_estimator" value="pipeline08" ftype="zip"/> 351 <param name="infile_estimator" value="pipeline08" ftype="h5mlm" />
345 <param name="infile_params" value="get_params08.tabular" ftype="tabular"/> 352 <repeat name="param_set">
346 <repeat name="param_set"> 353 <param name="sp_list" value="[10, 50, 100, 200]" />
347 <param name="sp_list" value="[10, 50, 100, 200]"/> 354 <param name="sp_name" value="adaboostclassifier__n_estimators" />
348 <param name="sp_name" value="adaboostclassifier__n_estimators"/> 355 </repeat>
349 </repeat> 356 <repeat name="param_set">
350 <repeat name="param_set"> 357 <param name="sp_list" value="[324089]" />
351 <param name="sp_list" value="[324089]"/> 358 <param name="sp_name" value="adaboostclassifier__random_state" />
352 <param name="sp_name" value="adaboostclassifier__random_state"/> 359 </repeat>
353 </repeat> 360 <repeat name="param_set">
354 <repeat name="param_set"> 361 <param name="sp_list" value="['ward', 'complete', 'average']" />
355 <param name="sp_list" value="['ward', 'complete', 'average']"/> 362 <param name="sp_name" value="featureagglomeration__linkage" />
356 <param name="sp_name" value="featureagglomeration__linkage"/> 363 </repeat>
357 </repeat> 364 <param name="n_splits" value="3" />
358 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 365 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
359 <param name="header1" value="true" /> 366 <param name="header1" value="true" />
360 <param name="selected_column_selector_option" value="all_columns"/> 367 <param name="selected_column_selector_option" value="all_columns" />
361 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 368 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
362 <param name="header2" value="true" /> 369 <param name="header2" value="true" />
363 <param name="selected_column_selector_option2" value="all_columns"/> 370 <param name="selected_column_selector_option2" value="all_columns" />
364 <output name="outfile_result"> 371 <output name="outfile_result">
365 <assert_contents> 372 <assert_contents>
366 <has_text_matching expression=".+0.08045977011494253[^/w]+10[^/w]" /> 373 <has_text_matching expression=".+0.08045977011494253[^/w]+10[^/w]" />
367 </assert_contents> 374 </assert_contents>
368 </output> 375 </output>
369 </test> 376 </test>
370 <test> 377 <test>
371 <param name="selected_search_scheme" value="GridSearchCV"/> 378 <param name="selected_search_algo" value="GridSearchCV" />
372 <param name="infile_estimator" value="pipeline01" ftype="zip"/> 379 <param name="infile_estimator" value="pipeline01" ftype="h5mlm" />
373 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> 380 <repeat name="param_set">
374 <repeat name="param_set"> 381 <param name="sp_list" value="[1, 10, 100, 1000]" />
375 <param name="sp_list" value="[1, 10, 100, 1000]"/> 382 <param name="sp_name" value="svr__C" />
376 <param name="sp_name" value="svr__C"/> 383 </repeat>
377 </repeat> 384 <param name='selected_cv' value="StratifiedKFold" />
378 <param name='selected_cv' value="StratifiedKFold"/> 385 <param name="n_splits" value="3" />
379 <param name="n_splits" value="3"/>
380 <param name="shuffle" value="true" /> 386 <param name="shuffle" value="true" />
381 <param name="random_state" value="10"/> 387 <param name="random_state" value="10" />
382 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 388 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
383 <param name="header1" value="true" /> 389 <param name="header1" value="true" />
384 <param name="selected_column_selector_option" value="all_columns"/> 390 <param name="selected_column_selector_option" value="all_columns" />
385 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 391 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
386 <param name="header2" value="true" /> 392 <param name="header2" value="true" />
387 <param name="selected_column_selector_option2" value="all_columns"/> 393 <param name="selected_column_selector_option2" value="all_columns" />
388 <param name="save" value="save_estimator"/> 394 <param name="save" value="save_estimator" />
389 <output name="outfile_object" file="searchCV02" compare="sim_size" delta="10"/> 395 <output name="outfile_object" file="searchCV02" compare="sim_size" delta="10" />
390 </test> 396 </test>
391 <test> 397 <test>
392 <param name="selected_search_scheme" value="GridSearchCV"/> 398 <param name="selected_search_algo" value="GridSearchCV" />
393 <param name="infile_estimator" value="pipeline03" ftype="zip"/> 399 <param name="infile_estimator" value="pipeline03" ftype="h5mlm" />
394 <param name="infile_params" value="get_params03.tabular" ftype="tabular"/> 400 <repeat name="param_set">
395 <repeat name="param_set"> 401 <param name="sp_list" value="[10, 50, 200, 1000]" />
396 <param name="sp_list" value="[10, 50, 200, 1000]"/> 402 <param name="sp_name" value="xgbclassifier__n_estimators" />
397 <param name="sp_name" value="xgbclassifier__n_estimators"/> 403 </repeat>
398 </repeat> 404 <repeat name="param_set">
399 <repeat name="param_set"> 405 <param name="sp_list" value="[324089]" />
400 <param name="sp_list" value="[324089]"/> 406 <param name="sp_name" value="xgbclassifier__random_state" />
401 <param name="sp_name" value="xgbclassifier__random_state"/> 407 </repeat>
402 </repeat> 408 <param name="primary_scoring" value="balanced_accuracy" />
403 <param name="primary_scoring" value="balanced_accuracy"/> 409 <param name='selected_cv' value="StratifiedKFold" />
404 <param name='selected_cv' value="StratifiedKFold"/> 410 <param name="n_splits" value="3" />
405 <param name="n_splits" value="3"/>
406 <param name="shuffle" value="true" /> 411 <param name="shuffle" value="true" />
407 <param name="random_state" value="10"/> 412 <param name="random_state" value="10" />
408 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 413 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
409 <param name="header1" value="true" /> 414 <param name="header1" value="true" />
410 <param name="selected_column_selector_option" value="all_columns"/> 415 <param name="selected_column_selector_option" value="all_columns" />
411 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 416 <param name="infile2" value="classifier_y.tabular" ftype="tabular" />
412 <param name="header2" value="true" /> 417 <param name="header2" value="true" />
413 <param name="selected_column_selector_option2" value="all_columns"/> 418 <param name="selected_column_selector_option2" value="all_columns" />
414 <output name="outfile_result" > 419 <output name="outfile_result" >
415 <assert_contents> 420 <assert_contents>
416 <has_n_columns n="13" /> 421 <has_n_columns n="13" />
417 <has_text text="0.08719866399898475"/> 422 <has_text text="0.7927378" />
418 </assert_contents> 423 </assert_contents>
419 </output> 424 </output>
420 </test> 425 </test>
421 <test> 426 <test>
422 <param name="selected_search_scheme" value="GridSearchCV"/> 427 <param name="selected_search_algo" value="GridSearchCV" />
423 <param name="infile_estimator" value="pipeline02" ftype="zip"/> 428 <param name="infile_estimator" value="pipeline02" ftype="h5mlm" />
424 <param name="infile_params" value="get_params02.tabular" ftype="tabular"/> 429 <repeat name="param_set">
425 <repeat name="param_set"> 430 <param name="sp_list" value="[0.01, 0.001]" />
426 <param name="sp_list" value="[0.01, 0.001]"/> 431 <param name="sp_name" value="lassocv__eps" />
427 <param name="sp_name" value="lassocv__eps"/> 432 </repeat>
428 </repeat> 433 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
429 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 434 <param name="header1" value="true" />
430 <param name="header1" value="true" /> 435 <param name="selected_column_selector_option" value="all_columns" />
431 <param name="selected_column_selector_option" value="all_columns"/> 436 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
432 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 437 <param name="header2" value="true" />
433 <param name="header2" value="true" /> 438 <param name="selected_column_selector_option2" value="all_columns" />
434 <param name="selected_column_selector_option2" value="all_columns"/> 439 <output name="outfile_result">
435 <output name="outfile_result"> 440 <assert_contents>
436 <assert_contents> 441 <has_n_columns n="14" />
437 <has_n_columns n="12"/> 442 <has_text text="0.78685093734485" />
438 <has_text text="0.776296816136668" /> 443 </assert_contents>
439 </assert_contents> 444 </output>
440 </output> 445 </test>
441 </test> 446 <test>
442 <test> 447 <param name="selected_search_algo" value="GridSearchCV" />
443 <param name="selected_search_scheme" value="GridSearchCV"/> 448 <param name="infile_estimator" value="pipeline05" ftype="h5mlm" />
444 <param name="infile_estimator" value="pipeline05" ftype="zip"/> 449 <repeat name="param_set">
445 <param name="infile_params" value="get_params05.tabular" ftype="tabular"/> 450 <param name="sp_list" value="[10, 50, 100]" />
446 <repeat name="param_set"> 451 <param name="sp_name" value="n_estimators" />
447 <param name="sp_list" value="[10, 50, 100, 300]"/> 452 </repeat>
448 <param name="sp_name" value="n_estimators"/> 453 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
449 </repeat> 454 <param name="header1" value="true" />
450 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 455 <param name="selected_column_selector_option" value="all_columns" />
451 <param name="header1" value="true" /> 456 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
452 <param name="selected_column_selector_option" value="all_columns"/> 457 <param name="header2" value="true" />
453 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 458 <param name="selected_column_selector_option2" value="all_columns" />
454 <param name="header2" value="true" /> 459 <output name="outfile_result">
455 <param name="selected_column_selector_option2" value="all_columns"/> 460 <assert_contents>
456 <output name="outfile_result"> 461 <has_n_columns n="14" />
457 <assert_contents> 462 <has_text text="0.8101624993383203" />
458 <has_n_columns n="12"/> 463 </assert_contents>
459 <has_text text="0.8176576686816003" /> 464 </output>
460 </assert_contents> 465 <output name="outfile_object" file="searchCV03" compare="sim_size" delta="10" />
461 </output>
462 </test> 466 </test>
463 <test expect_failure="true"> 467 <test expect_failure="true">
464 <param name="selected_search_scheme" value="GridSearchCV"/> 468 <param name="selected_search_algo" value="GridSearchCV" />
465 <param name="infile_estimator" value="pipeline01" ftype="zip"/> 469 <param name="infile_estimator" value="pipeline01" ftype="h5mlm" />
466 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> 470 <repeat name="param_set">
467 <repeat name="param_set"> 471 <param name="sp_list" value="open('~/.ssh/authorized_keys', 'r').read()" />
468 <param name="sp_list" value="open('~/.ssh/authorized_keys', 'r').read()"/> 472 <param name="sp_name" value="svr__C" />
469 <param name="sp_name" value="svr__C"/> 473 </repeat>
470 </repeat> 474 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
471 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 475 <param name="header1" value="true" />
472 <param name="header1" value="true" /> 476 <param name="selected_column_selector_option" value="all_columns" />
473 <param name="selected_column_selector_option" value="all_columns"/> 477 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
474 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 478 <param name="header2" value="true" />
475 <param name="header2" value="true" /> 479 <param name="selected_column_selector_option2" value="all_columns" />
476 <param name="selected_column_selector_option2" value="all_columns"/> 480 </test>
477 </test> 481 <test>
478 <test> 482 <param name="selected_search_algo" value="GridSearchCV" />
479 <param name="selected_search_scheme" value="GridSearchCV"/> 483 <param name="infile_estimator" value="pipeline10" ftype="h5mlm" />
480 <param name="infile_estimator" value="pipeline10" ftype="zip"/> 484 <repeat name="param_set">
481 <param name="infile_params" value="get_params10.tabular" ftype="tabular"/> 485 <param name="sp_list" value=": [sklearn_tree.DecisionTreeRegressor(random_state=0), sklearn_tree.ExtraTreeRegressor(random_state=0)]" />
482 <repeat name="param_set"> 486 <param name="sp_name" value="adaboostregressor__base_estimator" />
483 <param name="sp_list" value=": [sklearn_tree.DecisionTreeRegressor(random_state=0), sklearn_tree.ExtraTreeRegressor(random_state=0)]"/> 487 </repeat>
484 <param name="sp_name" value="adaboostregressor__base_estimator"/> 488 <repeat name="param_set">
485 </repeat> 489 <param name="sp_list" value="[10]" />
486 <repeat name="param_set"> 490 <param name="sp_name" value="adaboostregressor__random_state" />
487 <param name="sp_list" value="[10]"/> 491 </repeat>
488 <param name="sp_name" value="adaboostregressor__random_state"/> 492 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
489 </repeat> 493 <param name="header1" value="true" />
490 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 494 <param name="selected_column_selector_option" value="all_columns" />
491 <param name="header1" value="true" /> 495 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
492 <param name="selected_column_selector_option" value="all_columns"/> 496 <param name="header2" value="true" />
493 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 497 <param name="selected_column_selector_option2" value="all_columns" />
494 <param name="header2" value="true" /> 498 <output name="outfile_result">
495 <param name="selected_column_selector_option2" value="all_columns"/> 499 <assert_contents>
496 <output name="outfile_result"> 500 <has_n_columns n="15" />
497 <assert_contents> 501 <has_text text="0.7981150937087843" />
498 <has_n_columns n="13"/> 502 </assert_contents>
499 <has_text text="0.8165699136618538"/> 503 </output>
500 </assert_contents> 504 </test>
501 </output> 505 <test>
502 </test> 506 <param name="selected_search_algo" value="GridSearchCV" />
503 <test> 507 <param name="infile_estimator" value="pipeline09" ftype="h5mlm" />
504 <param name="selected_search_scheme" value="GridSearchCV"/>
505 <param name="infile_estimator" value="pipeline09" ftype="zip"/>
506 <param name="infile_params" value="get_params09.tabular" ftype="tabular"/>
507 <repeat name="param_set"> 508 <repeat name="param_set">
508 <param name="sp_list" value=": [sklearn_feature_selection.SelectKBest(), 509 <param name="sp_list" value=": [sklearn_feature_selection.SelectKBest(),
509 sklearn_feature_selection.VarianceThreshold(), skrebate_ReliefF(), sklearn_preprocessing.RobustScaler()]"/> 510 sklearn_feature_selection.VarianceThreshold(), skrebate_ReliefF(), sklearn_preprocessing.RobustScaler()]" />
510 <param name="sp_name" value="relieff"/> 511 <param name="sp_name" value="relieff" />
511 </repeat> 512 </repeat>
512 <repeat name="param_set"> 513 <repeat name="param_set">
513 <param name="sp_list" value="[10]"/> 514 <param name="sp_list" value="[10]" />
514 <param name="sp_name" value="randomforestregressor__random_state"/> 515 <param name="sp_name" value="randomforestregressor__random_state" />
515 </repeat> 516 </repeat>
516 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 517 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
517 <param name="header1" value="true" /> 518 <param name="header1" value="true" />
518 <param name="selected_column_selector_option" value="all_columns"/> 519 <param name="selected_column_selector_option" value="all_columns" />
519 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 520 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
520 <param name="header2" value="true" /> 521 <param name="header2" value="true" />
521 <param name="selected_column_selector_option2" value="all_columns"/> 522 <param name="selected_column_selector_option2" value="all_columns" />
522 <output name="outfile_result"> 523 <output name="outfile_result">
523 <assert_contents> 524 <assert_contents>
524 <has_n_columns n="13"/> 525 <has_n_columns n="15" />
525 <has_text text="0.8151250518677202"/> 526 <has_text text="0.8136054873316014" />
526 </assert_contents> 527 </assert_contents>
527 </output> 528 </output>
528 </test> 529 </test>
529 <test> 530 <test>
530 <param name="selected_search_scheme" value="GridSearchCV"/> 531 <param name="selected_search_algo" value="GridSearchCV" />
531 <param name="infile_estimator" value="pipeline09" ftype="zip"/> 532 <param name="infile_estimator" value="pipeline09" ftype="h5mlm" />
532 <param name="infile_params" value="get_params09.tabular" ftype="tabular"/> 533 <repeat name="param_set">
533 <repeat name="param_set"> 534 <param name="sp_list" value=": [None,'sk_prep_all', 7, 13, skrebate_ReliefF(n_features_to_select=12)]" />
534 <param name="sp_list" value=": [None,'sk_prep_all', 7, 13, skrebate_ReliefF(n_features_to_select=12)]"/> 535 <param name="sp_name" value="relieff" />
535 <param name="sp_name" value="relieff"/> 536 </repeat>
536 </repeat> 537 <repeat name="param_set">
537 <repeat name="param_set"> 538 <param name="sp_list" value="[10]" />
538 <param name="sp_list" value="[10]"/> 539 <param name="sp_name" value="randomforestregressor__random_state" />
539 <param name="sp_name" value="randomforestregressor__random_state"/> 540 </repeat>
540 </repeat> 541 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
541 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 542 <param name="header1" value="true" />
542 <param name="header1" value="true" /> 543 <param name="selected_column_selector_option" value="all_columns" />
543 <param name="selected_column_selector_option" value="all_columns"/> 544 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
544 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 545 <param name="header2" value="true" />
545 <param name="header2" value="true" /> 546 <param name="selected_column_selector_option2" value="all_columns" />
546 <param name="selected_column_selector_option2" value="all_columns"/> 547 <output name="outfile_result">
547 <output name="outfile_result"> 548 <assert_contents>
548 <assert_contents> 549 <has_n_columns n="15" />
549 <has_n_columns n="13"/> 550 <has_text text="0.8137203241980131" />
550 <has_text text="0.8151250518677202"/> 551 </assert_contents>
551 </assert_contents> 552 </output>
552 </output> 553 </test>
553 </test> 554 <test>
554 <test> 555 <param name="selected_search_algo" value="GridSearchCV" />
555 <param name="selected_search_scheme" value="GridSearchCV"/> 556 <param name="infile_estimator" value="pipeline11" ftype="h5mlm" />
556 <param name="infile_estimator" value="pipeline11" ftype="zip"/> 557 <repeat name="param_set">
557 <param name="infile_params" value="get_params11.tabular" ftype="tabular"/> 558 <param name="sp_list" value="[3,4,5]" />
558 <repeat name="param_set"> 559 <param name="sp_name" value="editednearestneighbours__n_neighbors" />
559 <param name="sp_list" value="[3,4,5]"/> 560 </repeat>
560 <param name="sp_name" value="editednearestneighbours__n_neighbors"/> 561 <repeat name="param_set">
561 </repeat> 562 <param name="sp_list" value="[10, 50, 100, 500]" />
562 <repeat name="param_set"> 563 <param name="sp_name" value="randomforestclassifier__n_estimators" />
563 <param name="sp_list" value="[10]"/> 564 </repeat>
564 <param name="sp_name" value="editednearestneighbours__random_state"/> 565 <repeat name="param_set">
565 </repeat> 566 <param name="sp_list" value="[10]" />
566 <repeat name="param_set"> 567 <param name="sp_name" value="randomforestclassifier__random_state" />
567 <param name="sp_list" value="[10, 50, 100, 500]"/> 568 </repeat>
568 <param name="sp_name" value="randomforestclassifier__n_estimators"/> 569 <param name="primary_scoring" value="f1_macro" />
569 </repeat> 570 <param name="secondary_scoring" value="balanced_accuracy,accuracy" />
570 <repeat name="param_set"> 571 <param name="n_splits" value="5" />
571 <param name="sp_list" value="[10]"/> 572 <param name="infile1" value="imblearn_X.tabular" ftype="tabular" />
572 <param name="sp_name" value="randomforestclassifier__random_state"/> 573 <param name="header1" value="true" />
573 </repeat> 574 <param name="selected_column_selector_option" value="all_columns" />
574 <param name="primary_scoring" value="f1_macro"/> 575 <param name="infile2" value="imblearn_y.tabular" ftype="tabular" />
575 <param name="secondary_scoring" value="balanced_accuracy,accuracy"/> 576 <param name="header2" value="true" />
576 <param name="n_splits" value="5"/> 577 <param name="selected_column_selector_option2" value="all_columns" />
577 <param name="infile1" value="imblearn_X.tabular" ftype="tabular"/> 578 <output name="outfile_result">
578 <param name="header1" value="true" /> 579 <assert_contents>
579 <param name="selected_column_selector_option" value="all_columns"/> 580 <has_n_columns n="32" />
580 <param name="infile2" value="imblearn_y.tabular" ftype="tabular"/> 581 <has_text text="0.9945648481554453" />
581 <param name="header2" value="true" /> 582 <has_text text="0.9988888888888889" />
582 <param name="selected_column_selector_option2" value="all_columns"/> 583 <has_text text="0.998" />
583 <output name="outfile_result"> 584 </assert_contents>
584 <assert_contents> 585 </output>
585 <has_n_columns n="33"/> 586 </test>
586 <has_text text="0.9945648481554453"/> 587 <test>
587 <has_text text="0.9988888888888889"/> 588 <param name="selected_search_algo" value="GridSearchCV" />
588 <has_text text="0.998"/> 589 <param name="infile_estimator" value="pipeline12" ftype="h5mlm" />
589 </assert_contents> 590 <repeat name="param_set">
590 </output> 591 <param name="sp_list" value="[10, 100, 200]" />
591 </test> 592 <param name="sp_name" value="estimator__n_estimators" />
592 <test> 593 </repeat>
593 <param name="selected_search_scheme" value="GridSearchCV"/> 594 <repeat name="param_set">
594 <param name="infile_estimator" value="pipeline12" ftype="zip"/> 595 <param name="sp_list" value="[10, None]" />
595 <param name="infile_params" value="get_params12.tabular" ftype="tabular"/> 596 <param name="sp_name" value="n_features_to_select" />
596 <repeat name="param_set"> 597 </repeat>
597 <param name="sp_list" value="[10, 100, 200]"/> 598 <param name="primary_scoring" value="r2" />
598 <param name="sp_name" value="estimator__n_estimators"/> 599 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
599 </repeat> 600 <param name="header1" value="true" />
600 <repeat name="param_set"> 601 <param name="selected_column_selector_option" value="all_columns" />
601 <param name="sp_list" value="[10, None]"/> 602 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
602 <param name="sp_name" value="n_features_to_select"/> 603 <param name="header2" value="true" />
603 </repeat> 604 <param name="selected_column_selector_option2" value="all_columns" />
604 <param name="primary_scoring" value="r2"/> 605 <output name="outfile_result">
605 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 606 <assert_contents>
606 <param name="header1" value="true" /> 607 <has_n_columns n="15" />
607 <param name="selected_column_selector_option" value="all_columns"/> 608 <has_text text="0.78510" />
608 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 609 </assert_contents>
609 <param name="header2" value="true" /> 610 </output>
610 <param name="selected_column_selector_option2" value="all_columns"/> 611 </test>
611 <output name="outfile_result"> 612 <test>
612 <assert_contents> 613 <conditional name="search_algos">
613 <has_n_columns n="13"/> 614 <param name="selected_search_algo" value="GridSearchCV" />
614 <has_text text="0.8149439619875293"/>
615 </assert_contents>
616 </output>
617 </test>
618 <test>
619 <conditional name="search_schemes">
620 <param name="selected_search_scheme" value="GridSearchCV"/>
621 <param name="infile_estimator" value="pipeline05" ftype="zip"/>
622 <section name="search_params_builder">
623 <param name="infile_params" value="get_params05.tabular" ftype="tabular"/>
624 <repeat name="param_set">
625 <param name="sp_list" value="[10, 50, 100, 300]"/>
626 <param name="sp_name" value="n_estimators"/>
627 </repeat>
628 </section>
629 </conditional> 615 </conditional>
630 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 616 <param name="infile_estimator" value="pipeline05" ftype="h5mlm" />
631 <param name="header1" value="true" /> 617 <section name="search_params_builder">
632 <param name="selected_column_selector_option" value="all_columns"/> 618 <repeat name="param_set">
633 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 619 <param name="sp_list" value="[10, 50, 100, 300]" />
634 <param name="header2" value="true" /> 620 <param name="sp_name" value="n_estimators" />
635 <param name="selected_column_selector_option2" value="all_columns"/> 621 </repeat>
622 </section>
623 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
624 <param name="header1" value="true" />
625 <param name="selected_column_selector_option" value="all_columns" />
626 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
627 <param name="header2" value="true" />
628 <param name="selected_column_selector_option2" value="all_columns" />
636 <conditional name="outer_split"> 629 <conditional name="outer_split">
637 <param name="split_mode" value="nested_cv"/> 630 <param name="split_mode" value="nested_cv" />
638 <conditional name="cv_selector"> 631 <conditional name="cv_selector">
639 <param name='selected_cv' value="KFold"/> 632 <param name='selected_cv' value="KFold" />
640 <param name="n_splits" value="3"/> 633 <param name="n_splits" value="3" />
641 <param name="shuffle" value="true" /> 634 <param name="shuffle" value="true" />
642 <param name="random_state" value="123"/> 635 <param name="random_state" value="123" />
643 </conditional> 636 </conditional>
644 </conditional> 637 </conditional>
645 <output name="outfile_result"> 638 <output name="outfile_result">
646 <assert_contents> 639 <assert_contents>
647 <has_n_columns n="4"/> 640 <has_n_columns n="4" />
648 <has_text text="0.8044418936007722" /> 641 <has_text text="0.8355084087564864" />
649 </assert_contents> 642 </assert_contents>
650 </output> 643 </output>
651 </test> 644 </test>
652 </tests> 645 </tests>
653 <help> 646 <help>
677 670
678 - np_arange(0.01, 1, 0.1) 671 - np_arange(0.01, 1, 0.1)
679 672
680 - np_random_choice(list(range(1, 51)) + [None], size=20) 673 - np_random_choice(list(range(1, 51)) + [None], size=20)
681 674
682 - scipy_stats_randin(1, 11) 675 - scipy_stats_randint(1, 11)
683 676
684 **Estimator / Preprocessor search (additional `:` in the front)**:: 677 **Estimator / Preprocessor search (additional `:` in the front)**::
685 678
686 : [sklearn_tree.DecisionTreeRegressor(), sklearn_tree.ExtraTreeRegressor()] 679 : [sklearn_tree.DecisionTreeRegressor(), sklearn_tree.ExtraTreeRegressor()]
687 680
785 .. _`https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation`: https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation 778 .. _`https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation`: https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation
786 779
787 ]]> 780 ]]>
788 </help> 781 </help>
789 <expand macro="sklearn_citation"> 782 <expand macro="sklearn_citation">
790 <expand macro="skrebate_citation"/> 783 <expand macro="skrebate_citation" />
791 <expand macro="xgboost_citation"/> 784 <expand macro="xgboost_citation" />
792 <expand macro="imblearn_citation"/> 785 <expand macro="imblearn_citation" />
793 </expand> 786 </expand>
794 </tool> 787 </tool>