comparison search_model_validation.xml @ 8:1c4a241bef5c draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7
author bgruening
date Tue, 14 May 2019 18:05:43 -0400
parents 4368259ff821
children 82b6104d4682
comparison
equal deleted inserted replaced
7:4368259ff821 8:1c4a241bef5c
1 <tool id="sklearn_searchcv" name="Hyperparameter Search" version="@VERSION@"> 1 <tool id="sklearn_searchcv" name="Hyperparameter Search" version="@VERSION@">
2 <description>using exhausitive or randomized search</description> 2 <description>using exhausitive or randomized search</description>
3 <macros> 3 <macros>
4 <import>main_macros.xml</import> 4 <import>main_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="python_requirements"> 6 <expand macro="python_requirements"/>
7 <requirement type="package" version="0.6">skrebate</requirement>
8 <requirement type="package" version="0.4.2">imbalanced-learn</requirement>
9 </expand>
10 <expand macro="macro_stdio"/> 7 <expand macro="macro_stdio"/>
11 <version_command>echo "@VERSION@"</version_command> 8 <version_command>echo "@VERSION@"</version_command>
12 <command> 9 <command>
13 <![CDATA[ 10 <![CDATA[
14 python '$__tool_directory__/search_model_validation.py' 11 python '$__tool_directory__/search_model_validation.py'
15 '$inputs' 12 --inputs '$inputs'
16 '$search_schemes.infile_pipeline' 13 --estimator '$search_schemes.infile_estimator'
17 '$input_options.infile1' 14 --infile1 '$input_options.infile1'
18 '$input_options.infile2' 15 --infile2 '$input_options.infile2'
19 '$outfile_result' 16 --outfile_result '$outfile_result'
20 #if $save: 17 #if $save
21 '$outfile_estimator' 18 --outfile_object '$outfile_object'
22 #end if 19 #end if
20 #if $search_schemes.options.cv_selector.selected_cv in ['GroupKFold', 'GroupShuffleSplit', 'LeaveOneGroupOut', 'LeavePGroupsOut']
21 --groups '$inputs,$search_schemes.options.cv_selector.groups_selector.infile_g'
22 #end if
23
23 ]]> 24 ]]>
24 </command> 25 </command>
25 <configfiles> 26 <configfiles>
26 <inputs name="inputs" /> 27 <inputs name="inputs" />
27 </configfiles> 28 </configfiles>
28 <inputs> 29 <inputs>
29 <conditional name="search_schemes"> 30 <conditional name="search_schemes">
30 <param name="selected_search_scheme" type="select" label="Select a model selection search scheme:"> 31 <param name="selected_search_scheme" type="select" label="Select a model selection search scheme">
31 <option value="GridSearchCV" selected="true">GridSearchCV - Exhaustive search over specified parameter values for an estimator </option> 32 <option value="GridSearchCV" selected="true">GridSearchCV - Exhaustive search over specified parameter values for an estimator </option>
32 <option value="RandomizedSearchCV">RandomizedSearchCV - Randomized search on hyper parameters for an estimator</option> 33 <option value="RandomizedSearchCV">RandomizedSearchCV - Randomized search on hyper parameters for an estimator</option>
33 </param> 34 </param>
34 <when value="GridSearchCV"> 35 <when value="GridSearchCV">
35 <expand macro="search_cv_estimator"/> 36 <expand macro="search_cv_estimator"/>
44 <param argument="n_iter" type="integer" value="10" label="Number of parameter settings that are sampled"/> 45 <param argument="n_iter" type="integer" value="10" label="Number of parameter settings that are sampled"/>
45 <expand macro="random_state"/> 46 <expand macro="random_state"/>
46 </section> 47 </section>
47 </when> 48 </when>
48 </conditional> 49 </conditional>
49 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Save the best estimator/pipeline?"/> 50 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Save the searchCV object"/>
50 <expand macro="sl_mixed_input"/> 51 <expand macro="sl_mixed_input"/>
52 <conditional name="train_test_split">
53 <param name="do_split" type="select" label="Whether to hold a portion of samples for test exclusively?" help="train_test_split">
54 <option value="no">Nope</option>
55 <option value="yes">Yes - I do</option>
56 </param>
57 <when value='no'/>
58 <when value='yes'>
59 <param argument="test_size" type="float" optional="True" value="0.25" label="Test size:"/>
60 <param argument="train_size" type="float" optional="True" value="" label="Train size:"/>
61 <param argument="random_state" type="integer" optional="True" value="" label="Random seed number:"/>
62 <param argument="shuffle" type="select">
63 <option value="None">None - No shuffle</option>
64 <option value="simple">Shuffle -- for regression problems</option>
65 <option value="stratified">StratifiedShuffle -- will use the target values as class labels</option>
66 <option value="group">GroupShuffle -- make sure group CV option is choosen</option>
67 </param>
68 </when>
69 </conditional>
51 </inputs> 70 </inputs>
52 <outputs> 71 <outputs>
53 <data format="tabular" name="outfile_result"/> 72 <data format="tabular" name="outfile_result"/>
54 <data format="zip" name="outfile_estimator" label="${tool.name}: best estimator on ${on_string}"> 73 <data format="zip" name="outfile_object" label="${search_schemes.selected_search_scheme} on ${on_string}">
55 <filter>save</filter> 74 <filter>save</filter>
56 </data> 75 </data>
57 </outputs> 76 </outputs>
58 <tests> 77 <tests>
59 <test> 78 <test>
60 <param name="selected_search_scheme" value="GridSearchCV"/> 79 <param name="selected_search_scheme" value="GridSearchCV"/>
61 <param name="infile_pipeline" value="pipeline01" ftype="zip"/> 80 <param name="infile_estimator" value="pipeline01" ftype="zip"/>
62 <conditional name="search_param_selector"> 81 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/>
63 <param name="search_p" value="C: [1, 10, 100, 1000]"/> 82 <repeat name="param_set">
64 <param name="selected_param_type" value="final_estimator_p"/> 83 <param name="sp_list" value="[1, 10, 100, 1000]"/>
65 </conditional> 84 <param name="sp_name" value="svr__C"/>
66 <conditional name="search_param_selector"> 85 </repeat>
67 <param name="search_p" value="k: [-1, 3, 5, 7, 9]"/> 86 <repeat name="param_set">
68 <param name="selected_param_type" value="prep_2_p"/> 87 <param name="sp_list" value="[-1, 3, 5, 7, 9]"/>
69 </conditional> 88 <param name="sp_name" value="selectkbest__k"/>
89 </repeat>
70 <param name="error_score" value="false"/> 90 <param name="error_score" value="false"/>
71 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 91 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
72 <param name="header1" value="true" /> 92 <param name="header1" value="true" />
73 <param name="selected_column_selector_option" value="all_columns"/> 93 <param name="selected_column_selector_option" value="all_columns"/>
74 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 94 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
76 <param name="selected_column_selector_option2" value="all_columns"/> 96 <param name="selected_column_selector_option2" value="all_columns"/>
77 <output name="outfile_result"> 97 <output name="outfile_result">
78 <assert_contents> 98 <assert_contents>
79 <has_n_columns n="13"/> 99 <has_n_columns n="13"/>
80 <has_text text="0.7938837807353147"/> 100 <has_text text="0.7938837807353147"/>
81 <has_text text="{'estimator__C': 1, 'preprocessing_2__k': 9}"/> 101 <has_text text="{'selectkbest__k': 9, 'svr__C': 1}"/>
82 </assert_contents> 102 </assert_contents>
83 </output> 103 </output>
84 </test> 104 </test>
85 <test expect_failure="true"> 105 <test expect_failure="true">
86 <param name="selected_search_scheme" value="GridSearchCV"/> 106 <param name="selected_search_scheme" value="GridSearchCV"/>
87 <param name="infile_pipeline" value="pipeline01" ftype="zip"/> 107 <param name="infile_estimator" value="pipeline01" ftype="zip"/>
88 <conditional name="search_param_selector"> 108 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/>
89 <param name="search_p" value="C: [1, 10, 100, 1000]"/> 109 <repeat name="param_set">
90 <param name="selected_param_type" value="final_estimator_p"/> 110 <param name="sp_list" value="[1, 10, 100, 1000]"/>
91 </conditional> 111 <param name="sp_name" value="svr__C"/>
92 <conditional name="search_param_selector"> 112 </repeat>
93 <param name="search_p" value="k: [-1, 3, 5, 7, 9]"/> 113 <repeat name="param_set">
94 <param name="selected_param_type" value="prep_2_p"/> 114 <param name="sp_list" value="[-1, 3, 5, 7, 9]"/>
95 </conditional> 115 <param name="sp_name" value="selectkbest__k"/>
116 </repeat>
96 <param name="error_score" value="true"/> 117 <param name="error_score" value="true"/>
97 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 118 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
98 <param name="header1" value="true" /> 119 <param name="header1" value="true" />
99 <param name="selected_column_selector_option" value="all_columns"/> 120 <param name="selected_column_selector_option" value="all_columns"/>
100 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 121 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
101 <param name="header2" value="true" /> 122 <param name="header2" value="true" />
102 <param name="selected_column_selector_option2" value="all_columns"/> 123 <param name="selected_column_selector_option2" value="all_columns"/>
103 </test> 124 </test>
104 <test> 125 <test>
105 <param name="selected_search_scheme" value="RandomizedSearchCV"/> 126 <param name="selected_search_scheme" value="RandomizedSearchCV"/>
106 <param name="infile_pipeline" value="pipeline01" ftype="zip"/> 127 <param name="infile_estimator" value="pipeline01" ftype="zip"/>
107 <conditional name="search_param_selector"> 128 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/>
108 <param name="search_p" value="C: [1, 10, 100, 1000]"/> 129 <repeat name="param_set">
109 <param name="selected_param_type" value="final_estimator_p"/> 130 <param name="sp_list" value="[1, 10, 100, 1000]"/>
110 </conditional> 131 <param name="sp_name" value="svr__C"/>
111 <conditional name="search_param_selector"> 132 </repeat>
112 <param name="search_p" value="kernel: ['linear', 'poly', 'rbf', 'sigmoid']"/> 133 <repeat name="param_set">
113 <param name="selected_param_type" value="final_estimator_p"/> 134 <param name="sp_list" value="['linear', 'poly', 'rbf', 'sigmoid']"/>
114 </conditional> 135 <param name="sp_name" value="svr__kernel"/>
115 <conditional name="search_param_selector"> 136 </repeat>
116 <param name="search_p" value="k: [3, 5, 7, 9]"/> 137 <repeat name="param_set">
117 <param name="selected_param_type" value="prep_2_p"/> 138 <param name="sp_list" value="[3, 5, 7, 9]"/>
118 </conditional> 139 <param name="sp_name" value="selectkbest__k"/>
119 <conditional name="search_param_selector"> 140 </repeat>
120 <param name="search_p" value="with_centering: [True, False]"/> 141 <repeat name="param_set">
121 <param name="selected_param_type" value="prep_1_p"/> 142 <param name="sp_list" value="[True, False]"/>
122 </conditional> 143 <param name="sp_name" value="robustscaler__with_centering"/>
144 </repeat>
123 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 145 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
124 <param name="header1" value="true" /> 146 <param name="header1" value="true" />
125 <param name="selected_column_selector_option" value="all_columns"/> 147 <param name="selected_column_selector_option" value="all_columns"/>
126 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 148 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
127 <param name="header2" value="true" /> 149 <param name="header2" value="true" />
128 <param name="selected_column_selector_option2" value="all_columns"/> 150 <param name="selected_column_selector_option2" value="all_columns"/>
129 <output name="outfile_result" > 151 <output name="outfile_result" >
130 <assert_contents> 152 <assert_contents>
131 <has_n_columns n="15" /> 153 <has_n_columns n="15" />
132 <has_text text="param_preprocessing_1__with_centering"/> 154 <has_text text="param_robustscaler__with_centering"/>
133 </assert_contents> 155 </assert_contents>
134 </output> 156 </output>
135 </test> 157 </test>
136 <test> 158 <test>
137 <param name="selected_search_scheme" value="RandomizedSearchCV"/> 159 <param name="selected_search_scheme" value="RandomizedSearchCV"/>
138 <param name="infile_pipeline" value="pipeline03" ftype="zip"/> 160 <param name="infile_estimator" value="pipeline03" ftype="zip"/>
139 <conditional name="search_param_selector"> 161 <param name="infile_params" value="get_params03.tabular" ftype="tabular"/>
140 <param name="search_p" value="n_estimators: np_arange(50, 1001, 50)"/> 162 <repeat name="param_set">
141 <param name="selected_param_type" value="final_estimator_p"/> 163 <param name="sp_list" value="np_arange(50, 1001, 50)"/>
142 </conditional> 164 <param name="sp_name" value="xgbclassifier__n_estimators"/>
143 <conditional name="search_param_selector"> 165 </repeat>
144 <param name="search_p" value="max_depth: scipy_stats_randint(1, 51)"/> 166 <repeat name="param_set">
145 <param name="selected_param_type" value="final_estimator_p"/> 167 <param name="sp_list" value="scipy_stats_randint(1, 51)"/>
146 </conditional> 168 <param name="sp_name" value="xgbclassifier__max_depth"/>
147 <conditional name="search_param_selector"> 169 </repeat>
148 <param name="search_p" value="gamma: scipy_stats_uniform(0., 1.)"/> 170 <repeat name="param_set">
149 <param name="selected_param_type" value="final_estimator_p"/> 171 <param name="sp_list" value="scipy_stats_uniform(0., 1.)"/>
150 </conditional> 172 <param name="sp_name" value="xgbclassifier__gamma"/>
151 <conditional name="search_param_selector"> 173 </repeat>
152 <param name="search_p" value="random_state: [324089]"/> 174 <repeat name="param_set">
153 <param name="selected_param_type" value="final_estimator_p"/> 175 <param name="sp_list" value="[324089]"/>
154 </conditional> 176 <param name="sp_name" value="xgbclassifier__random_state"/>
177 </repeat>
155 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 178 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
156 <param name="header1" value="true" /> 179 <param name="header1" value="true" />
157 <param name="selected_column_selector_option" value="all_columns"/> 180 <param name="selected_column_selector_option" value="all_columns"/>
158 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 181 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
159 <param name="header2" value="true" /> 182 <param name="header2" value="true" />
160 <param name="selected_column_selector_option2" value="all_columns"/> 183 <param name="selected_column_selector_option2" value="all_columns"/>
161 <output name="outfile_result" > 184 <output name="outfile_result" >
162 <assert_contents> 185 <assert_contents>
163 <has_n_columns n="15" /> 186 <has_n_columns n="15" />
164 <has_text text="param_estimator__max_depth"/> 187 <has_text text="param_xgbclassifier__max_depth"/>
165 </assert_contents> 188 </assert_contents>
166 </output> 189 </output>
167 </test> 190 </test>
168 <test> 191 <test>
169 <param name="selected_search_scheme" value="GridSearchCV"/> 192 <param name="selected_search_scheme" value="GridSearchCV"/>
170 <param name="infile_pipeline" value="pipeline04" ftype="zip"/> 193 <param name="infile_estimator" value="pipeline04" ftype="zip"/>
171 <conditional name="search_param_selector"> 194 <param name="infile_params" value="get_params04.tabular" ftype="tabular"/>
172 <param name="search_p" value="random_state: list(range(100, 1001, 100))"/> 195 <repeat name="param_set">
173 <param name="selected_param_type" value="final_estimator_p"/> 196 <param name="sp_list" value="list(range(100, 1001, 100))"/>
174 </conditional> 197 <param name="sp_name" value="linearsvc__random_state"/>
175 <conditional name="search_param_selector"> 198 </repeat>
176 <param name="search_p" value="estimator-: [sklearn_ensemble.ExtraTreesClassifier(n_estimators=100, random_state=324089)]"/> 199 <repeat name="param_set">
177 <param name="selected_param_type" value="prep_1_p"/> 200 <param name="sp_list" value=": [sklearn_ensemble.ExtraTreesClassifier(n_estimators=100, random_state=324089)]"/>
178 </conditional> 201 <param name="sp_name" value="selectfrommodel__estimator"/>
202 </repeat>
179 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 203 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
180 <param name="header1" value="true" /> 204 <param name="header1" value="true" />
181 <param name="selected_column_selector_option" value="all_columns"/> 205 <param name="selected_column_selector_option" value="all_columns"/>
182 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 206 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
183 <param name="header2" value="true" /> 207 <param name="header2" value="true" />
189 </assert_contents> 213 </assert_contents>
190 </output> 214 </output>
191 </test> 215 </test>
192 <test> 216 <test>
193 <param name="selected_search_scheme" value="GridSearchCV"/> 217 <param name="selected_search_scheme" value="GridSearchCV"/>
194 <param name="infile_pipeline" value="pipeline01" ftype="zip"/> 218 <param name="infile_estimator" value="pipeline01" ftype="zip"/>
195 <conditional name="search_param_selector"> 219 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/>
196 <param name="search_p" value="C: [1, 10, 100, 1000]"/> 220 <repeat name="param_set">
197 <param name="selected_param_type" value="final_estimator_p"/> 221 <param name="sp_list" value="[1, 10, 100, 1000]"/>
198 </conditional> 222 <param name="sp_name" value="svr__C"/>
199 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 223 </repeat>
200 <param name="header1" value="true" /> 224 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
201 <param name="selected_column_selector_option" value="all_columns"/> 225 <param name="header1" value="true" />
202 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 226 <param name="selected_column_selector_option" value="all_columns"/>
203 <param name="header2" value="true" /> 227 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
204 <param name="selected_column_selector_option2" value="all_columns"/> 228 <param name="header2" value="true" />
205 <output name="outfile_estimator" file="searchCV01" compare="sim_size" delta="1"/> 229 <param name="selected_column_selector_option2" value="all_columns"/>
206 </test> 230 <output name="outfile_object" file="searchCV01" compare="sim_size" delta="10"/>
207 <test> 231 </test>
208 <param name="selected_search_scheme" value="GridSearchCV"/> 232 <test>
209 <param name="infile_pipeline" value="pipeline06" ftype="zip"/> 233 <param name="selected_search_scheme" value="GridSearchCV"/>
210 <conditional name="search_param_selector"> 234 <param name="infile_estimator" value="pipeline06" ftype="zip"/>
211 <param name="search_p" value="n_estimators: [10, 50, 200, 1000]"/> 235 <param name="infile_params" value="get_params06.tabular" ftype="tabular"/>
212 <param name="selected_param_type" value="final_estimator_p"/> 236 <repeat name="param_set">
213 </conditional> 237 <param name="sp_list" value="[10, 50, 200, 1000]"/>
214 <conditional name="search_param_selector"> 238 <param name="sp_name" value="adaboostregressor__n_estimators"/>
215 <param name="search_p" value="random_state: [324089]"/> 239 </repeat>
216 <param name="selected_param_type" value="final_estimator_p"/> 240 <repeat name="param_set">
217 </conditional> 241 <param name="sp_list" value="[324089]"/>
242 <param name="sp_name" value="adaboostregressor__random_state"/>
243 </repeat>
218 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 244 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
219 <param name="header1" value="true" /> 245 <param name="header1" value="true" />
220 <param name="selected_column_selector_option" value="all_columns"/> 246 <param name="selected_column_selector_option" value="all_columns"/>
221 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 247 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
222 <param name="header2" value="true" /> 248 <param name="header2" value="true" />
228 </assert_contents> 254 </assert_contents>
229 </output> 255 </output>
230 </test> 256 </test>
231 <test> 257 <test>
232 <param name="selected_search_scheme" value="GridSearchCV"/> 258 <param name="selected_search_scheme" value="GridSearchCV"/>
233 <param name="infile_pipeline" value="pipeline07" ftype="zip"/> 259 <param name="infile_estimator" value="pipeline07" ftype="zip"/>
234 <conditional name="search_param_selector"> 260 <param name="infile_params" value="get_params07.tabular" ftype="tabular"/>
235 <param name="search_p" value="n_estimators: [10, 50, 100, 200]"/> 261 <repeat name="param_set">
236 <param name="selected_param_type" value="final_estimator_p"/> 262 <param name="sp_list" value="[10, 50, 100, 200]"/>
237 </conditional> 263 <param name="sp_name" value="adaboostclassifier__n_estimators"/>
238 <conditional name="search_param_selector"> 264 </repeat>
239 <param name="search_p" value="random_state: [324089]"/> 265 <repeat name="param_set">
240 <param name="selected_param_type" value="final_estimator_p"/> 266 <param name="sp_list" value="[324089]"/>
241 </conditional> 267 <param name="sp_name" value="adaboostclassifier__random_state"/>
242 <conditional name="search_param_selector"> 268 </repeat>
243 <param name="search_p" value="gamma: [1.0, 2.0]"/> 269 <repeat name="param_set">
244 <param name="selected_param_type" value="prep_1_p"/> 270 <param name="sp_list" value="[1.0, 2.0]"/>
245 </conditional> 271 <param name="sp_name" value="rbfsampler__gamma"/>
272 </repeat>
246 <param name='selected_cv' value="default"/> 273 <param name='selected_cv' value="default"/>
247 <param name="n_splits" value="3"/> 274 <param name="n_splits" value="3"/>
248 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 275 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
249 <param name="header1" value="true" /> 276 <param name="header1" value="true" />
250 <param name="selected_column_selector_option" value="all_columns"/> 277 <param name="selected_column_selector_option" value="all_columns"/>
258 </assert_contents> 285 </assert_contents>
259 </output> 286 </output>
260 </test> 287 </test>
261 <test> 288 <test>
262 <param name="selected_search_scheme" value="GridSearchCV"/> 289 <param name="selected_search_scheme" value="GridSearchCV"/>
263 <param name="infile_pipeline" value="pipeline08" ftype="zip"/> 290 <param name="infile_estimator" value="pipeline08" ftype="zip"/>
264 <conditional name="search_param_selector"> 291 <param name="infile_params" value="get_params08.tabular" ftype="tabular"/>
265 <param name="search_p" value="n_estimators: [10, 50, 100, 200]"/> 292 <repeat name="param_set">
266 <param name="selected_param_type" value="final_estimator_p"/> 293 <param name="sp_list" value="[10, 50, 100, 200]"/>
267 </conditional> 294 <param name="sp_name" value="adaboostclassifier__n_estimators"/>
268 <conditional name="search_param_selector"> 295 </repeat>
269 <param name="search_p" value="random_state: [324089]"/> 296 <repeat name="param_set">
270 <param name="selected_param_type" value="final_estimator_p"/> 297 <param name="sp_list" value="[324089]"/>
271 </conditional> 298 <param name="sp_name" value="adaboostclassifier__random_state"/>
272 <conditional name="search_param_selector"> 299 </repeat>
273 <param name="search_p" value="linkage: ['ward', 'complete', 'average']"/> 300 <repeat name="param_set">
274 <param name="selected_param_type" value="prep_1_p"/> 301 <param name="sp_list" value="['ward', 'complete', 'average']"/>
275 </conditional> 302 <param name="sp_name" value="featureagglomeration__linkage"/>
303 </repeat>
276 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 304 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
277 <param name="header1" value="true" /> 305 <param name="header1" value="true" />
278 <param name="selected_column_selector_option" value="all_columns"/> 306 <param name="selected_column_selector_option" value="all_columns"/>
279 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 307 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
280 <param name="header2" value="true" /> 308 <param name="header2" value="true" />
285 </assert_contents> 313 </assert_contents>
286 </output> 314 </output>
287 </test> 315 </test>
288 <test> 316 <test>
289 <param name="selected_search_scheme" value="GridSearchCV"/> 317 <param name="selected_search_scheme" value="GridSearchCV"/>
290 <param name="infile_pipeline" value="pipeline01" ftype="zip"/> 318 <param name="infile_estimator" value="pipeline01" ftype="zip"/>
291 <conditional name="search_param_selector"> 319 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/>
292 <param name="search_p" value="C: [1, 10, 100, 1000]"/> 320 <repeat name="param_set">
293 <param name="selected_param_type" value="final_estimator_p"/> 321 <param name="sp_list" value="[1, 10, 100, 1000]"/>
294 </conditional> 322 <param name="sp_name" value="svr__C"/>
323 </repeat>
295 <param name='selected_cv' value="StratifiedKFold"/> 324 <param name='selected_cv' value="StratifiedKFold"/>
296 <param name="n_splits" value="3"/> 325 <param name="n_splits" value="3"/>
297 <param name="shuffle" value="true" /> 326 <param name="shuffle" value="true" />
298 <param name="random_state" value="10"/> 327 <param name="random_state" value="10"/>
299 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 328 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
300 <param name="header1" value="true" /> 329 <param name="header1" value="true" />
301 <param name="selected_column_selector_option" value="all_columns"/> 330 <param name="selected_column_selector_option" value="all_columns"/>
302 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 331 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
303 <param name="header2" value="true" /> 332 <param name="header2" value="true" />
304 <param name="selected_column_selector_option2" value="all_columns"/> 333 <param name="selected_column_selector_option2" value="all_columns"/>
305 <output name="outfile_estimator" file="searchCV02" compare="sim_size" delta="1"/> 334 <output name="outfile_object" file="searchCV02" compare="sim_size" delta="10"/>
306 </test> 335 </test>
307 <test> 336 <test>
308 <param name="selected_search_scheme" value="GridSearchCV"/> 337 <param name="selected_search_scheme" value="GridSearchCV"/>
309 <param name="infile_pipeline" value="pipeline03" ftype="zip"/> 338 <param name="infile_estimator" value="pipeline03" ftype="zip"/>
310 <conditional name="search_param_selector"> 339 <param name="infile_params" value="get_params03.tabular" ftype="tabular"/>
311 <param name="search_p" value="n_estimators: [10, 50, 200, 1000]"/> 340 <repeat name="param_set">
312 <param name="selected_param_type" value="final_estimator_p"/> 341 <param name="sp_list" value="[10, 50, 200, 1000]"/>
313 </conditional> 342 <param name="sp_name" value="xgbclassifier__n_estimators"/>
314 <conditional name="search_param_selector"> 343 </repeat>
315 <param name="search_p" value="random_state: [324089]"/> 344 <repeat name="param_set">
316 <param name="selected_param_type" value="final_estimator_p"/> 345 <param name="sp_list" value="[324089]"/>
317 </conditional> 346 <param name="sp_name" value="xgbclassifier__random_state"/>
347 </repeat>
318 <param name="primary_scoring" value="balanced_accuracy"/> 348 <param name="primary_scoring" value="balanced_accuracy"/>
319 <param name='selected_cv' value="StratifiedKFold"/> 349 <param name='selected_cv' value="StratifiedKFold"/>
320 <param name="n_splits" value="3"/> 350 <param name="n_splits" value="3"/>
321 <param name="shuffle" value="true" /> 351 <param name="shuffle" value="true" />
322 <param name="random_state" value="10"/> 352 <param name="random_state" value="10"/>
333 </assert_contents> 363 </assert_contents>
334 </output> 364 </output>
335 </test> 365 </test>
336 <test> 366 <test>
337 <param name="selected_search_scheme" value="GridSearchCV"/> 367 <param name="selected_search_scheme" value="GridSearchCV"/>
338 <param name="infile_pipeline" value="pipeline09" ftype="zip"/> 368 <param name="infile_estimator" value="pipeline09" ftype="zip"/>
339 <conditional name="search_param_selector"> 369 <param name="infile_params" value="get_params09.tabular" ftype="tabular"/>
340 <param name="search_p" value="n_neighbors: [50, 100, 150, 200]"/> 370 <repeat name="param_set">
341 <param name="selected_param_type" value="prep_1_p"/> 371 <param name="sp_list" value="[50, 100, 150, 200]"/>
342 </conditional> 372 <param name="sp_name" value="relieff__n_neighbors"/>
343 <conditional name="search_param_selector"> 373 </repeat>
344 <param name="search_p" value="random_state: [324089]"/> 374 <repeat name="param_set">
345 <param name="selected_param_type" value="final_estimator_p"/> 375 <param name="sp_list" value="[324089]"/>
346 </conditional> 376 <param name="sp_name" value="randomforestregressor__random_state"/>
377 </repeat>
347 <param name="primary_scoring" value="explained_variance"/> 378 <param name="primary_scoring" value="explained_variance"/>
348 <param name="secondary_scoring" value="neg_mean_squared_error,r2"/> 379 <param name="secondary_scoring" value="neg_mean_squared_error,r2"/>
349 <param name='selected_cv' value="StratifiedKFold"/> 380 <param name='selected_cv' value="StratifiedKFold"/>
350 <param name="n_splits" value="3"/> 381 <param name="n_splits" value="3"/>
351 <param name="shuffle" value="true" /> 382 <param name="shuffle" value="true" />
365 </assert_contents> 396 </assert_contents>
366 </output> 397 </output>
367 </test> 398 </test>
368 <test> 399 <test>
369 <param name="selected_search_scheme" value="GridSearchCV"/> 400 <param name="selected_search_scheme" value="GridSearchCV"/>
370 <param name="infile_pipeline" value="pipeline02" ftype="zip"/> 401 <param name="infile_estimator" value="pipeline02" ftype="zip"/>
371 <conditional name="search_param_selector"> 402 <param name="infile_params" value="get_params02.tabular" ftype="tabular"/>
372 <param name="search_p" value="eps: [0.01, 0.001]"/> 403 <repeat name="param_set">
373 <param name="selected_param_type" value="final_estimator_p"/> 404 <param name="sp_list" value="[0.01, 0.001]"/>
374 </conditional> 405 <param name="sp_name" value="lassocv__eps"/>
406 </repeat>
375 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 407 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
376 <param name="header1" value="true" /> 408 <param name="header1" value="true" />
377 <param name="selected_column_selector_option" value="all_columns"/> 409 <param name="selected_column_selector_option" value="all_columns"/>
378 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 410 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
379 <param name="header2" value="true" /> 411 <param name="header2" value="true" />
380 <param name="selected_column_selector_option2" value="all_columns"/> 412 <param name="selected_column_selector_option2" value="all_columns"/>
381 <output name="outfile_result"> 413 <output name="outfile_result">
382 <assert_contents> 414 <assert_contents>
383 <has_n_columns n="12"/> 415 <has_n_columns n="12"/>
384 <has_text text="0.7762968161366681" /> 416 <has_text text="0.776296816136668" />
385 </assert_contents> 417 </assert_contents>
386 </output> 418 </output>
387 </test> 419 </test>
388 <test> 420 <test>
389 <param name="selected_search_scheme" value="GridSearchCV"/> 421 <param name="selected_search_scheme" value="GridSearchCV"/>
390 <param name="infile_pipeline" value="pipeline05" ftype="zip"/> 422 <param name="infile_estimator" value="pipeline05" ftype="zip"/>
391 <conditional name="search_param_selector"> 423 <param name="infile_params" value="get_params05.tabular" ftype="tabular"/>
392 <param name="search_p" value="n_estimators: [10, 50, 100, 300]"/> 424 <repeat name="param_set">
393 <param name="selected_param_type" value="final_estimator_p"/> 425 <param name="sp_list" value="[10, 50, 100, 300]"/>
394 </conditional> 426 <param name="sp_name" value="randomforestregressor__n_estimators"/>
427 </repeat>
395 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 428 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
396 <param name="header1" value="true" /> 429 <param name="header1" value="true" />
397 <param name="selected_column_selector_option" value="all_columns"/> 430 <param name="selected_column_selector_option" value="all_columns"/>
398 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 431 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
399 <param name="header2" value="true" /> 432 <param name="header2" value="true" />
405 </assert_contents> 438 </assert_contents>
406 </output> 439 </output>
407 </test> 440 </test>
408 <test expect_failure="true"> 441 <test expect_failure="true">
409 <param name="selected_search_scheme" value="GridSearchCV"/> 442 <param name="selected_search_scheme" value="GridSearchCV"/>
410 <param name="infile_pipeline" value="pipeline01" ftype="zip"/> 443 <param name="infile_estimator" value="pipeline01" ftype="zip"/>
411 <conditional name="search_param_selector"> 444 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/>
412 <param name="search_p" value="C: open('~/.ssh/authorized_keys', 'r').read()"/> 445 <repeat name="param_set">
413 <param name="selected_param_type" value="final_estimator_p"/> 446 <param name="sp_list" value="open('~/.ssh/authorized_keys', 'r').read()"/>
414 </conditional> 447 <param name="sp_name" value="svr__C"/>
415 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 448 </repeat>
416 <param name="header1" value="true" /> 449 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
417 <param name="selected_column_selector_option" value="all_columns"/> 450 <param name="header1" value="true" />
418 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 451 <param name="selected_column_selector_option" value="all_columns"/>
419 <param name="header2" value="true" /> 452 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
420 <param name="selected_column_selector_option2" value="all_columns"/> 453 <param name="header2" value="true" />
421 </test> 454 <param name="selected_column_selector_option2" value="all_columns"/>
422 <test> 455 </test>
423 <param name="selected_search_scheme" value="GridSearchCV"/> 456 <test>
424 <param name="infile_pipeline" value="pipeline10" ftype="zip"/> 457 <param name="selected_search_scheme" value="GridSearchCV"/>
425 <conditional name="search_param_selector"> 458 <param name="infile_estimator" value="pipeline10" ftype="zip"/>
426 <param name="search_p" value="base_estimator-: [sklearn_tree.DecisionTreeRegressor(random_state=0), sklearn_tree.ExtraTreeRegressor(random_state=0)]"/> 459 <param name="infile_params" value="get_params10.tabular" ftype="tabular"/>
427 <param name="selected_param_type" value="final_estimator_p"/> 460 <repeat name="param_set">
428 </conditional> 461 <param name="sp_list" value=": [sklearn_tree.DecisionTreeRegressor(random_state=0), sklearn_tree.ExtraTreeRegressor(random_state=0)]"/>
429 <conditional name="search_param_selector"> 462 <param name="sp_name" value="adaboostregressor__base_estimator"/>
430 <param name="search_p" value="random_state: [10]"/> 463 </repeat>
431 <param name="selected_param_type" value="final_estimator_p"/> 464 <repeat name="param_set">
432 </conditional> 465 <param name="sp_list" value="[10]"/>
466 <param name="sp_name" value="adaboostregressor__random_state"/>
467 </repeat>
433 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 468 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
434 <param name="header1" value="true" /> 469 <param name="header1" value="true" />
435 <param name="selected_column_selector_option" value="all_columns"/> 470 <param name="selected_column_selector_option" value="all_columns"/>
436 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 471 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
437 <param name="header2" value="true" /> 472 <param name="header2" value="true" />
443 </assert_contents> 478 </assert_contents>
444 </output> 479 </output>
445 </test> 480 </test>
446 <test> 481 <test>
447 <param name="selected_search_scheme" value="GridSearchCV"/> 482 <param name="selected_search_scheme" value="GridSearchCV"/>
448 <param name="infile_pipeline" value="pipeline09" ftype="zip"/> 483 <param name="infile_estimator" value="pipeline09" ftype="zip"/>
449 <conditional name="search_param_selector"> 484 <param name="infile_params" value="get_params09.tabular" ftype="tabular"/>
450 <param name="search_p" value=": [sklearn_feature_selection.SelectKBest(), 485 <repeat name="param_set">
486 <param name="sp_list" value=": [sklearn_feature_selection.SelectKBest(),
451 sklearn_feature_selection.VarianceThreshold(), skrebate_ReliefF(), sklearn_preprocessing.RobustScaler()]"/> 487 sklearn_feature_selection.VarianceThreshold(), skrebate_ReliefF(), sklearn_preprocessing.RobustScaler()]"/>
452 <param name="selected_param_type" value="prep_1_p"/> 488 <param name="sp_name" value="relieff"/>
453 </conditional> 489 </repeat>
454 <conditional name="search_param_selector"> 490 <repeat name="param_set">
455 <param name="search_p" value="random_state: [10]"/> 491 <param name="sp_list" value="[10]"/>
456 <param name="selected_param_type" value="final_estimator_p"/> 492 <param name="sp_name" value="randomforestregressor__random_state"/>
457 </conditional> 493 </repeat>
458 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 494 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
459 <param name="header1" value="true" /> 495 <param name="header1" value="true" />
460 <param name="selected_column_selector_option" value="all_columns"/> 496 <param name="selected_column_selector_option" value="all_columns"/>
461 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 497 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
462 <param name="header2" value="true" /> 498 <param name="header2" value="true" />
468 </assert_contents> 504 </assert_contents>
469 </output> 505 </output>
470 </test> 506 </test>
471 <test> 507 <test>
472 <param name="selected_search_scheme" value="GridSearchCV"/> 508 <param name="selected_search_scheme" value="GridSearchCV"/>
473 <param name="infile_pipeline" value="pipeline09" ftype="zip"/> 509 <param name="infile_estimator" value="pipeline09" ftype="zip"/>
474 <conditional name="search_param_selector"> 510 <param name="infile_params" value="get_params09.tabular" ftype="tabular"/>
475 <param name="search_p" value=": [None,'sk_prep_all', 8, 14, skrebate_ReliefF(n_features_to_select=12)]"/> 511 <repeat name="param_set">
476 <param name="selected_param_type" value="prep_1_p"/> 512 <param name="sp_list" value=": [None,'sk_prep_all', 8, 14, skrebate_ReliefF(n_features_to_select=12)]"/>
477 </conditional> 513 <param name="sp_name" value="relieff"/>
478 <conditional name="search_param_selector"> 514 </repeat>
479 <param name="search_p" value="random_state: [10]"/> 515 <repeat name="param_set">
480 <param name="selected_param_type" value="final_estimator_p"/> 516 <param name="sp_list" value="[10]"/>
481 </conditional> 517 <param name="sp_name" value="randomforestregressor__random_state"/>
518 </repeat>
482 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 519 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
483 <param name="header1" value="true" /> 520 <param name="header1" value="true" />
484 <param name="selected_column_selector_option" value="all_columns"/> 521 <param name="selected_column_selector_option" value="all_columns"/>
485 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 522 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
486 <param name="header2" value="true" /> 523 <param name="header2" value="true" />
492 </assert_contents> 529 </assert_contents>
493 </output> 530 </output>
494 </test> 531 </test>
495 <test> 532 <test>
496 <param name="selected_search_scheme" value="GridSearchCV"/> 533 <param name="selected_search_scheme" value="GridSearchCV"/>
497 <param name="infile_pipeline" value="pipeline11" ftype="zip"/> 534 <param name="infile_estimator" value="pipeline11" ftype="zip"/>
498 <conditional name="search_param_selector"> 535 <param name="infile_params" value="get_params11.tabular" ftype="tabular"/>
499 <param name="search_p" value="n_neighbors: [3,4,5]"/> 536 <repeat name="param_set">
500 <param name="selected_param_type" value="prep_1_p"/> 537 <param name="sp_list" value="[3,4,5]"/>
501 </conditional> 538 <param name="sp_name" value="editednearestneighbours__n_neighbors"/>
502 <conditional name="search_param_selector"> 539 </repeat>
503 <param name="search_p" value="random_state: [10]"/> 540 <repeat name="param_set">
504 <param name="selected_param_type" value="prep_1_p"/> 541 <param name="sp_list" value="[10]"/>
505 </conditional> 542 <param name="sp_name" value="editednearestneighbours__random_state"/>
506 <conditional name="search_param_selector"> 543 </repeat>
507 <param name="search_p" value="n_estimators:[10, 50, 100, 500]"/> 544 <repeat name="param_set">
508 <param name="selected_param_type" value="final_estimator_p"/> 545 <param name="sp_list" value="[10, 50, 100, 500]"/>
509 </conditional> 546 <param name="sp_name" value="randomforestclassifier__n_estimators"/>
510 <conditional name="search_param_selector"> 547 </repeat>
511 <param name="search_p" value="random_state: [10]"/> 548 <repeat name="param_set">
512 <param name="selected_param_type" value="final_estimator_p"/> 549 <param name="sp_list" value="[10]"/>
513 </conditional> 550 <param name="sp_name" value="randomforestclassifier__random_state"/>
551 </repeat>
514 <param name="primary_scoring" value="f1_macro"/> 552 <param name="primary_scoring" value="f1_macro"/>
515 <param name="secondary_scoring" value="balanced_accuracy,accuracy"/> 553 <param name="secondary_scoring" value="balanced_accuracy,accuracy"/>
516 <param name="n_splits" value="5"/> 554 <param name="n_splits" value="5"/>
517 <param name="infile1" value="imblearn_X.tabular" ftype="tabular"/> 555 <param name="infile1" value="imblearn_X.tabular" ftype="tabular"/>
518 <param name="header1" value="true" /> 556 <param name="header1" value="true" />
529 </assert_contents> 567 </assert_contents>
530 </output> 568 </output>
531 </test> 569 </test>
532 <test> 570 <test>
533 <param name="selected_search_scheme" value="GridSearchCV"/> 571 <param name="selected_search_scheme" value="GridSearchCV"/>
534 <param name="infile_pipeline" value="pipeline12" ftype="zip"/> 572 <param name="infile_estimator" value="pipeline12" ftype="zip"/>
535 <conditional name="search_param_selector"> 573 <param name="infile_params" value="get_params12.tabular" ftype="tabular"/>
536 <param name="search_p" value="estimator__n_estimators: [10, 100, 200]"/> 574 <repeat name="param_set">
537 <param name="selected_param_type" value="final_estimator_p"/> 575 <param name="sp_list" value="[10, 100, 200]"/>
538 </conditional> 576 <param name="sp_name" value="rfe__estimator__n_estimators"/>
539 <conditional name="search_param_selector"> 577 </repeat>
540 <param name="search_p" value="n_features_to_select: [10, None]"/> 578 <repeat name="param_set">
541 <param name="selected_param_type" value="final_estimator_p"/> 579 <param name="sp_list" value="[10, None]"/>
542 </conditional> 580 <param name="sp_name" value="rfe__n_features_to_select"/>
581 </repeat>
543 <param name="primary_scoring" value="r2"/> 582 <param name="primary_scoring" value="r2"/>
544 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> 583 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
545 <param name="header1" value="true" /> 584 <param name="header1" value="true" />
546 <param name="selected_column_selector_option" value="all_columns"/> 585 <param name="selected_column_selector_option" value="all_columns"/>
547 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 586 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
552 <has_n_columns n="13"/> 591 <has_n_columns n="13"/>
553 <has_text text="0.8149439619875293"/> 592 <has_text text="0.8149439619875293"/>
554 </assert_contents> 593 </assert_contents>
555 </output> 594 </output>
556 </test> 595 </test>
596 <!--test>
597 <conditional name="search_schemes">
598 <param name="selected_search_scheme" value="GridSearchCV"/>
599 <param name="infile_estimator" value="pipeline05" ftype="zip"/>
600 <section name="search_params_builder">
601 <param name="infile_params" value="get_params05.tabular" ftype="tabular"/>
602 <repeat name="param_set">
603 <param name="sp_list" value="[10, 50, 100, 300]"/>
604 <param name="sp_name" value="randomforestregressor__n_estimators"/>
605 </repeat>
606 </section>
607 </conditional>
608 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
609 <param name="header1" value="true" />
610 <param name="selected_column_selector_option" value="all_columns"/>
611 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
612 <param name="header2" value="true" />
613 <param name="selected_column_selector_option2" value="all_columns"/>
614 <output name="outfile_result">
615 <assert_contents>
616 <has_n_columns n="1"/>
617 <has_text text="0.7986842219788204" />
618 </assert_contents>
619 </output>
620 </test-->
557 </tests> 621 </tests>
558 <help> 622 <help>
559 <![CDATA[ 623 <![CDATA[
560 **What it does** 624 **What it does**
561 Searches optimized parameter values for an estimator or pipeline through either exhaustive grid cross validation search or Randomized cross validation search. 625 Searches optimized parameter settings for an estimator or pipeline through either exhaustive grid cross validation search or Randomized cross validation search.
562 please refer to `Scikit-learn model_selection GridSearchCV`_, `Scikit-learn model_selection RandomizedSearchCV`_ and `Tuning hyper-parameters`_. 626 please refer to `Scikit-learn model_selection GridSearchCV`_, `Scikit-learn model_selection RandomizedSearchCV`_ and `Tuning hyper-parameters`_.
563 627
564 **How to choose search patameters?** 628 **Return**
629
630 Outputs `cv_results_` from SearchCV as a tabular dataset when no train_test_split is performed; otherwise outputs the test score(s). In addition, the SearchCV object itself can optionally be saved as an output.
631
632 **How to choose search parameters grid?**
565 633
566 Please refer to `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_, `neighbors`_ and `xgboost`_ for estimator parameters. 634 Please refer to `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_, `neighbors`_ and `xgboost`_ for estimator parameters.
567 Refer to `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_, `cluster.FeatureAgglomeration`_ and `skrebate`_ for parameter in the pre-processing steps. 635 Refer to `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_, `cluster.FeatureAgglomeration`_
568 636 and `skrebate`_ for parameters in the pre-processing steps.
569 **Search parameter input** accepts parameter and setting in key:value pair. One pair per input box. Setting can be list, numpy array, or distribution. 637
570 The evaluation of settings supports operations in Math, list comprehension, numpy.arange(np_arange), most numpy.random(e.g., np_random_uniform) and some scipy.stats(e.g., scipy_stats_zipf) classes or functions, and others. 638 **Search parameter list** can be list, numpy array, or distribution. The evaluation of settings supports operations in Math,
571 639 list comprehension, numpy.arange(np_arange), most numpy.random(e.g., np_random_uniform) and some scipy.stats(e.g., scipy_stats_zipf) classes or functions, and others.
572 **Examples:** 640
573 641 Examples:
574 - K: [3, 5, 7, 9] 642
575 643 - [3, 5, 7, 9]
576 - n_estimators: list(range(50, 1001, 50)) 644
577 645 - list(range(50, 1001, 50))
578 - gamma: np_arange(0.01, 1, 0.1) 646
579 647 - np_arange(0.01, 1, 0.1)
580 - alpha: np_random_choice(list(range(1, 51)) + [None], size=20) 648
581 649 - np_random_choice(list(range(1, 51)) + [None], size=20)
582 - max_depth: scipy_stats_randin(1, 11) 650
583 651 - scipy_stats_randint(1, 11)
584 **Estimator search/eval (additional '-')**:: 652
585 653 **Estimator / Preprocessor search (additional `:` in the front)**::
586 base_estimator-: [sklearn_tree.DecisionTreeRegressor(), sklearn_tree.ExtraTreeRegressor()] 654
587 655 : [sklearn_tree.DecisionTreeRegressor(), sklearn_tree.ExtraTreeRegressor()]
588 **Preprocessors search/swap**::
589 656
590 : [sklearn_feature_selection.SelectKBest(), sklearn_feature_selection.VarianceThreshold(), 657 : [sklearn_feature_selection.SelectKBest(), sklearn_feature_selection.VarianceThreshold(),
591 skrebate_ReliefF(), sklearn_preprocessing.RobustScaler()] 658 skrebate_ReliefF(), sklearn_preprocessing.RobustScaler()]
592 659
593 **Hot number/keyword for preprocessors**:: 660 **Hot number/keyword for preprocessors**::
654 Support mix (CAUTION: Mix of imblearn and other preprocessors may not work), e.g.:: 721 Support mix (CAUTION: Mix of imblearn and other preprocessors may not work), e.g.::
655 722
656 : [None, 'sk_prep_all', 22, 'k_appr_all', sklearn_feature_selection.SelectKBest(k=50)] 723 : [None, 'sk_prep_all', 22, 'k_appr_all', sklearn_feature_selection.SelectKBest(k=50)]
657 724
658 725
726
727 **Whether to do train_test_split?**
728
729 Please refer to `https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation`_
730
731
732 .. image:: https://scikit-learn.org/stable/_images/grid_search_cross_validation.png
733 :height: 300
734 :width: 400
735
736
659 .. _`Scikit-learn model_selection GridSearchCV`: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html 737 .. _`Scikit-learn model_selection GridSearchCV`: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
660 .. _`Scikit-learn model_selection RandomizedSearchCV`: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html 738 .. _`Scikit-learn model_selection RandomizedSearchCV`: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html
661 .. _`Tuning hyper-parameters`: http://scikit-learn.org/stable/modules/grid_search.html 739 .. _`Tuning hyper-parameters`: http://scikit-learn.org/stable/modules/grid_search.html
662 740
663 .. _`svm`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.svm 741 .. _`svm`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.svm
672 .. _`feature_selection`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection 750 .. _`feature_selection`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection
673 .. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition 751 .. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition
674 .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation 752 .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation
675 .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html 753 .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html
676 .. _`skrebate`: https://epistasislab.github.io/scikit-rebate/using/ 754 .. _`skrebate`: https://epistasislab.github.io/scikit-rebate/using/
755 .. _`https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation`: https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation
677 756
678 ]]> 757 ]]>
679 </help> 758 </help>
680 <expand macro="sklearn_citation"> 759 <expand macro="sklearn_citation">
681 <expand macro="skrebate_citation"/> 760 <expand macro="skrebate_citation"/>