Mercurial > repos > bgruening > sklearn_searchcv
comparison search_model_validation.xml @ 8:1c4a241bef5c draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7
author | bgruening |
---|---|
date | Tue, 14 May 2019 18:05:43 -0400 |
parents | 4368259ff821 |
children | 82b6104d4682 |
comparison
equal
deleted
inserted
replaced
7:4368259ff821 | 8:1c4a241bef5c |
---|---|
1 <tool id="sklearn_searchcv" name="Hyperparameter Search" version="@VERSION@"> | 1 <tool id="sklearn_searchcv" name="Hyperparameter Search" version="@VERSION@"> |
2 <description>using exhaustive or randomized search</description> | 2 <description>using exhaustive or randomized search</description> |
3 <macros> | 3 <macros> |
4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="python_requirements"> | 6 <expand macro="python_requirements"/> |
7 <requirement type="package" version="0.6">skrebate</requirement> | |
8 <requirement type="package" version="0.4.2">imbalanced-learn</requirement> | |
9 </expand> | |
10 <expand macro="macro_stdio"/> | 7 <expand macro="macro_stdio"/> |
11 <version_command>echo "@VERSION@"</version_command> | 8 <version_command>echo "@VERSION@"</version_command> |
12 <command> | 9 <command> |
13 <![CDATA[ | 10 <![CDATA[ |
14 python '$__tool_directory__/search_model_validation.py' | 11 python '$__tool_directory__/search_model_validation.py' |
15 '$inputs' | 12 --inputs '$inputs' |
16 '$search_schemes.infile_pipeline' | 13 --estimator '$search_schemes.infile_estimator' |
17 '$input_options.infile1' | 14 --infile1 '$input_options.infile1' |
18 '$input_options.infile2' | 15 --infile2 '$input_options.infile2' |
19 '$outfile_result' | 16 --outfile_result '$outfile_result' |
20 #if $save: | 17 #if $save |
21 '$outfile_estimator' | 18 --outfile_object '$outfile_object' |
22 #end if | 19 #end if |
20 #if $search_schemes.options.cv_selector.selected_cv in ['GroupKFold', 'GroupShuffleSplit', 'LeaveOneGroupOut', 'LeavePGroupsOut'] | |
21 --groups '$inputs,$search_schemes.options.cv_selector.groups_selector.infile_g' | |
22 #end if | |
23 | |
23 ]]> | 24 ]]> |
24 </command> | 25 </command> |
25 <configfiles> | 26 <configfiles> |
26 <inputs name="inputs" /> | 27 <inputs name="inputs" /> |
27 </configfiles> | 28 </configfiles> |
28 <inputs> | 29 <inputs> |
29 <conditional name="search_schemes"> | 30 <conditional name="search_schemes"> |
30 <param name="selected_search_scheme" type="select" label="Select a model selection search scheme:"> | 31 <param name="selected_search_scheme" type="select" label="Select a model selection search scheme"> |
31 <option value="GridSearchCV" selected="true">GridSearchCV - Exhaustive search over specified parameter values for an estimator </option> | 32 <option value="GridSearchCV" selected="true">GridSearchCV - Exhaustive search over specified parameter values for an estimator </option> |
32 <option value="RandomizedSearchCV">RandomizedSearchCV - Randomized search on hyper parameters for an estimator</option> | 33 <option value="RandomizedSearchCV">RandomizedSearchCV - Randomized search on hyper parameters for an estimator</option> |
33 </param> | 34 </param> |
34 <when value="GridSearchCV"> | 35 <when value="GridSearchCV"> |
35 <expand macro="search_cv_estimator"/> | 36 <expand macro="search_cv_estimator"/> |
44 <param argument="n_iter" type="integer" value="10" label="Number of parameter settings that are sampled"/> | 45 <param argument="n_iter" type="integer" value="10" label="Number of parameter settings that are sampled"/> |
45 <expand macro="random_state"/> | 46 <expand macro="random_state"/> |
46 </section> | 47 </section> |
47 </when> | 48 </when> |
48 </conditional> | 49 </conditional> |
49 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Save the best estimator/pipeline?"/> | 50 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Save the searchCV object"/> |
50 <expand macro="sl_mixed_input"/> | 51 <expand macro="sl_mixed_input"/> |
52 <conditional name="train_test_split"> | |
53 <param name="do_split" type="select" label="Whether to hold a portion of samples for test exclusively?" help="train_test_split"> | |
54 <option value="no">Nope</option> | |
55 <option value="yes">Yes - I do</option> | |
56 </param> | |
57 <when value='no'/> | |
58 <when value='yes'> | |
59 <param argument="test_size" type="float" optional="True" value="0.25" label="Test size:"/> | |
60 <param argument="train_size" type="float" optional="True" value="" label="Train size:"/> | |
61 <param argument="random_state" type="integer" optional="True" value="" label="Random seed number:"/> | |
62 <param argument="shuffle" type="select"> | |
63 <option value="None">None - No shuffle</option> | |
64 <option value="simple">Shuffle -- for regression problems</option> | |
65 <option value="stratified">StratifiedShuffle -- will use the target values as class labels</option> | |
66 <option value="group">GroupShuffle -- make sure group CV option is chosen</option> | |
67 </param> | |
68 </when> | |
69 </conditional> | |
51 </inputs> | 70 </inputs> |
52 <outputs> | 71 <outputs> |
53 <data format="tabular" name="outfile_result"/> | 72 <data format="tabular" name="outfile_result"/> |
54 <data format="zip" name="outfile_estimator" label="${tool.name}: best estimator on ${on_string}"> | 73 <data format="zip" name="outfile_object" label="${search_schemes.selected_search_scheme} on ${on_string}"> |
55 <filter>save</filter> | 74 <filter>save</filter> |
56 </data> | 75 </data> |
57 </outputs> | 76 </outputs> |
58 <tests> | 77 <tests> |
59 <test> | 78 <test> |
60 <param name="selected_search_scheme" value="GridSearchCV"/> | 79 <param name="selected_search_scheme" value="GridSearchCV"/> |
61 <param name="infile_pipeline" value="pipeline01" ftype="zip"/> | 80 <param name="infile_estimator" value="pipeline01" ftype="zip"/> |
62 <conditional name="search_param_selector"> | 81 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> |
63 <param name="search_p" value="C: [1, 10, 100, 1000]"/> | 82 <repeat name="param_set"> |
64 <param name="selected_param_type" value="final_estimator_p"/> | 83 <param name="sp_list" value="[1, 10, 100, 1000]"/> |
65 </conditional> | 84 <param name="sp_name" value="svr__C"/> |
66 <conditional name="search_param_selector"> | 85 </repeat> |
67 <param name="search_p" value="k: [-1, 3, 5, 7, 9]"/> | 86 <repeat name="param_set"> |
68 <param name="selected_param_type" value="prep_2_p"/> | 87 <param name="sp_list" value="[-1, 3, 5, 7, 9]"/> |
69 </conditional> | 88 <param name="sp_name" value="selectkbest__k"/> |
89 </repeat> | |
70 <param name="error_score" value="false"/> | 90 <param name="error_score" value="false"/> |
71 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 91 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
72 <param name="header1" value="true" /> | 92 <param name="header1" value="true" /> |
73 <param name="selected_column_selector_option" value="all_columns"/> | 93 <param name="selected_column_selector_option" value="all_columns"/> |
74 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 94 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
76 <param name="selected_column_selector_option2" value="all_columns"/> | 96 <param name="selected_column_selector_option2" value="all_columns"/> |
77 <output name="outfile_result"> | 97 <output name="outfile_result"> |
78 <assert_contents> | 98 <assert_contents> |
79 <has_n_columns n="13"/> | 99 <has_n_columns n="13"/> |
80 <has_text text="0.7938837807353147"/> | 100 <has_text text="0.7938837807353147"/> |
81 <has_text text="{'estimator__C': 1, 'preprocessing_2__k': 9}"/> | 101 <has_text text="{'selectkbest__k': 9, 'svr__C': 1}"/> |
82 </assert_contents> | 102 </assert_contents> |
83 </output> | 103 </output> |
84 </test> | 104 </test> |
85 <test expect_failure="true"> | 105 <test expect_failure="true"> |
86 <param name="selected_search_scheme" value="GridSearchCV"/> | 106 <param name="selected_search_scheme" value="GridSearchCV"/> |
87 <param name="infile_pipeline" value="pipeline01" ftype="zip"/> | 107 <param name="infile_estimator" value="pipeline01" ftype="zip"/> |
88 <conditional name="search_param_selector"> | 108 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> |
89 <param name="search_p" value="C: [1, 10, 100, 1000]"/> | 109 <repeat name="param_set"> |
90 <param name="selected_param_type" value="final_estimator_p"/> | 110 <param name="sp_list" value="[1, 10, 100, 1000]"/> |
91 </conditional> | 111 <param name="sp_name" value="svr__C"/> |
92 <conditional name="search_param_selector"> | 112 </repeat> |
93 <param name="search_p" value="k: [-1, 3, 5, 7, 9]"/> | 113 <repeat name="param_set"> |
94 <param name="selected_param_type" value="prep_2_p"/> | 114 <param name="sp_list" value="[-1, 3, 5, 7, 9]"/> |
95 </conditional> | 115 <param name="sp_name" value="selectkbest__k"/> |
116 </repeat> | |
96 <param name="error_score" value="true"/> | 117 <param name="error_score" value="true"/> |
97 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 118 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
98 <param name="header1" value="true" /> | 119 <param name="header1" value="true" /> |
99 <param name="selected_column_selector_option" value="all_columns"/> | 120 <param name="selected_column_selector_option" value="all_columns"/> |
100 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 121 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
101 <param name="header2" value="true" /> | 122 <param name="header2" value="true" /> |
102 <param name="selected_column_selector_option2" value="all_columns"/> | 123 <param name="selected_column_selector_option2" value="all_columns"/> |
103 </test> | 124 </test> |
104 <test> | 125 <test> |
105 <param name="selected_search_scheme" value="RandomizedSearchCV"/> | 126 <param name="selected_search_scheme" value="RandomizedSearchCV"/> |
106 <param name="infile_pipeline" value="pipeline01" ftype="zip"/> | 127 <param name="infile_estimator" value="pipeline01" ftype="zip"/> |
107 <conditional name="search_param_selector"> | 128 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> |
108 <param name="search_p" value="C: [1, 10, 100, 1000]"/> | 129 <repeat name="param_set"> |
109 <param name="selected_param_type" value="final_estimator_p"/> | 130 <param name="sp_list" value="[1, 10, 100, 1000]"/> |
110 </conditional> | 131 <param name="sp_name" value="svr__C"/> |
111 <conditional name="search_param_selector"> | 132 </repeat> |
112 <param name="search_p" value="kernel: ['linear', 'poly', 'rbf', 'sigmoid']"/> | 133 <repeat name="param_set"> |
113 <param name="selected_param_type" value="final_estimator_p"/> | 134 <param name="sp_list" value="['linear', 'poly', 'rbf', 'sigmoid']"/> |
114 </conditional> | 135 <param name="sp_name" value="svr__kernel"/> |
115 <conditional name="search_param_selector"> | 136 </repeat> |
116 <param name="search_p" value="k: [3, 5, 7, 9]"/> | 137 <repeat name="param_set"> |
117 <param name="selected_param_type" value="prep_2_p"/> | 138 <param name="sp_list" value="[3, 5, 7, 9]"/> |
118 </conditional> | 139 <param name="sp_name" value="selectkbest__k"/> |
119 <conditional name="search_param_selector"> | 140 </repeat> |
120 <param name="search_p" value="with_centering: [True, False]"/> | 141 <repeat name="param_set"> |
121 <param name="selected_param_type" value="prep_1_p"/> | 142 <param name="sp_list" value="[True, False]"/> |
122 </conditional> | 143 <param name="sp_name" value="robustscaler__with_centering"/> |
144 </repeat> | |
123 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 145 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
124 <param name="header1" value="true" /> | 146 <param name="header1" value="true" /> |
125 <param name="selected_column_selector_option" value="all_columns"/> | 147 <param name="selected_column_selector_option" value="all_columns"/> |
126 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 148 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
127 <param name="header2" value="true" /> | 149 <param name="header2" value="true" /> |
128 <param name="selected_column_selector_option2" value="all_columns"/> | 150 <param name="selected_column_selector_option2" value="all_columns"/> |
129 <output name="outfile_result" > | 151 <output name="outfile_result" > |
130 <assert_contents> | 152 <assert_contents> |
131 <has_n_columns n="15" /> | 153 <has_n_columns n="15" /> |
132 <has_text text="param_preprocessing_1__with_centering"/> | 154 <has_text text="param_robustscaler__with_centering"/> |
133 </assert_contents> | 155 </assert_contents> |
134 </output> | 156 </output> |
135 </test> | 157 </test> |
136 <test> | 158 <test> |
137 <param name="selected_search_scheme" value="RandomizedSearchCV"/> | 159 <param name="selected_search_scheme" value="RandomizedSearchCV"/> |
138 <param name="infile_pipeline" value="pipeline03" ftype="zip"/> | 160 <param name="infile_estimator" value="pipeline03" ftype="zip"/> |
139 <conditional name="search_param_selector"> | 161 <param name="infile_params" value="get_params03.tabular" ftype="tabular"/> |
140 <param name="search_p" value="n_estimators: np_arange(50, 1001, 50)"/> | 162 <repeat name="param_set"> |
141 <param name="selected_param_type" value="final_estimator_p"/> | 163 <param name="sp_list" value="np_arange(50, 1001, 50)"/> |
142 </conditional> | 164 <param name="sp_name" value="xgbclassifier__n_estimators"/> |
143 <conditional name="search_param_selector"> | 165 </repeat> |
144 <param name="search_p" value="max_depth: scipy_stats_randint(1, 51)"/> | 166 <repeat name="param_set"> |
145 <param name="selected_param_type" value="final_estimator_p"/> | 167 <param name="sp_list" value="scipy_stats_randint(1, 51)"/> |
146 </conditional> | 168 <param name="sp_name" value="xgbclassifier__max_depth"/> |
147 <conditional name="search_param_selector"> | 169 </repeat> |
148 <param name="search_p" value="gamma: scipy_stats_uniform(0., 1.)"/> | 170 <repeat name="param_set"> |
149 <param name="selected_param_type" value="final_estimator_p"/> | 171 <param name="sp_list" value="scipy_stats_uniform(0., 1.)"/> |
150 </conditional> | 172 <param name="sp_name" value="xgbclassifier__gamma"/> |
151 <conditional name="search_param_selector"> | 173 </repeat> |
152 <param name="search_p" value="random_state: [324089]"/> | 174 <repeat name="param_set"> |
153 <param name="selected_param_type" value="final_estimator_p"/> | 175 <param name="sp_list" value="[324089]"/> |
154 </conditional> | 176 <param name="sp_name" value="xgbclassifier__random_state"/> |
177 </repeat> | |
155 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 178 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
156 <param name="header1" value="true" /> | 179 <param name="header1" value="true" /> |
157 <param name="selected_column_selector_option" value="all_columns"/> | 180 <param name="selected_column_selector_option" value="all_columns"/> |
158 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 181 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
159 <param name="header2" value="true" /> | 182 <param name="header2" value="true" /> |
160 <param name="selected_column_selector_option2" value="all_columns"/> | 183 <param name="selected_column_selector_option2" value="all_columns"/> |
161 <output name="outfile_result" > | 184 <output name="outfile_result" > |
162 <assert_contents> | 185 <assert_contents> |
163 <has_n_columns n="15" /> | 186 <has_n_columns n="15" /> |
164 <has_text text="param_estimator__max_depth"/> | 187 <has_text text="param_xgbclassifier__max_depth"/> |
165 </assert_contents> | 188 </assert_contents> |
166 </output> | 189 </output> |
167 </test> | 190 </test> |
168 <test> | 191 <test> |
169 <param name="selected_search_scheme" value="GridSearchCV"/> | 192 <param name="selected_search_scheme" value="GridSearchCV"/> |
170 <param name="infile_pipeline" value="pipeline04" ftype="zip"/> | 193 <param name="infile_estimator" value="pipeline04" ftype="zip"/> |
171 <conditional name="search_param_selector"> | 194 <param name="infile_params" value="get_params04.tabular" ftype="tabular"/> |
172 <param name="search_p" value="random_state: list(range(100, 1001, 100))"/> | 195 <repeat name="param_set"> |
173 <param name="selected_param_type" value="final_estimator_p"/> | 196 <param name="sp_list" value="list(range(100, 1001, 100))"/> |
174 </conditional> | 197 <param name="sp_name" value="linearsvc__random_state"/> |
175 <conditional name="search_param_selector"> | 198 </repeat> |
176 <param name="search_p" value="estimator-: [sklearn_ensemble.ExtraTreesClassifier(n_estimators=100, random_state=324089)]"/> | 199 <repeat name="param_set"> |
177 <param name="selected_param_type" value="prep_1_p"/> | 200 <param name="sp_list" value=": [sklearn_ensemble.ExtraTreesClassifier(n_estimators=100, random_state=324089)]"/> |
178 </conditional> | 201 <param name="sp_name" value="selectfrommodel__estimator"/> |
202 </repeat> | |
179 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 203 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
180 <param name="header1" value="true" /> | 204 <param name="header1" value="true" /> |
181 <param name="selected_column_selector_option" value="all_columns"/> | 205 <param name="selected_column_selector_option" value="all_columns"/> |
182 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 206 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
183 <param name="header2" value="true" /> | 207 <param name="header2" value="true" /> |
189 </assert_contents> | 213 </assert_contents> |
190 </output> | 214 </output> |
191 </test> | 215 </test> |
192 <test> | 216 <test> |
193 <param name="selected_search_scheme" value="GridSearchCV"/> | 217 <param name="selected_search_scheme" value="GridSearchCV"/> |
194 <param name="infile_pipeline" value="pipeline01" ftype="zip"/> | 218 <param name="infile_estimator" value="pipeline01" ftype="zip"/> |
195 <conditional name="search_param_selector"> | 219 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> |
196 <param name="search_p" value="C: [1, 10, 100, 1000]"/> | 220 <repeat name="param_set"> |
197 <param name="selected_param_type" value="final_estimator_p"/> | 221 <param name="sp_list" value="[1, 10, 100, 1000]"/> |
198 </conditional> | 222 <param name="sp_name" value="svr__C"/> |
199 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 223 </repeat> |
200 <param name="header1" value="true" /> | 224 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
201 <param name="selected_column_selector_option" value="all_columns"/> | 225 <param name="header1" value="true" /> |
202 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 226 <param name="selected_column_selector_option" value="all_columns"/> |
203 <param name="header2" value="true" /> | 227 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
204 <param name="selected_column_selector_option2" value="all_columns"/> | 228 <param name="header2" value="true" /> |
205 <output name="outfile_estimator" file="searchCV01" compare="sim_size" delta="1"/> | 229 <param name="selected_column_selector_option2" value="all_columns"/> |
206 </test> | 230 <output name="outfile_object" file="searchCV01" compare="sim_size" delta="10"/> |
207 <test> | 231 </test> |
208 <param name="selected_search_scheme" value="GridSearchCV"/> | 232 <test> |
209 <param name="infile_pipeline" value="pipeline06" ftype="zip"/> | 233 <param name="selected_search_scheme" value="GridSearchCV"/> |
210 <conditional name="search_param_selector"> | 234 <param name="infile_estimator" value="pipeline06" ftype="zip"/> |
211 <param name="search_p" value="n_estimators: [10, 50, 200, 1000]"/> | 235 <param name="infile_params" value="get_params06.tabular" ftype="tabular"/> |
212 <param name="selected_param_type" value="final_estimator_p"/> | 236 <repeat name="param_set"> |
213 </conditional> | 237 <param name="sp_list" value="[10, 50, 200, 1000]"/> |
214 <conditional name="search_param_selector"> | 238 <param name="sp_name" value="adaboostregressor__n_estimators"/> |
215 <param name="search_p" value="random_state: [324089]"/> | 239 </repeat> |
216 <param name="selected_param_type" value="final_estimator_p"/> | 240 <repeat name="param_set"> |
217 </conditional> | 241 <param name="sp_list" value="[324089]"/> |
242 <param name="sp_name" value="adaboostregressor__random_state"/> | |
243 </repeat> | |
218 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 244 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
219 <param name="header1" value="true" /> | 245 <param name="header1" value="true" /> |
220 <param name="selected_column_selector_option" value="all_columns"/> | 246 <param name="selected_column_selector_option" value="all_columns"/> |
221 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 247 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
222 <param name="header2" value="true" /> | 248 <param name="header2" value="true" /> |
228 </assert_contents> | 254 </assert_contents> |
229 </output> | 255 </output> |
230 </test> | 256 </test> |
231 <test> | 257 <test> |
232 <param name="selected_search_scheme" value="GridSearchCV"/> | 258 <param name="selected_search_scheme" value="GridSearchCV"/> |
233 <param name="infile_pipeline" value="pipeline07" ftype="zip"/> | 259 <param name="infile_estimator" value="pipeline07" ftype="zip"/> |
234 <conditional name="search_param_selector"> | 260 <param name="infile_params" value="get_params07.tabular" ftype="tabular"/> |
235 <param name="search_p" value="n_estimators: [10, 50, 100, 200]"/> | 261 <repeat name="param_set"> |
236 <param name="selected_param_type" value="final_estimator_p"/> | 262 <param name="sp_list" value="[10, 50, 100, 200]"/> |
237 </conditional> | 263 <param name="sp_name" value="adaboostclassifier__n_estimators"/> |
238 <conditional name="search_param_selector"> | 264 </repeat> |
239 <param name="search_p" value="random_state: [324089]"/> | 265 <repeat name="param_set"> |
240 <param name="selected_param_type" value="final_estimator_p"/> | 266 <param name="sp_list" value="[324089]"/> |
241 </conditional> | 267 <param name="sp_name" value="adaboostclassifier__random_state"/> |
242 <conditional name="search_param_selector"> | 268 </repeat> |
243 <param name="search_p" value="gamma: [1.0, 2.0]"/> | 269 <repeat name="param_set"> |
244 <param name="selected_param_type" value="prep_1_p"/> | 270 <param name="sp_list" value="[1.0, 2.0]"/> |
245 </conditional> | 271 <param name="sp_name" value="rbfsampler__gamma"/> |
272 </repeat> | |
246 <param name='selected_cv' value="default"/> | 273 <param name='selected_cv' value="default"/> |
247 <param name="n_splits" value="3"/> | 274 <param name="n_splits" value="3"/> |
248 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 275 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
249 <param name="header1" value="true" /> | 276 <param name="header1" value="true" /> |
250 <param name="selected_column_selector_option" value="all_columns"/> | 277 <param name="selected_column_selector_option" value="all_columns"/> |
258 </assert_contents> | 285 </assert_contents> |
259 </output> | 286 </output> |
260 </test> | 287 </test> |
261 <test> | 288 <test> |
262 <param name="selected_search_scheme" value="GridSearchCV"/> | 289 <param name="selected_search_scheme" value="GridSearchCV"/> |
263 <param name="infile_pipeline" value="pipeline08" ftype="zip"/> | 290 <param name="infile_estimator" value="pipeline08" ftype="zip"/> |
264 <conditional name="search_param_selector"> | 291 <param name="infile_params" value="get_params08.tabular" ftype="tabular"/> |
265 <param name="search_p" value="n_estimators: [10, 50, 100, 200]"/> | 292 <repeat name="param_set"> |
266 <param name="selected_param_type" value="final_estimator_p"/> | 293 <param name="sp_list" value="[10, 50, 100, 200]"/> |
267 </conditional> | 294 <param name="sp_name" value="adaboostclassifier__n_estimators"/> |
268 <conditional name="search_param_selector"> | 295 </repeat> |
269 <param name="search_p" value="random_state: [324089]"/> | 296 <repeat name="param_set"> |
270 <param name="selected_param_type" value="final_estimator_p"/> | 297 <param name="sp_list" value="[324089]"/> |
271 </conditional> | 298 <param name="sp_name" value="adaboostclassifier__random_state"/> |
272 <conditional name="search_param_selector"> | 299 </repeat> |
273 <param name="search_p" value="linkage: ['ward', 'complete', 'average']"/> | 300 <repeat name="param_set"> |
274 <param name="selected_param_type" value="prep_1_p"/> | 301 <param name="sp_list" value="['ward', 'complete', 'average']"/> |
275 </conditional> | 302 <param name="sp_name" value="featureagglomeration__linkage"/> |
303 </repeat> | |
276 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 304 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
277 <param name="header1" value="true" /> | 305 <param name="header1" value="true" /> |
278 <param name="selected_column_selector_option" value="all_columns"/> | 306 <param name="selected_column_selector_option" value="all_columns"/> |
279 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 307 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
280 <param name="header2" value="true" /> | 308 <param name="header2" value="true" /> |
285 </assert_contents> | 313 </assert_contents> |
286 </output> | 314 </output> |
287 </test> | 315 </test> |
288 <test> | 316 <test> |
289 <param name="selected_search_scheme" value="GridSearchCV"/> | 317 <param name="selected_search_scheme" value="GridSearchCV"/> |
290 <param name="infile_pipeline" value="pipeline01" ftype="zip"/> | 318 <param name="infile_estimator" value="pipeline01" ftype="zip"/> |
291 <conditional name="search_param_selector"> | 319 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> |
292 <param name="search_p" value="C: [1, 10, 100, 1000]"/> | 320 <repeat name="param_set"> |
293 <param name="selected_param_type" value="final_estimator_p"/> | 321 <param name="sp_list" value="[1, 10, 100, 1000]"/> |
294 </conditional> | 322 <param name="sp_name" value="svr__C"/> |
323 </repeat> | |
295 <param name='selected_cv' value="StratifiedKFold"/> | 324 <param name='selected_cv' value="StratifiedKFold"/> |
296 <param name="n_splits" value="3"/> | 325 <param name="n_splits" value="3"/> |
297 <param name="shuffle" value="true" /> | 326 <param name="shuffle" value="true" /> |
298 <param name="random_state" value="10"/> | 327 <param name="random_state" value="10"/> |
299 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 328 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
300 <param name="header1" value="true" /> | 329 <param name="header1" value="true" /> |
301 <param name="selected_column_selector_option" value="all_columns"/> | 330 <param name="selected_column_selector_option" value="all_columns"/> |
302 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 331 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
303 <param name="header2" value="true" /> | 332 <param name="header2" value="true" /> |
304 <param name="selected_column_selector_option2" value="all_columns"/> | 333 <param name="selected_column_selector_option2" value="all_columns"/> |
305 <output name="outfile_estimator" file="searchCV02" compare="sim_size" delta="1"/> | 334 <output name="outfile_object" file="searchCV02" compare="sim_size" delta="10"/> |
306 </test> | 335 </test> |
307 <test> | 336 <test> |
308 <param name="selected_search_scheme" value="GridSearchCV"/> | 337 <param name="selected_search_scheme" value="GridSearchCV"/> |
309 <param name="infile_pipeline" value="pipeline03" ftype="zip"/> | 338 <param name="infile_estimator" value="pipeline03" ftype="zip"/> |
310 <conditional name="search_param_selector"> | 339 <param name="infile_params" value="get_params03.tabular" ftype="tabular"/> |
311 <param name="search_p" value="n_estimators: [10, 50, 200, 1000]"/> | 340 <repeat name="param_set"> |
312 <param name="selected_param_type" value="final_estimator_p"/> | 341 <param name="sp_list" value="[10, 50, 200, 1000]"/> |
313 </conditional> | 342 <param name="sp_name" value="xgbclassifier__n_estimators"/> |
314 <conditional name="search_param_selector"> | 343 </repeat> |
315 <param name="search_p" value="random_state: [324089]"/> | 344 <repeat name="param_set"> |
316 <param name="selected_param_type" value="final_estimator_p"/> | 345 <param name="sp_list" value="[324089]"/> |
317 </conditional> | 346 <param name="sp_name" value="xgbclassifier__random_state"/> |
347 </repeat> | |
318 <param name="primary_scoring" value="balanced_accuracy"/> | 348 <param name="primary_scoring" value="balanced_accuracy"/> |
319 <param name='selected_cv' value="StratifiedKFold"/> | 349 <param name='selected_cv' value="StratifiedKFold"/> |
320 <param name="n_splits" value="3"/> | 350 <param name="n_splits" value="3"/> |
321 <param name="shuffle" value="true" /> | 351 <param name="shuffle" value="true" /> |
322 <param name="random_state" value="10"/> | 352 <param name="random_state" value="10"/> |
333 </assert_contents> | 363 </assert_contents> |
334 </output> | 364 </output> |
335 </test> | 365 </test> |
336 <test> | 366 <test> |
337 <param name="selected_search_scheme" value="GridSearchCV"/> | 367 <param name="selected_search_scheme" value="GridSearchCV"/> |
338 <param name="infile_pipeline" value="pipeline09" ftype="zip"/> | 368 <param name="infile_estimator" value="pipeline09" ftype="zip"/> |
339 <conditional name="search_param_selector"> | 369 <param name="infile_params" value="get_params09.tabular" ftype="tabular"/> |
340 <param name="search_p" value="n_neighbors: [50, 100, 150, 200]"/> | 370 <repeat name="param_set"> |
341 <param name="selected_param_type" value="prep_1_p"/> | 371 <param name="sp_list" value="[50, 100, 150, 200]"/> |
342 </conditional> | 372 <param name="sp_name" value="relieff__n_neighbors"/> |
343 <conditional name="search_param_selector"> | 373 </repeat> |
344 <param name="search_p" value="random_state: [324089]"/> | 374 <repeat name="param_set"> |
345 <param name="selected_param_type" value="final_estimator_p"/> | 375 <param name="sp_list" value="[324089]"/> |
346 </conditional> | 376 <param name="sp_name" value="randomforestregressor__random_state"/> |
377 </repeat> | |
347 <param name="primary_scoring" value="explained_variance"/> | 378 <param name="primary_scoring" value="explained_variance"/> |
348 <param name="secondary_scoring" value="neg_mean_squared_error,r2"/> | 379 <param name="secondary_scoring" value="neg_mean_squared_error,r2"/> |
349 <param name='selected_cv' value="StratifiedKFold"/> | 380 <param name='selected_cv' value="StratifiedKFold"/> |
350 <param name="n_splits" value="3"/> | 381 <param name="n_splits" value="3"/> |
351 <param name="shuffle" value="true" /> | 382 <param name="shuffle" value="true" /> |
365 </assert_contents> | 396 </assert_contents> |
366 </output> | 397 </output> |
367 </test> | 398 </test> |
368 <test> | 399 <test> |
369 <param name="selected_search_scheme" value="GridSearchCV"/> | 400 <param name="selected_search_scheme" value="GridSearchCV"/> |
370 <param name="infile_pipeline" value="pipeline02" ftype="zip"/> | 401 <param name="infile_estimator" value="pipeline02" ftype="zip"/> |
371 <conditional name="search_param_selector"> | 402 <param name="infile_params" value="get_params02.tabular" ftype="tabular"/> |
372 <param name="search_p" value="eps: [0.01, 0.001]"/> | 403 <repeat name="param_set"> |
373 <param name="selected_param_type" value="final_estimator_p"/> | 404 <param name="sp_list" value="[0.01, 0.001]"/> |
374 </conditional> | 405 <param name="sp_name" value="lassocv__eps"/> |
406 </repeat> | |
375 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 407 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
376 <param name="header1" value="true" /> | 408 <param name="header1" value="true" /> |
377 <param name="selected_column_selector_option" value="all_columns"/> | 409 <param name="selected_column_selector_option" value="all_columns"/> |
378 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 410 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
379 <param name="header2" value="true" /> | 411 <param name="header2" value="true" /> |
380 <param name="selected_column_selector_option2" value="all_columns"/> | 412 <param name="selected_column_selector_option2" value="all_columns"/> |
381 <output name="outfile_result"> | 413 <output name="outfile_result"> |
382 <assert_contents> | 414 <assert_contents> |
383 <has_n_columns n="12"/> | 415 <has_n_columns n="12"/> |
384 <has_text text="0.7762968161366681" /> | 416 <has_text text="0.776296816136668" /> |
385 </assert_contents> | 417 </assert_contents> |
386 </output> | 418 </output> |
387 </test> | 419 </test> |
388 <test> | 420 <test> |
389 <param name="selected_search_scheme" value="GridSearchCV"/> | 421 <param name="selected_search_scheme" value="GridSearchCV"/> |
390 <param name="infile_pipeline" value="pipeline05" ftype="zip"/> | 422 <param name="infile_estimator" value="pipeline05" ftype="zip"/> |
391 <conditional name="search_param_selector"> | 423 <param name="infile_params" value="get_params05.tabular" ftype="tabular"/> |
392 <param name="search_p" value="n_estimators: [10, 50, 100, 300]"/> | 424 <repeat name="param_set"> |
393 <param name="selected_param_type" value="final_estimator_p"/> | 425 <param name="sp_list" value="[10, 50, 100, 300]"/> |
394 </conditional> | 426 <param name="sp_name" value="randomforestregressor__n_estimators"/> |
427 </repeat> | |
395 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 428 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
396 <param name="header1" value="true" /> | 429 <param name="header1" value="true" /> |
397 <param name="selected_column_selector_option" value="all_columns"/> | 430 <param name="selected_column_selector_option" value="all_columns"/> |
398 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 431 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
399 <param name="header2" value="true" /> | 432 <param name="header2" value="true" /> |
405 </assert_contents> | 438 </assert_contents> |
406 </output> | 439 </output> |
407 </test> | 440 </test> |
408 <test expect_failure="true"> | 441 <test expect_failure="true"> |
409 <param name="selected_search_scheme" value="GridSearchCV"/> | 442 <param name="selected_search_scheme" value="GridSearchCV"/> |
410 <param name="infile_pipeline" value="pipeline01" ftype="zip"/> | 443 <param name="infile_estimator" value="pipeline01" ftype="zip"/> |
411 <conditional name="search_param_selector"> | 444 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> |
412 <param name="search_p" value="C: open('~/.ssh/authorized_keys', 'r').read()"/> | 445 <repeat name="param_set"> |
413 <param name="selected_param_type" value="final_estimator_p"/> | 446 <param name="sp_list" value="open('~/.ssh/authorized_keys', 'r').read()"/> |
414 </conditional> | 447 <param name="sp_name" value="svr__C"/> |
415 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 448 </repeat> |
416 <param name="header1" value="true" /> | 449 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
417 <param name="selected_column_selector_option" value="all_columns"/> | 450 <param name="header1" value="true" /> |
418 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 451 <param name="selected_column_selector_option" value="all_columns"/> |
419 <param name="header2" value="true" /> | 452 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
420 <param name="selected_column_selector_option2" value="all_columns"/> | 453 <param name="header2" value="true" /> |
421 </test> | 454 <param name="selected_column_selector_option2" value="all_columns"/> |
422 <test> | 455 </test> |
423 <param name="selected_search_scheme" value="GridSearchCV"/> | 456 <test> |
424 <param name="infile_pipeline" value="pipeline10" ftype="zip"/> | 457 <param name="selected_search_scheme" value="GridSearchCV"/> |
425 <conditional name="search_param_selector"> | 458 <param name="infile_estimator" value="pipeline10" ftype="zip"/> |
426 <param name="search_p" value="base_estimator-: [sklearn_tree.DecisionTreeRegressor(random_state=0), sklearn_tree.ExtraTreeRegressor(random_state=0)]"/> | 459 <param name="infile_params" value="get_params10.tabular" ftype="tabular"/> |
427 <param name="selected_param_type" value="final_estimator_p"/> | 460 <repeat name="param_set"> |
428 </conditional> | 461 <param name="sp_list" value=": [sklearn_tree.DecisionTreeRegressor(random_state=0), sklearn_tree.ExtraTreeRegressor(random_state=0)]"/> |
429 <conditional name="search_param_selector"> | 462 <param name="sp_name" value="adaboostregressor__base_estimator"/> |
430 <param name="search_p" value="random_state: [10]"/> | 463 </repeat> |
431 <param name="selected_param_type" value="final_estimator_p"/> | 464 <repeat name="param_set"> |
432 </conditional> | 465 <param name="sp_list" value="[10]"/> |
466 <param name="sp_name" value="adaboostregressor__random_state"/> | |
467 </repeat> | |
433 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 468 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
434 <param name="header1" value="true" /> | 469 <param name="header1" value="true" /> |
435 <param name="selected_column_selector_option" value="all_columns"/> | 470 <param name="selected_column_selector_option" value="all_columns"/> |
436 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 471 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
437 <param name="header2" value="true" /> | 472 <param name="header2" value="true" /> |
443 </assert_contents> | 478 </assert_contents> |
444 </output> | 479 </output> |
445 </test> | 480 </test> |
446 <test> | 481 <test> |
447 <param name="selected_search_scheme" value="GridSearchCV"/> | 482 <param name="selected_search_scheme" value="GridSearchCV"/> |
448 <param name="infile_pipeline" value="pipeline09" ftype="zip"/> | 483 <param name="infile_estimator" value="pipeline09" ftype="zip"/> |
449 <conditional name="search_param_selector"> | 484 <param name="infile_params" value="get_params09.tabular" ftype="tabular"/> |
450 <param name="search_p" value=": [sklearn_feature_selection.SelectKBest(), | 485 <repeat name="param_set"> |
486 <param name="sp_list" value=": [sklearn_feature_selection.SelectKBest(), | |
451 sklearn_feature_selection.VarianceThreshold(), skrebate_ReliefF(), sklearn_preprocessing.RobustScaler()]"/> | 487 sklearn_feature_selection.VarianceThreshold(), skrebate_ReliefF(), sklearn_preprocessing.RobustScaler()]"/> |
452 <param name="selected_param_type" value="prep_1_p"/> | 488 <param name="sp_name" value="relieff"/> |
453 </conditional> | 489 </repeat> |
454 <conditional name="search_param_selector"> | 490 <repeat name="param_set"> |
455 <param name="search_p" value="random_state: [10]"/> | 491 <param name="sp_list" value="[10]"/> |
456 <param name="selected_param_type" value="final_estimator_p"/> | 492 <param name="sp_name" value="randomforestregressor__random_state"/> |
457 </conditional> | 493 </repeat> |
458 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 494 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
459 <param name="header1" value="true" /> | 495 <param name="header1" value="true" /> |
460 <param name="selected_column_selector_option" value="all_columns"/> | 496 <param name="selected_column_selector_option" value="all_columns"/> |
461 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 497 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
462 <param name="header2" value="true" /> | 498 <param name="header2" value="true" /> |
468 </assert_contents> | 504 </assert_contents> |
469 </output> | 505 </output> |
470 </test> | 506 </test> |
471 <test> | 507 <test> |
472 <param name="selected_search_scheme" value="GridSearchCV"/> | 508 <param name="selected_search_scheme" value="GridSearchCV"/> |
473 <param name="infile_pipeline" value="pipeline09" ftype="zip"/> | 509 <param name="infile_estimator" value="pipeline09" ftype="zip"/> |
474 <conditional name="search_param_selector"> | 510 <param name="infile_params" value="get_params09.tabular" ftype="tabular"/> |
475 <param name="search_p" value=": [None,'sk_prep_all', 8, 14, skrebate_ReliefF(n_features_to_select=12)]"/> | 511 <repeat name="param_set"> |
476 <param name="selected_param_type" value="prep_1_p"/> | 512 <param name="sp_list" value=": [None,'sk_prep_all', 8, 14, skrebate_ReliefF(n_features_to_select=12)]"/> |
477 </conditional> | 513 <param name="sp_name" value="relieff"/> |
478 <conditional name="search_param_selector"> | 514 </repeat> |
479 <param name="search_p" value="random_state: [10]"/> | 515 <repeat name="param_set"> |
480 <param name="selected_param_type" value="final_estimator_p"/> | 516 <param name="sp_list" value="[10]"/> |
481 </conditional> | 517 <param name="sp_name" value="randomforestregressor__random_state"/> |
518 </repeat> | |
482 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 519 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
483 <param name="header1" value="true" /> | 520 <param name="header1" value="true" /> |
484 <param name="selected_column_selector_option" value="all_columns"/> | 521 <param name="selected_column_selector_option" value="all_columns"/> |
485 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 522 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
486 <param name="header2" value="true" /> | 523 <param name="header2" value="true" /> |
492 </assert_contents> | 529 </assert_contents> |
493 </output> | 530 </output> |
494 </test> | 531 </test> |
495 <test> | 532 <test> |
496 <param name="selected_search_scheme" value="GridSearchCV"/> | 533 <param name="selected_search_scheme" value="GridSearchCV"/> |
497 <param name="infile_pipeline" value="pipeline11" ftype="zip"/> | 534 <param name="infile_estimator" value="pipeline11" ftype="zip"/> |
498 <conditional name="search_param_selector"> | 535 <param name="infile_params" value="get_params11.tabular" ftype="tabular"/> |
499 <param name="search_p" value="n_neighbors: [3,4,5]"/> | 536 <repeat name="param_set"> |
500 <param name="selected_param_type" value="prep_1_p"/> | 537 <param name="sp_list" value="[3,4,5]"/> |
501 </conditional> | 538 <param name="sp_name" value="editednearestneighbours__n_neighbors"/> |
502 <conditional name="search_param_selector"> | 539 </repeat> |
503 <param name="search_p" value="random_state: [10]"/> | 540 <repeat name="param_set"> |
504 <param name="selected_param_type" value="prep_1_p"/> | 541 <param name="sp_list" value="[10]"/> |
505 </conditional> | 542 <param name="sp_name" value="editednearestneighbours__random_state"/> |
506 <conditional name="search_param_selector"> | 543 </repeat> |
507 <param name="search_p" value="n_estimators:[10, 50, 100, 500]"/> | 544 <repeat name="param_set"> |
508 <param name="selected_param_type" value="final_estimator_p"/> | 545 <param name="sp_list" value="[10, 50, 100, 500]"/> |
509 </conditional> | 546 <param name="sp_name" value="randomforestclassifier__n_estimators"/> |
510 <conditional name="search_param_selector"> | 547 </repeat> |
511 <param name="search_p" value="random_state: [10]"/> | 548 <repeat name="param_set"> |
512 <param name="selected_param_type" value="final_estimator_p"/> | 549 <param name="sp_list" value="[10]"/> |
513 </conditional> | 550 <param name="sp_name" value="randomforestclassifier__random_state"/> |
551 </repeat> | |
514 <param name="primary_scoring" value="f1_macro"/> | 552 <param name="primary_scoring" value="f1_macro"/> |
515 <param name="secondary_scoring" value="balanced_accuracy,accuracy"/> | 553 <param name="secondary_scoring" value="balanced_accuracy,accuracy"/> |
516 <param name="n_splits" value="5"/> | 554 <param name="n_splits" value="5"/> |
517 <param name="infile1" value="imblearn_X.tabular" ftype="tabular"/> | 555 <param name="infile1" value="imblearn_X.tabular" ftype="tabular"/> |
518 <param name="header1" value="true" /> | 556 <param name="header1" value="true" /> |
529 </assert_contents> | 567 </assert_contents> |
530 </output> | 568 </output> |
531 </test> | 569 </test> |
532 <test> | 570 <test> |
533 <param name="selected_search_scheme" value="GridSearchCV"/> | 571 <param name="selected_search_scheme" value="GridSearchCV"/> |
534 <param name="infile_pipeline" value="pipeline12" ftype="zip"/> | 572 <param name="infile_estimator" value="pipeline12" ftype="zip"/> |
535 <conditional name="search_param_selector"> | 573 <param name="infile_params" value="get_params12.tabular" ftype="tabular"/> |
536 <param name="search_p" value="estimator__n_estimators: [10, 100, 200]"/> | 574 <repeat name="param_set"> |
537 <param name="selected_param_type" value="final_estimator_p"/> | 575 <param name="sp_list" value="[10, 100, 200]"/> |
538 </conditional> | 576 <param name="sp_name" value="rfe__estimator__n_estimators"/> |
539 <conditional name="search_param_selector"> | 577 </repeat> |
540 <param name="search_p" value="n_features_to_select: [10, None]"/> | 578 <repeat name="param_set"> |
541 <param name="selected_param_type" value="final_estimator_p"/> | 579 <param name="sp_list" value="[10, None]"/> |
542 </conditional> | 580 <param name="sp_name" value="rfe__n_features_to_select"/> |
581 </repeat> | |
543 <param name="primary_scoring" value="r2"/> | 582 <param name="primary_scoring" value="r2"/> |
544 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 583 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> |
545 <param name="header1" value="true" /> | 584 <param name="header1" value="true" /> |
546 <param name="selected_column_selector_option" value="all_columns"/> | 585 <param name="selected_column_selector_option" value="all_columns"/> |
547 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 586 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
552 <has_n_columns n="13"/> | 591 <has_n_columns n="13"/> |
553 <has_text text="0.8149439619875293"/> | 592 <has_text text="0.8149439619875293"/> |
554 </assert_contents> | 593 </assert_contents> |
555 </output> | 594 </output> |
556 </test> | 595 </test> |
596 <!--test> | |
597 <conditional name="search_schemes"> | |
598 <param name="selected_search_scheme" value="GridSearchCV"/> | |
599 <param name="infile_estimator" value="pipeline05" ftype="zip"/> | |
600 <section name="search_params_builder"> | |
601 <param name="infile_params" value="get_params05.tabular" ftype="tabular"/> | |
602 <repeat name="param_set"> | |
603 <param name="sp_list" value="[10, 50, 100, 300]"/> | |
604 <param name="sp_name" value="randomforestregressor__n_estimators"/> | |
605 </repeat> | |
606 </section> | |
607 </conditional> | |
608 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | |
609 <param name="header1" value="true" /> | |
610 <param name="selected_column_selector_option" value="all_columns"/> | |
611 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | |
612 <param name="header2" value="true" /> | |
613 <param name="selected_column_selector_option2" value="all_columns"/> | |
614 <output name="outfile_result"> | |
615 <assert_contents> | |
616 <has_n_columns n="1"/> | |
617 <has_text text="0.7986842219788204" /> | |
618 </assert_contents> | |
619 </output> | |
620 </test--> | |
557 </tests> | 621 </tests> |
558 <help> | 622 <help> |
559 <![CDATA[ | 623 <![CDATA[ |
560 **What it does** | 624 **What it does** |
561 Searches optimized parameter values for an estimator or pipeline through either exhaustive grid cross validation search or Randomized cross validation search. | 625 Searches optimized parameter settings for an estimator or pipeline through either exhaustive grid cross validation search or Randomized cross validation search. |
562 Please refer to `Scikit-learn model_selection GridSearchCV`_, `Scikit-learn model_selection RandomizedSearchCV`_ and `Tuning hyper-parameters`_. | 626 Please refer to `Scikit-learn model_selection GridSearchCV`_, `Scikit-learn model_selection RandomizedSearchCV`_ and `Tuning hyper-parameters`_. |
563 | 627 |
564 **How to choose search parameters?** | 628 **Return** |
629 | |
630 Outputs `cv_results_` from SearchCV in a tabular dataset if no train_test_split is performed; otherwise outputs the test score(s). Optionally, the SearchCV object itself can also be saved as an output. | |
631 | |
632 **How to choose search parameters grid?** | |
565 | 633 |
566 Please refer to `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_, `neighbors`_ and `xgboost`_ for estimator parameters. | 634 Please refer to `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_, `neighbors`_ and `xgboost`_ for estimator parameters. |
567 Refer to `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_, `cluster.FeatureAgglomeration`_ and `skrebate`_ for parameter in the pre-processing steps. | 635 Refer to `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_, `cluster.FeatureAgglomeration`_ |
568 | 636 and `skrebate`_ for parameters in the pre-processing steps. |
569 **Search parameter input** accepts parameter and setting in key:value pair. One pair per input box. Setting can be list, numpy array, or distribution. | 637 |
570 The evaluation of settings supports operations in Math, list comprehension, numpy.arange(np_arange), most numpy.random(e.g., np_random_uniform) and some scipy.stats(e.g., scipy_stats_zipf) classes or functions, and others. | 638 **Search parameter list** can be list, numpy array, or distribution. The evaluation of settings supports operations in Math, |
571 | 639 list comprehension, numpy.arange(np_arange), most numpy.random(e.g., np_random_uniform) and some scipy.stats(e.g., scipy_stats_zipf) classes or functions, and others. |
572 **Examples:** | 640 |
573 | 641 Examples: |
574 - K: [3, 5, 7, 9] | 642 |
575 | 643 - [3, 5, 7, 9] |
576 - n_estimators: list(range(50, 1001, 50)) | 644 |
577 | 645 - list(range(50, 1001, 50)) |
578 - gamma: np_arange(0.01, 1, 0.1) | 646 |
579 | 647 - np_arange(0.01, 1, 0.1) |
580 - alpha: np_random_choice(list(range(1, 51)) + [None], size=20) | 648 |
581 | 649 - np_random_choice(list(range(1, 51)) + [None], size=20) |
582 - max_depth: scipy_stats_randint(1, 11) | 650 |
583 | 651 - scipy_stats_randint(1, 11) |
584 **Estimator search/eval (additional '-')**:: | 652 |
585 | 653 **Estimator / Preprocessor search (additional `:` in the front)**:: |
586 base_estimator-: [sklearn_tree.DecisionTreeRegressor(), sklearn_tree.ExtraTreeRegressor()] | 654 |
587 | 655 : [sklearn_tree.DecisionTreeRegressor(), sklearn_tree.ExtraTreeRegressor()] |
588 **Preprocessors search/swap**:: | |
589 | 656 |
590 : [sklearn_feature_selection.SelectKBest(), sklearn_feature_selection.VarianceThreshold(), | 657 : [sklearn_feature_selection.SelectKBest(), sklearn_feature_selection.VarianceThreshold(), |
591 skrebate_ReliefF(), sklearn_preprocessing.RobustScaler()] | 658 skrebate_ReliefF(), sklearn_preprocessing.RobustScaler()] |
592 | 659 |
593 **Hot number/keyword for preprocessors**:: | 660 **Hot number/keyword for preprocessors**:: |
654 Support mix (CAUTION: Mix of imblearn and other preprocessors may not work), e.g.:: | 721 Support mix (CAUTION: Mix of imblearn and other preprocessors may not work), e.g.:: |
655 | 722 |
656 : [None, 'sk_prep_all', 22, 'k_appr_all', sklearn_feature_selection.SelectKBest(k=50)] | 723 : [None, 'sk_prep_all', 22, 'k_appr_all', sklearn_feature_selection.SelectKBest(k=50)] |
657 | 724 |
658 | 725 |
726 | |
727 **Whether to do train_test_split?** | |
728 | |
729 Please refer to `https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation`_ | |
730 | |
731 | |
732 .. image:: https://scikit-learn.org/stable/_images/grid_search_cross_validation.png | |
733 :height: 300 | |
734 :width: 400 | |
735 | |
736 | |
659 .. _`Scikit-learn model_selection GridSearchCV`: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html | 737 .. _`Scikit-learn model_selection GridSearchCV`: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html |
660 .. _`Scikit-learn model_selection RandomizedSearchCV`: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html | 738 .. _`Scikit-learn model_selection RandomizedSearchCV`: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html |
661 .. _`Tuning hyper-parameters`: http://scikit-learn.org/stable/modules/grid_search.html | 739 .. _`Tuning hyper-parameters`: http://scikit-learn.org/stable/modules/grid_search.html |
662 | 740 |
663 .. _`svm`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.svm | 741 .. _`svm`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.svm |
672 .. _`feature_selection`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection | 750 .. _`feature_selection`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection |
673 .. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition | 751 .. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition |
674 .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation | 752 .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation |
675 .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html | 753 .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html |
676 .. _`skrebate`: https://epistasislab.github.io/scikit-rebate/using/ | 754 .. _`skrebate`: https://epistasislab.github.io/scikit-rebate/using/ |
755 .. _`https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation`: https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation | |
677 | 756 |
678 ]]> | 757 ]]> |
679 </help> | 758 </help> |
680 <expand macro="sklearn_citation"> | 759 <expand macro="sklearn_citation"> |
681 <expand macro="skrebate_citation"/> | 760 <expand macro="skrebate_citation"/> |