Mercurial > repos > bgruening > sklearn_searchcv
comparison search_model_validation.xml @ 25:fc99237f3392 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author | bgruening |
---|---|
date | Wed, 09 Aug 2023 13:04:12 +0000 |
parents | 301e07345c93 |
children |
comparison
equal
deleted
inserted
replaced
24:301e07345c93 | 25:fc99237f3392 |
---|---|
1 <tool id="sklearn_searchcv" name="Hyperparameter Search" version="@VERSION@" profile="20.05"> | 1 <tool id="sklearn_searchcv" name="Hyperparameter Search" version="@VERSION@" profile="@PROFILE@"> |
2 <description>performs hyperparameter optimization using various SearchCVs</description> | 2 <description>performs hyperparameter optimization using various SearchCVs</description> |
3 <macros> | 3 <macros> |
4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
5 <macro name="search_cv_estimator"> | 5 <macro name="search_cv_estimator"> |
6 <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/> | 6 <param name="infile_estimator" type="data" format="h5mlm" label="Choose the dataset containing pipeline/estimator object" /> |
7 <param name="is_deep_learning" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Is the estimator a deep learning model?"/> | 7 <param name="is_deep_learning" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Is the estimator a deep learning model?" /> |
8 <section name="search_params_builder" title="Search parameters Builder" expanded="true"> | 8 <section name="search_params_builder" title="Search parameters Builder" expanded="true"> |
9 <param name="infile_params" type="data" format="tabular" optional="true" label="Choose the dataset containing parameter names" help="This dataset could be the output of `get_params` in the `Estimator Attributes` tool."/> | 9 <repeat name="param_set" min="1" max="30" title="Parameter settings for search:"> |
10 <repeat name="param_set" min="1" max="30" title="Parameter settings for search:"> | 10 <param name="sp_name" type="select" optional="true" label="Choose a parameter name (with current value)"> |
11 <param name="sp_name" type="select" optional="true" label="Choose a parameter name (with current value)"> | 11 <options from_dataset="infile_estimator" meta_file_key="hyper_params" startswith="@"> |
12 <options from_dataset="infile_params"> | 12 <column name="name" index="2" /> |
13 <column name="name" index="2"/> | 13 <column name="value" index="1" /> |
14 <column name="value" index="1"/> | 14 <filter type="unique_value" name="unique_param" column="1" /> |
15 <filter type="unique_value" name="unique_param" column="1"/> | 15 </options> |
16 </options> | 16 </param> |
17 </param> | 17 <param name="sp_list" type="text" value="" optional="true" label="Search list" help="list or array-like, for example: [1, 10, 100, 1000], [True, False] and ['auto', 'sqrt', None]. See `help` section for more examples"> |
18 <param name="sp_list" type="text" value="" optional="true" label="Search list" help="list or array-like, for example: [1, 10, 100, 1000], [True, False] and ['auto', 'sqrt', None]. See `help` section for more examples"> | 18 <sanitizer> |
19 <sanitizer> | 19 <valid initial="default"> |
20 <valid initial="default"> | 20 <add value="'" /> |
21 <add value="'"/> | 21 <add value="&quot;" /> |
22 <add value="&quot;"/> | 22 <add value="[" /> |
23 <add value="["/> | 23 <add value="]" /> |
24 <add value="]"/> | 24 </valid> |
25 </valid> | 25 </sanitizer> |
26 </sanitizer> | 26 </param> |
27 </param> | 27 </repeat> |
28 </repeat> | |
29 </section> | 28 </section> |
30 </macro> | 29 </macro> |
31 </macros> | 30 </macros> |
32 <expand macro="python_requirements"/> | 31 <expand macro="python_requirements" /> |
33 <expand macro="macro_stdio"/> | 32 <expand macro="macro_stdio" /> |
34 <version_command>echo "@VERSION@"</version_command> | 33 <version_command>echo "@VERSION@"</version_command> |
35 <command><![CDATA[ | 34 <command> |
35 <![CDATA[ | |
36 export HDF5_USE_FILE_LOCKING='FALSE'; | 36 export HDF5_USE_FILE_LOCKING='FALSE'; |
37 #if $input_options.selected_input == 'refseq_and_interval' | 37 #if $input_options.selected_input == 'refseq_and_interval' |
38 bgzip -c '$input_options.target_file' > '${target_file.element_identifier}.gz' && | 38 bgzip -c '$input_options.target_file' > '${target_file.element_identifier}.gz' && |
39 tabix -p bed '${target_file.element_identifier}.gz' && | 39 tabix -p bed '${target_file.element_identifier}.gz' && |
40 #end if | 40 #end if |
41 python '$__tool_directory__/search_model_validation.py' | 41 python '$__tool_directory__/search_model_validation.py' |
42 --inputs '$inputs' | 42 --inputs '$inputs' |
43 --estimator '$search_schemes.infile_estimator' | 43 --estimator '$infile_estimator' |
44 #if $input_options.selected_input == 'seq_fasta' | 44 #if $input_options.selected_input == 'seq_fasta' |
45 --fasta_path '$input_options.fasta_path' | 45 --fasta_path '$input_options.fasta_path' |
46 #elif $input_options.selected_input == 'refseq_and_interval' | 46 #elif $input_options.selected_input == 'refseq_and_interval' |
47 --ref_seq '$input_options.ref_genome_file' | 47 --ref_seq '$input_options.ref_genome_file' |
48 --interval '$input_options.interval_file' | 48 --interval '$input_options.interval_file' |
49 --targets "`pwd`/${target_file.element_identifier}.gz" | 49 --targets "`pwd`/${target_file.element_identifier}.gz" |
50 #else | 50 #else |
51 --infile1 '$input_options.infile1' | 51 --infile1 '$input_options.infile1' |
52 #end if | 52 #end if |
53 --infile2 '$input_options.infile2' | 53 --infile2 '$input_options.infile2' |
54 #if $save != 'save_no_fit' | 54 #if $save != 'save_no_fit' |
55 --outfile_result '$outfile_result' | 55 --outfile_result '$outfile_result' |
56 #end if | 56 #end if |
57 #if $save == 'save_estimator' | 57 #if $save == 'save_estimator' |
58 --outfile_object '$outfile_object' | 58 --outfile_object '$outfile_object' |
59 #end if | 59 #end if |
60 #if $save == 'save_no_fit' | 60 #if $save == 'save_no_fit' |
61 --outfile_object '$outfile_object_no_fit' | 61 --outfile_object '$outfile_object_no_fit' |
62 #end if | 62 #end if |
63 #if $search_schemes.is_deep_learning == 'booltrue' and $save == 'save_estimator' and $outer_split.split_mode == 'nested_cv' | 63 #if $options.cv_selector.selected_cv in ['GroupKFold', 'GroupShuffleSplit', 'LeaveOneGroupOut', 'LeavePGroupsOut'] |
64 --outfile_weights '$outfile_weights' | 64 --groups '$options.cv_selector.groups_selector.infile_g' |
65 #end if | 65 #end if |
66 #if $search_schemes.options.cv_selector.selected_cv in ['GroupKFold', 'GroupShuffleSplit', 'LeaveOneGroupOut', 'LeavePGroupsOut'] | 66 |
67 --groups '$search_schemes.options.cv_selector.groups_selector.infile_g' | 67 ]]> |
68 #end if | |
69 ]]> | |
70 </command> | 68 </command> |
71 <configfiles> | 69 <configfiles> |
72 <inputs name="inputs" /> | 70 <inputs name="inputs" /> |
73 </configfiles> | 71 </configfiles> |
74 <inputs> | 72 <inputs> |
75 <conditional name="search_schemes"> | 73 <conditional name="search_algos"> |
76 <param name="selected_search_scheme" type="select" label="Select a model selection search scheme"> | 74 <param name="selected_search_algo" type="select" label="Select a hyperparameter search algorithm"> |
77 <option value="GridSearchCV" selected="true">GridSearchCV - Exhaustive search over specified parameter values for an estimator </option> | 75 <option value="GridSearchCV" selected="true">GridSearchCV - Exhaustive search over specified parameter values for an estimator </option> |
78 <option value="RandomizedSearchCV">RandomizedSearchCV - Randomized search on hyper parameters for an estimator</option> | 76 <option value="RandomizedSearchCV">RandomizedSearchCV - Randomized search on hyper parameters for an estimator</option> |
77 <option value="skopt.BayesSearchCV">BayesSearchCV - Bayesian optimization over hyper parameters</option> | |
79 </param> | 78 </param> |
80 <when value="GridSearchCV"> | 79 <when value="GridSearchCV"> |
81 <expand macro="search_cv_estimator"/> | |
82 <section name="options" title="Advanced Options for SearchCV" expanded="false"> | |
83 <expand macro="search_cv_options"/> | |
84 </section> | |
85 </when> | 80 </when> |
86 <when value="RandomizedSearchCV"> | 81 <when value="RandomizedSearchCV"> |
87 <expand macro="search_cv_estimator"/> | 82 <param argument="n_iter" type="integer" value="10" label="Number of parameter settings that are sampled" /> |
88 <section name="options" title="Advanced Options for SearchCV" expanded="false"> | 83 <expand macro="random_state" /> |
89 <expand macro="search_cv_options"/> | 84 </when> |
90 <param argument="n_iter" type="integer" value="10" label="Number of parameter settings that are sampled"/> | 85 <when value="skopt.BayesSearchCV"> |
91 <expand macro="random_state"/> | 86 <param argument="optimizer_kwargs" type="text" value="" optional="true" help="dict, optional Dict of arguments passed to :class:Optimizer. For example, {'base_estimator': 'RF'} would use a Random Forest surrogate instead of the default Gaussian Process."> |
92 </section> | 87 <sanitizer> |
88 <valid initial="default"> | |
89 <add value="{" /> | |
90 <add value="}" /> | |
91 </valid> | |
92 </sanitizer> | |
93 </param> | |
94 <param argument="n_iter" type="integer" value="50" label="Number of parameter settings that are sampled" /> | |
95 <param argument="n_points" type="integer" value="1" label="Number of parameter settings to sample in parallel" help="Maximum parallel equals n_points times cv jobs." /> | |
96 <expand macro="random_state" /> | |
93 </when> | 97 </when> |
94 </conditional> | 98 </conditional> |
95 <expand macro="sl_mixed_input_plus_sequence"/> | 99 <expand macro="search_cv_estimator" /> |
100 <section name="options" title="Advanced Options for SearchCV" expanded="false"> | |
101 <expand macro="search_cv_options" /> | |
102 </section> | |
103 <expand macro="sl_mixed_input_plus_sequence" /> | |
96 <conditional name="outer_split"> | 104 <conditional name="outer_split"> |
97 <param name="split_mode" type="select" label="Whether to hold a portion of samples for test exclusively?" help="Nested CV or train_test_split"> | 105 <param name="split_mode" type="select" label="Whether to hold a portion of samples for test exclusively, nested CV?"> |
98 <option value="no" selected="true">Nope</option> | 106 <option value="no" selected="true">Nope</option> |
99 <option value="nested_cv">Yes - do nested CV</option> | 107 <option value="nested_cv">Yes - do nested CV</option> |
100 </param> | 108 </param> |
101 <when value='no'/> | 109 <when value='no' /> |
102 <when value="nested_cv"> | 110 <when value="nested_cv"> |
103 <expand macro="cv_reduced" label="Select the outer cv splitter"/> | 111 <expand macro="cv_reduced" label="Select the outer cv splitter" /> |
104 </when> | 112 </when> |
105 </conditional> | 113 </conditional> |
106 <param name="save" type="select" label="Save best estimator?" help="For a non-deep learning model, save will output fitted best_estimator_ (refit must be true) or a list of cv_results_ from each outer split in nested CV mode. For a deep learning model, by checking the boolean option below the model input, the outputs are two parts, model skeleton and weights. Save Deep learning model for nested CV is not supported."> | 114 <param name="save" type="select" label="Save best estimator?" help="For a non-deep learning model, save will output fitted best_estimator_ or a list of cv_results_ from each outer split if in nested CV mode. For a deep learning model, by checking the boolean option below the model input, the outputs are two parts, model skeleton and weights. Save Deep learning model for nested CV is not supported."> |
107 <option value="nope">Nope, save is unnecessary</option> | 115 <option value="nope">Nope, save is unnecessary</option> |
108 <option value="save_estimator" selected="true">Fitted best estimator or Detailed cv_results_ from nested CV</option> | 116 <option value="save_estimator" selected="true">Fitted best estimator or Detailed cv_results_ from nested CV</option> |
109 <option value="save_no_fit">SearchCV object without fitting</option> | 117 <option value="save_no_fit">SearchCV object without fitting</option> |
110 </param> | 118 </param> |
111 </inputs> | 119 </inputs> |
112 <outputs> | 120 <outputs> |
113 <data format="tabular" name="outfile_result"> | 121 <data format="tabular" name="outfile_result"> |
114 <filter>save != 'save_no_fit'</filter> | 122 <filter>save != 'save_no_fit'</filter> |
115 </data> | 123 </data> |
116 <data format="zip" name="outfile_object" label="Fitted estimator or estimator skeleton on ${on_string}"> | 124 <data format="h5mlm" name="outfile_object" label="Fitted best estimator on ${on_string}"> |
117 <filter>save == 'save_estimator' and outer_split['split_mode'] == 'no'</filter> | 125 <filter>save == 'save_estimator' and outer_split['split_mode'] == 'no'</filter> |
118 </data> | 126 </data> |
119 <data format="h5" name="outfile_weights" label="Weights trained on ${on_string}"> | |
120 <filter>search_schemes['is_deep_learning'] and save == 'save_estimator' and outer_split['split_mode'] == 'no'</filter> | |
121 </data> | |
122 <collection type="list" name="outfile_in_splits" label="cv_results_ from splits on ${on_string}"> | 127 <collection type="list" name="outfile_in_splits" label="cv_results_ from splits on ${on_string}"> |
123 <filter>not search_schemes['is_deep_learning'] and save == 'save_estimator' and outer_split['split_mode'] == 'nested_cv'</filter> | 128 <filter>not is_deep_learning and save == 'save_estimator' and outer_split['split_mode'] == 'nested_cv'</filter> |
124 <discover_datasets format="tabular" pattern="__name__" directory="cv_results_in_folds"/> | 129 <discover_datasets format="tabular" pattern="__name__" directory="cv_results_in_folds" /> |
125 </collection> | 130 </collection> |
126 <data format="zip" name="outfile_object_no_fit" label="Unfitted SearchCV on ${on_string}"> | 131 <data format="h5mlm" name="outfile_object_no_fit" label="Unfitted SearchCV on ${on_string}"> |
127 <filter>save == 'save_no_fit'</filter> | 132 <filter>save == 'save_no_fit'</filter> |
128 </data> | 133 </data> |
129 </outputs> | 134 </outputs> |
130 <tests> | 135 <tests> |
131 <test> | 136 <test> |
132 <param name="selected_search_scheme" value="GridSearchCV"/> | 137 <param name="selected_search_algo" value="GridSearchCV" /> |
133 <param name="infile_estimator" value="pipeline01" ftype="zip"/> | 138 <param name="infile_estimator" value="pipeline01" ftype="h5mlm" /> |
134 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> | 139 <repeat name="param_set"> |
135 <repeat name="param_set"> | 140 <param name="sp_list" value="[1, 10, 100, 1000]" /> |
136 <param name="sp_list" value="[1, 10, 100, 1000]"/> | 141 <param name="sp_name" value="svr__C" /> |
137 <param name="sp_name" value="svr__C"/> | 142 </repeat> |
138 </repeat> | 143 <repeat name="param_set"> |
139 <repeat name="param_set"> | 144 <param name="sp_list" value="['all', 3, 5, 7, 9]" /> |
140 <param name="sp_list" value="[-1, 3, 5, 7, 9]"/> | 145 <param name="sp_name" value="selectkbest__k" /> |
141 <param name="sp_name" value="selectkbest__k"/> | 146 </repeat> |
142 </repeat> | 147 <param name="error_score" value="false" /> |
143 <param name="error_score" value="false"/> | 148 <param name="n_splits" value="3" /> |
144 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 149 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
145 <param name="header1" value="true" /> | 150 <param name="header1" value="true" /> |
146 <param name="selected_column_selector_option" value="all_columns"/> | 151 <param name="selected_column_selector_option" value="all_columns" /> |
147 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 152 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
148 <param name="header2" value="true" /> | 153 <param name="header2" value="true" /> |
149 <param name="selected_column_selector_option2" value="all_columns"/> | 154 <param name="selected_column_selector_option2" value="all_columns" /> |
150 <output name="outfile_result"> | 155 <output name="outfile_result"> |
151 <assert_contents> | 156 <assert_contents> |
152 <has_n_columns n="13"/> | 157 <has_n_columns n="13" /> |
153 <has_text text="0.7938837807353147"/> | 158 <has_text text="0.7938837807" /> |
154 <has_text text="{'selectkbest__k': 9, 'svr__C': 1}"/> | 159 <has_text text="{'selectkbest__k': 9, 'svr__C': 1}" /> |
155 </assert_contents> | 160 </assert_contents> |
156 </output> | 161 </output> |
157 </test> | 162 </test> |
158 <test expect_failure="true"> | 163 <test expect_failure="true"> |
159 <param name="selected_search_scheme" value="GridSearchCV"/> | 164 <param name="selected_search_algo" value="GridSearchCV" /> |
160 <param name="infile_estimator" value="pipeline01" ftype="zip"/> | 165 <param name="infile_estimator" value="pipeline01" ftype="h5mlm" /> |
161 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> | 166 <repeat name="param_set"> |
162 <repeat name="param_set"> | 167 <param name="sp_list" value="[1, 10, 100, 1000]" /> |
163 <param name="sp_list" value="[1, 10, 100, 1000]"/> | 168 <param name="sp_name" value="svr__C" /> |
164 <param name="sp_name" value="svr__C"/> | 169 </repeat> |
165 </repeat> | 170 <repeat name="param_set"> |
166 <repeat name="param_set"> | 171 <param name="sp_list" value="[-1, 3, 5, 7, 9]" /> |
167 <param name="sp_list" value="[-1, 3, 5, 7, 9]"/> | 172 <param name="sp_name" value="selectkbest__k" /> |
168 <param name="sp_name" value="selectkbest__k"/> | 173 </repeat> |
169 </repeat> | 174 <param name="error_score" value="true" /> |
170 <param name="error_score" value="true"/> | 175 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
171 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 176 <param name="header1" value="true" /> |
172 <param name="header1" value="true" /> | 177 <param name="selected_column_selector_option" value="all_columns" /> |
173 <param name="selected_column_selector_option" value="all_columns"/> | 178 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
174 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 179 <param name="header2" value="true" /> |
175 <param name="header2" value="true" /> | 180 <param name="selected_column_selector_option2" value="all_columns" /> |
176 <param name="selected_column_selector_option2" value="all_columns"/> | 181 </test> |
177 </test> | 182 <test> |
178 <test> | 183 <param name="selected_search_algo" value="RandomizedSearchCV" /> |
179 <param name="selected_search_scheme" value="RandomizedSearchCV"/> | 184 <param name="infile_estimator" value="pipeline01" ftype="h5mlm" /> |
180 <param name="infile_estimator" value="pipeline01" ftype="zip"/> | 185 <repeat name="param_set"> |
181 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> | 186 <param name="sp_list" value="[1, 10, 100, 1000]" /> |
182 <repeat name="param_set"> | 187 <param name="sp_name" value="svr__C" /> |
183 <param name="sp_list" value="[1, 10, 100, 1000]"/> | 188 </repeat> |
184 <param name="sp_name" value="svr__C"/> | 189 <repeat name="param_set"> |
185 </repeat> | 190 <param name="sp_list" value="['linear', 'poly', 'rbf', 'sigmoid']" /> |
186 <repeat name="param_set"> | 191 <param name="sp_name" value="svr__kernel" /> |
187 <param name="sp_list" value="['linear', 'poly', 'rbf', 'sigmoid']"/> | 192 </repeat> |
188 <param name="sp_name" value="svr__kernel"/> | 193 <repeat name="param_set"> |
189 </repeat> | 194 <param name="sp_list" value="[3, 5, 7, 9]" /> |
190 <repeat name="param_set"> | 195 <param name="sp_name" value="selectkbest__k" /> |
191 <param name="sp_list" value="[3, 5, 7, 9]"/> | 196 </repeat> |
192 <param name="sp_name" value="selectkbest__k"/> | 197 <repeat name="param_set"> |
193 </repeat> | 198 <param name="sp_list" value="[True, False]" /> |
194 <repeat name="param_set"> | 199 <param name="sp_name" value="robustscaler__with_centering" /> |
195 <param name="sp_list" value="[True, False]"/> | 200 </repeat> |
196 <param name="sp_name" value="robustscaler__with_centering"/> | 201 <section name="options"> |
197 </repeat> | 202 <conditional name="cv_selector"> |
198 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 203 <param name="selected_cv" value="default" /> |
199 <param name="header1" value="true" /> | 204 <param name="n_splits" value="3" /> |
200 <param name="selected_column_selector_option" value="all_columns"/> | 205 </conditional> |
201 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 206 </section> |
202 <param name="header2" value="true" /> | 207 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
203 <param name="selected_column_selector_option2" value="all_columns"/> | 208 <param name="header1" value="true" /> |
209 <param name="selected_column_selector_option" value="all_columns" /> | |
210 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> | |
211 <param name="header2" value="true" /> | |
212 <param name="selected_column_selector_option2" value="all_columns" /> | |
204 <output name="outfile_result" > | 213 <output name="outfile_result" > |
205 <assert_contents> | 214 <assert_contents> |
206 <has_n_columns n="15" /> | 215 <has_n_columns n="15" /> |
207 <has_text text="param_robustscaler__with_centering"/> | 216 <has_text text="param_robustscaler__with_centering" /> |
208 </assert_contents> | 217 </assert_contents> |
209 </output> | 218 </output> |
210 </test> | 219 </test> |
211 <test> | 220 <test> |
212 <param name="selected_search_scheme" value="RandomizedSearchCV"/> | 221 <param name="selected_search_algo" value="RandomizedSearchCV" /> |
213 <param name="infile_estimator" value="pipeline03" ftype="zip"/> | 222 <param name="infile_estimator" value="pipeline03" ftype="h5mlm" /> |
214 <param name="infile_params" value="get_params03.tabular" ftype="tabular"/> | 223 <repeat name="param_set"> |
215 <repeat name="param_set"> | 224 <param name="sp_list" value="np_arange(50, 1001, 50)" /> |
216 <param name="sp_list" value="np_arange(50, 1001, 50)"/> | 225 <param name="sp_name" value="xgbclassifier__n_estimators" /> |
217 <param name="sp_name" value="xgbclassifier__n_estimators"/> | 226 </repeat> |
218 </repeat> | 227 <repeat name="param_set"> |
219 <repeat name="param_set"> | 228 <param name="sp_list" value="scipy_stats_randint(1, 51)" /> |
220 <param name="sp_list" value="scipy_stats_randint(1, 51)"/> | 229 <param name="sp_name" value="xgbclassifier__max_depth" /> |
221 <param name="sp_name" value="xgbclassifier__max_depth"/> | 230 </repeat> |
222 </repeat> | 231 <repeat name="param_set"> |
223 <repeat name="param_set"> | 232 <param name="sp_list" value="scipy_stats_uniform(0., 1.)" /> |
224 <param name="sp_list" value="scipy_stats_uniform(0., 1.)"/> | 233 <param name="sp_name" value="xgbclassifier__gamma" /> |
225 <param name="sp_name" value="xgbclassifier__gamma"/> | 234 </repeat> |
226 </repeat> | 235 <repeat name="param_set"> |
227 <repeat name="param_set"> | 236 <param name="sp_list" value="[324089]" /> |
228 <param name="sp_list" value="[324089]"/> | 237 <param name="sp_name" value="xgbclassifier__random_state" /> |
229 <param name="sp_name" value="xgbclassifier__random_state"/> | 238 </repeat> |
230 </repeat> | 239 <param name="n_splits" value="3" /> |
231 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 240 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
232 <param name="header1" value="true" /> | 241 <param name="header1" value="true" /> |
233 <param name="selected_column_selector_option" value="all_columns"/> | 242 <param name="selected_column_selector_option" value="all_columns" /> |
234 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 243 <param name="infile2" value="classifier_y.tabular" ftype="tabular" /> |
235 <param name="header2" value="true" /> | 244 <param name="header2" value="true" /> |
236 <param name="selected_column_selector_option2" value="all_columns"/> | 245 <param name="selected_column_selector_option2" value="all_columns" /> |
237 <output name="outfile_result" > | 246 <output name="outfile_result" > |
238 <assert_contents> | 247 <assert_contents> |
239 <has_n_columns n="15" /> | 248 <has_n_columns n="15" /> |
240 <has_text text="param_xgbclassifier__max_depth"/> | 249 <has_text text="param_xgbclassifier__max_depth" /> |
241 </assert_contents> | 250 </assert_contents> |
242 </output> | 251 </output> |
243 </test> | 252 </test> |
244 <test> | 253 <test> |
245 <param name="selected_search_scheme" value="GridSearchCV"/> | 254 <param name="selected_search_algo" value="GridSearchCV" /> |
246 <param name="infile_estimator" value="pipeline04" ftype="zip"/> | 255 <param name="infile_estimator" value="pipeline04" ftype="h5mlm" /> |
247 <param name="infile_params" value="get_params04.tabular" ftype="tabular"/> | 256 <repeat name="param_set"> |
248 <repeat name="param_set"> | 257 <param name="sp_list" value="list(range(100, 1001, 100))" /> |
249 <param name="sp_list" value="list(range(100, 1001, 100))"/> | 258 <param name="sp_name" value="linearsvc__random_state" /> |
250 <param name="sp_name" value="linearsvc__random_state"/> | 259 </repeat> |
251 </repeat> | 260 <repeat name="param_set"> |
252 <repeat name="param_set"> | 261 <param name="sp_list" value=": [sklearn_ensemble.ExtraTreesClassifier(n_estimators=100, random_state=324089)]" /> |
253 <param name="sp_list" value=": [sklearn_ensemble.ExtraTreesClassifier(n_estimators=100, random_state=324089)]"/> | 262 <param name="sp_name" value="selectfrommodel__estimator" /> |
254 <param name="sp_name" value="selectfrommodel__estimator"/> | 263 </repeat> |
255 </repeat> | 264 <param name="n_splits" value="3" /> |
256 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 265 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
257 <param name="header1" value="true" /> | 266 <param name="header1" value="true" /> |
258 <param name="selected_column_selector_option" value="all_columns"/> | 267 <param name="selected_column_selector_option" value="all_columns" /> |
259 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 268 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
260 <param name="header2" value="true" /> | 269 <param name="header2" value="true" /> |
261 <param name="selected_column_selector_option2" value="all_columns"/> | 270 <param name="selected_column_selector_option2" value="all_columns" /> |
262 <output name="outfile_result"> | 271 <output name="outfile_result"> |
263 <assert_contents> | 272 <assert_contents> |
264 <has_n_columns n="13"/> | 273 <has_n_columns n="13" /> |
265 <has_text text="0.05363984674329502"/> | 274 <has_text text="0.05363984674329502" /> |
266 </assert_contents> | 275 </assert_contents> |
267 </output> | 276 </output> |
268 </test> | 277 </test> |
269 <test> | 278 <test> |
270 <param name="selected_search_scheme" value="GridSearchCV"/> | 279 <param name="selected_search_algo" value="GridSearchCV" /> |
271 <param name="infile_estimator" value="pipeline01" ftype="zip"/> | 280 <param name="infile_estimator" value="pipeline01" ftype="h5mlm" /> |
272 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> | 281 <repeat name="param_set"> |
273 <repeat name="param_set"> | 282 <param name="sp_list" value="[1, 10, 100, 1000]" /> |
274 <param name="sp_list" value="[1, 10, 100, 1000]"/> | 283 <param name="sp_name" value="svr__C" /> |
275 <param name="sp_name" value="svr__C"/> | 284 </repeat> |
276 </repeat> | 285 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
277 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 286 <param name="header1" value="true" /> |
278 <param name="header1" value="true" /> | 287 <param name="selected_column_selector_option" value="all_columns" /> |
279 <param name="selected_column_selector_option" value="all_columns"/> | 288 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
280 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 289 <param name="header2" value="true" /> |
281 <param name="header2" value="true" /> | 290 <param name="selected_column_selector_option2" value="all_columns" /> |
282 <param name="selected_column_selector_option2" value="all_columns"/> | 291 <param name="save" value="save_estimator" /> |
283 <param name="save" value="save_estimator"/> | 292 <output name="outfile_object" file="searchCV01" compare="sim_size" delta="10" /> |
284 <output name="outfile_object" file="searchCV01" compare="sim_size" delta="10"/> | 293 </test> |
285 </test> | 294 <test> |
286 <test> | 295 <param name="selected_search_algo" value="GridSearchCV" /> |
287 <param name="selected_search_scheme" value="GridSearchCV"/> | 296 <param name="infile_estimator" value="pipeline06" ftype="h5mlm" /> |
288 <param name="infile_estimator" value="pipeline06" ftype="zip"/> | 297 <repeat name="param_set"> |
289 <param name="infile_params" value="get_params06.tabular" ftype="tabular"/> | 298 <param name="sp_list" value="[10, 50, 200, 1000]" /> |
290 <repeat name="param_set"> | 299 <param name="sp_name" value="adaboostregressor__n_estimators" /> |
291 <param name="sp_list" value="[10, 50, 200, 1000]"/> | 300 </repeat> |
292 <param name="sp_name" value="adaboostregressor__n_estimators"/> | 301 <repeat name="param_set"> |
293 </repeat> | 302 <param name="sp_list" value="[324089]" /> |
294 <repeat name="param_set"> | 303 <param name="sp_name" value="adaboostregressor__random_state" /> |
295 <param name="sp_list" value="[324089]"/> | 304 </repeat> |
296 <param name="sp_name" value="adaboostregressor__random_state"/> | 305 <param name="n_splits" value="3" /> |
297 </repeat> | 306 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
298 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 307 <param name="header1" value="true" /> |
299 <param name="header1" value="true" /> | 308 <param name="selected_column_selector_option" value="all_columns" /> |
300 <param name="selected_column_selector_option" value="all_columns"/> | 309 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
301 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 310 <param name="header2" value="true" /> |
302 <param name="header2" value="true" /> | 311 <param name="selected_column_selector_option2" value="all_columns" /> |
303 <param name="selected_column_selector_option2" value="all_columns"/> | 312 <output name="outfile_result"> |
304 <output name="outfile_result"> | 313 <assert_contents> |
305 <assert_contents> | 314 <has_n_columns n="13" /> |
306 <has_n_columns n="13"/> | |
307 <has_text_matching expression=".+0.7772355090078996" /> | 315 <has_text_matching expression=".+0.7772355090078996" /> |
308 </assert_contents> | 316 </assert_contents> |
309 </output> | 317 </output> |
310 </test> | 318 </test> |
311 <test> | 319 <test> |
312 <param name="selected_search_scheme" value="GridSearchCV"/> | 320 <param name="selected_search_algo" value="GridSearchCV" /> |
313 <param name="infile_estimator" value="pipeline07" ftype="zip"/> | 321 <param name="infile_estimator" value="pipeline07" ftype="h5mlm" /> |
314 <param name="infile_params" value="get_params07.tabular" ftype="tabular"/> | 322 <repeat name="param_set"> |
315 <repeat name="param_set"> | 323 <param name="sp_list" value="[10, 50, 100, 200]" /> |
316 <param name="sp_list" value="[10, 50, 100, 200]"/> | 324 <param name="sp_name" value="adaboostclassifier__n_estimators" /> |
317 <param name="sp_name" value="adaboostclassifier__n_estimators"/> | 325 </repeat> |
318 </repeat> | 326 <repeat name="param_set"> |
319 <repeat name="param_set"> | 327 <param name="sp_list" value="[324089]" /> |
320 <param name="sp_list" value="[324089]"/> | 328 <param name="sp_name" value="adaboostclassifier__random_state" /> |
321 <param name="sp_name" value="adaboostclassifier__random_state"/> | 329 </repeat> |
322 </repeat> | 330 <repeat name="param_set"> |
323 <repeat name="param_set"> | 331 <param name="sp_list" value="[1.0, 2.0]" /> |
324 <param name="sp_list" value="[1.0, 2.0]"/> | 332 <param name="sp_name" value="rbfsampler__gamma" /> |
325 <param name="sp_name" value="rbfsampler__gamma"/> | 333 </repeat> |
326 </repeat> | 334 <param name='selected_cv' value="default" /> |
327 <param name='selected_cv' value="default"/> | 335 <param name="n_splits" value="3" /> |
328 <param name="n_splits" value="3"/> | 336 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
329 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 337 <param name="header1" value="true" /> |
330 <param name="header1" value="true" /> | 338 <param name="selected_column_selector_option" value="all_columns" /> |
331 <param name="selected_column_selector_option" value="all_columns"/> | 339 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
332 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 340 <param name="header2" value="true" /> |
333 <param name="header2" value="true" /> | 341 <param name="selected_column_selector_option2" value="all_columns" /> |
334 <param name="selected_column_selector_option2" value="all_columns"/> | 342 <output name="outfile_result"> |
335 <output name="outfile_result"> | 343 <assert_contents> |
336 <assert_contents> | 344 <has_n_columns n="14" /> |
337 <has_n_columns n="14"/> | |
338 <has_text_matching expression=".+0.05747126436781609[^/d]" /> | 345 <has_text_matching expression=".+0.05747126436781609[^/d]" /> |
339 </assert_contents> | 346 </assert_contents> |
340 </output> | 347 </output> |
341 </test> | 348 </test> |
342 <test> | 349 <test> |
343 <param name="selected_search_scheme" value="GridSearchCV"/> | 350 <param name="selected_search_algo" value="GridSearchCV" /> |
344 <param name="infile_estimator" value="pipeline08" ftype="zip"/> | 351 <param name="infile_estimator" value="pipeline08" ftype="h5mlm" /> |
345 <param name="infile_params" value="get_params08.tabular" ftype="tabular"/> | 352 <repeat name="param_set"> |
346 <repeat name="param_set"> | 353 <param name="sp_list" value="[10, 50, 100, 200]" /> |
347 <param name="sp_list" value="[10, 50, 100, 200]"/> | 354 <param name="sp_name" value="adaboostclassifier__n_estimators" /> |
348 <param name="sp_name" value="adaboostclassifier__n_estimators"/> | 355 </repeat> |
349 </repeat> | 356 <repeat name="param_set"> |
350 <repeat name="param_set"> | 357 <param name="sp_list" value="[324089]" /> |
351 <param name="sp_list" value="[324089]"/> | 358 <param name="sp_name" value="adaboostclassifier__random_state" /> |
352 <param name="sp_name" value="adaboostclassifier__random_state"/> | 359 </repeat> |
353 </repeat> | 360 <repeat name="param_set"> |
354 <repeat name="param_set"> | 361 <param name="sp_list" value="['ward', 'complete', 'average']" /> |
355 <param name="sp_list" value="['ward', 'complete', 'average']"/> | 362 <param name="sp_name" value="featureagglomeration__linkage" /> |
356 <param name="sp_name" value="featureagglomeration__linkage"/> | 363 </repeat> |
357 </repeat> | 364 <param name="n_splits" value="3" /> |
358 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 365 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
359 <param name="header1" value="true" /> | 366 <param name="header1" value="true" /> |
360 <param name="selected_column_selector_option" value="all_columns"/> | 367 <param name="selected_column_selector_option" value="all_columns" /> |
361 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 368 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
362 <param name="header2" value="true" /> | 369 <param name="header2" value="true" /> |
363 <param name="selected_column_selector_option2" value="all_columns"/> | 370 <param name="selected_column_selector_option2" value="all_columns" /> |
364 <output name="outfile_result"> | 371 <output name="outfile_result"> |
365 <assert_contents> | 372 <assert_contents> |
366 <has_text_matching expression=".+0.08045977011494253[^/w]+10[^/w]" /> | 373 <has_text_matching expression=".+0.08045977011494253[^/w]+10[^/w]" /> |
367 </assert_contents> | 374 </assert_contents> |
368 </output> | 375 </output> |
369 </test> | 376 </test> |
370 <test> | 377 <test> |
371 <param name="selected_search_scheme" value="GridSearchCV"/> | 378 <param name="selected_search_algo" value="GridSearchCV" /> |
372 <param name="infile_estimator" value="pipeline01" ftype="zip"/> | 379 <param name="infile_estimator" value="pipeline01" ftype="h5mlm" /> |
373 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> | 380 <repeat name="param_set"> |
374 <repeat name="param_set"> | 381 <param name="sp_list" value="[1, 10, 100, 1000]" /> |
375 <param name="sp_list" value="[1, 10, 100, 1000]"/> | 382 <param name="sp_name" value="svr__C" /> |
376 <param name="sp_name" value="svr__C"/> | 383 </repeat> |
377 </repeat> | 384 <param name='selected_cv' value="StratifiedKFold" /> |
378 <param name='selected_cv' value="StratifiedKFold"/> | 385 <param name="n_splits" value="3" /> |
379 <param name="n_splits" value="3"/> | |
380 <param name="shuffle" value="true" /> | 386 <param name="shuffle" value="true" /> |
381 <param name="random_state" value="10"/> | 387 <param name="random_state" value="10" /> |
382 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 388 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
383 <param name="header1" value="true" /> | 389 <param name="header1" value="true" /> |
384 <param name="selected_column_selector_option" value="all_columns"/> | 390 <param name="selected_column_selector_option" value="all_columns" /> |
385 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 391 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
386 <param name="header2" value="true" /> | 392 <param name="header2" value="true" /> |
387 <param name="selected_column_selector_option2" value="all_columns"/> | 393 <param name="selected_column_selector_option2" value="all_columns" /> |
388 <param name="save" value="save_estimator"/> | 394 <param name="save" value="save_estimator" /> |
389 <output name="outfile_object" file="searchCV02" compare="sim_size" delta="10"/> | 395 <output name="outfile_object" file="searchCV02" compare="sim_size" delta="10" /> |
390 </test> | 396 </test> |
391 <test> | 397 <test> |
392 <param name="selected_search_scheme" value="GridSearchCV"/> | 398 <param name="selected_search_algo" value="GridSearchCV" /> |
393 <param name="infile_estimator" value="pipeline03" ftype="zip"/> | 399 <param name="infile_estimator" value="pipeline03" ftype="h5mlm" /> |
394 <param name="infile_params" value="get_params03.tabular" ftype="tabular"/> | 400 <repeat name="param_set"> |
395 <repeat name="param_set"> | 401 <param name="sp_list" value="[10, 50, 200, 1000]" /> |
396 <param name="sp_list" value="[10, 50, 200, 1000]"/> | 402 <param name="sp_name" value="xgbclassifier__n_estimators" /> |
397 <param name="sp_name" value="xgbclassifier__n_estimators"/> | 403 </repeat> |
398 </repeat> | 404 <repeat name="param_set"> |
399 <repeat name="param_set"> | 405 <param name="sp_list" value="[324089]" /> |
400 <param name="sp_list" value="[324089]"/> | 406 <param name="sp_name" value="xgbclassifier__random_state" /> |
401 <param name="sp_name" value="xgbclassifier__random_state"/> | 407 </repeat> |
402 </repeat> | 408 <param name="primary_scoring" value="balanced_accuracy" /> |
403 <param name="primary_scoring" value="balanced_accuracy"/> | 409 <param name='selected_cv' value="StratifiedKFold" /> |
404 <param name='selected_cv' value="StratifiedKFold"/> | 410 <param name="n_splits" value="3" /> |
405 <param name="n_splits" value="3"/> | |
406 <param name="shuffle" value="true" /> | 411 <param name="shuffle" value="true" /> |
407 <param name="random_state" value="10"/> | 412 <param name="random_state" value="10" /> |
408 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 413 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
409 <param name="header1" value="true" /> | 414 <param name="header1" value="true" /> |
410 <param name="selected_column_selector_option" value="all_columns"/> | 415 <param name="selected_column_selector_option" value="all_columns" /> |
411 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 416 <param name="infile2" value="classifier_y.tabular" ftype="tabular" /> |
412 <param name="header2" value="true" /> | 417 <param name="header2" value="true" /> |
413 <param name="selected_column_selector_option2" value="all_columns"/> | 418 <param name="selected_column_selector_option2" value="all_columns" /> |
414 <output name="outfile_result" > | 419 <output name="outfile_result" > |
415 <assert_contents> | 420 <assert_contents> |
416 <has_n_columns n="13" /> | 421 <has_n_columns n="13" /> |
417 <has_text text="0.08719866399898475"/> | 422 <has_text text="0.7927378" /> |
418 </assert_contents> | 423 </assert_contents> |
419 </output> | 424 </output> |
420 </test> | 425 </test> |
421 <test> | 426 <test> |
422 <param name="selected_search_scheme" value="GridSearchCV"/> | 427 <param name="selected_search_algo" value="GridSearchCV" /> |
423 <param name="infile_estimator" value="pipeline02" ftype="zip"/> | 428 <param name="infile_estimator" value="pipeline02" ftype="h5mlm" /> |
424 <param name="infile_params" value="get_params02.tabular" ftype="tabular"/> | 429 <repeat name="param_set"> |
425 <repeat name="param_set"> | 430 <param name="sp_list" value="[0.01, 0.001]" /> |
426 <param name="sp_list" value="[0.01, 0.001]"/> | 431 <param name="sp_name" value="lassocv__eps" /> |
427 <param name="sp_name" value="lassocv__eps"/> | 432 </repeat> |
428 </repeat> | 433 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
429 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 434 <param name="header1" value="true" /> |
430 <param name="header1" value="true" /> | 435 <param name="selected_column_selector_option" value="all_columns" /> |
431 <param name="selected_column_selector_option" value="all_columns"/> | 436 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
432 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 437 <param name="header2" value="true" /> |
433 <param name="header2" value="true" /> | 438 <param name="selected_column_selector_option2" value="all_columns" /> |
434 <param name="selected_column_selector_option2" value="all_columns"/> | 439 <output name="outfile_result"> |
435 <output name="outfile_result"> | 440 <assert_contents> |
436 <assert_contents> | 441 <has_n_columns n="14" /> |
437 <has_n_columns n="12"/> | 442 <has_text text="0.78685093734485" /> |
438 <has_text text="0.776296816136668" /> | 443 </assert_contents> |
439 </assert_contents> | 444 </output> |
440 </output> | 445 </test> |
441 </test> | 446 <test> |
442 <test> | 447 <param name="selected_search_algo" value="GridSearchCV" /> |
443 <param name="selected_search_scheme" value="GridSearchCV"/> | 448 <param name="infile_estimator" value="pipeline05" ftype="h5mlm" /> |
444 <param name="infile_estimator" value="pipeline05" ftype="zip"/> | 449 <repeat name="param_set"> |
445 <param name="infile_params" value="get_params05.tabular" ftype="tabular"/> | 450 <param name="sp_list" value="[10, 50, 100]" /> |
446 <repeat name="param_set"> | 451 <param name="sp_name" value="n_estimators" /> |
447 <param name="sp_list" value="[10, 50, 100, 300]"/> | 452 </repeat> |
448 <param name="sp_name" value="n_estimators"/> | 453 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
449 </repeat> | 454 <param name="header1" value="true" /> |
450 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 455 <param name="selected_column_selector_option" value="all_columns" /> |
451 <param name="header1" value="true" /> | 456 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
452 <param name="selected_column_selector_option" value="all_columns"/> | 457 <param name="header2" value="true" /> |
453 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 458 <param name="selected_column_selector_option2" value="all_columns" /> |
454 <param name="header2" value="true" /> | 459 <output name="outfile_result"> |
455 <param name="selected_column_selector_option2" value="all_columns"/> | 460 <assert_contents> |
456 <output name="outfile_result"> | 461 <has_n_columns n="14" /> |
457 <assert_contents> | 462 <has_text text="0.8101624993383203" /> |
458 <has_n_columns n="12"/> | 463 </assert_contents> |
459 <has_text text="0.8176576686816003" /> | 464 </output> |
460 </assert_contents> | 465 <output name="outfile_object" file="searchCV03" compare="sim_size" delta="10" /> |
461 </output> | |
462 </test> | 466 </test> |
463 <test expect_failure="true"> | 467 <test expect_failure="true"> |
464 <param name="selected_search_scheme" value="GridSearchCV"/> | 468 <param name="selected_search_algo" value="GridSearchCV" /> |
465 <param name="infile_estimator" value="pipeline01" ftype="zip"/> | 469 <param name="infile_estimator" value="pipeline01" ftype="h5mlm" /> |
466 <param name="infile_params" value="get_params01.tabular" ftype="tabular"/> | 470 <repeat name="param_set"> |
467 <repeat name="param_set"> | 471 <param name="sp_list" value="open('~/.ssh/authorized_keys', 'r').read()" /> |
468 <param name="sp_list" value="open('~/.ssh/authorized_keys', 'r').read()"/> | 472 <param name="sp_name" value="svr__C" /> |
469 <param name="sp_name" value="svr__C"/> | 473 </repeat> |
470 </repeat> | 474 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
471 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 475 <param name="header1" value="true" /> |
472 <param name="header1" value="true" /> | 476 <param name="selected_column_selector_option" value="all_columns" /> |
473 <param name="selected_column_selector_option" value="all_columns"/> | 477 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
474 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 478 <param name="header2" value="true" /> |
475 <param name="header2" value="true" /> | 479 <param name="selected_column_selector_option2" value="all_columns" /> |
476 <param name="selected_column_selector_option2" value="all_columns"/> | 480 </test> |
477 </test> | 481 <test> |
478 <test> | 482 <param name="selected_search_algo" value="GridSearchCV" /> |
479 <param name="selected_search_scheme" value="GridSearchCV"/> | 483 <param name="infile_estimator" value="pipeline10" ftype="h5mlm" /> |
480 <param name="infile_estimator" value="pipeline10" ftype="zip"/> | 484 <repeat name="param_set"> |
481 <param name="infile_params" value="get_params10.tabular" ftype="tabular"/> | 485 <param name="sp_list" value=": [sklearn_tree.DecisionTreeRegressor(random_state=0), sklearn_tree.ExtraTreeRegressor(random_state=0)]" /> |
482 <repeat name="param_set"> | 486 <param name="sp_name" value="adaboostregressor__base_estimator" /> |
483 <param name="sp_list" value=": [sklearn_tree.DecisionTreeRegressor(random_state=0), sklearn_tree.ExtraTreeRegressor(random_state=0)]"/> | 487 </repeat> |
484 <param name="sp_name" value="adaboostregressor__base_estimator"/> | 488 <repeat name="param_set"> |
485 </repeat> | 489 <param name="sp_list" value="[10]" /> |
486 <repeat name="param_set"> | 490 <param name="sp_name" value="adaboostregressor__random_state" /> |
487 <param name="sp_list" value="[10]"/> | 491 </repeat> |
488 <param name="sp_name" value="adaboostregressor__random_state"/> | 492 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
489 </repeat> | 493 <param name="header1" value="true" /> |
490 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 494 <param name="selected_column_selector_option" value="all_columns" /> |
491 <param name="header1" value="true" /> | 495 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
492 <param name="selected_column_selector_option" value="all_columns"/> | 496 <param name="header2" value="true" /> |
493 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 497 <param name="selected_column_selector_option2" value="all_columns" /> |
494 <param name="header2" value="true" /> | 498 <output name="outfile_result"> |
495 <param name="selected_column_selector_option2" value="all_columns"/> | 499 <assert_contents> |
496 <output name="outfile_result"> | 500 <has_n_columns n="15" /> |
497 <assert_contents> | 501 <has_text text="0.7981150937087843" /> |
498 <has_n_columns n="13"/> | 502 </assert_contents> |
499 <has_text text="0.8165699136618538"/> | 503 </output> |
500 </assert_contents> | 504 </test> |
501 </output> | 505 <test> |
502 </test> | 506 <param name="selected_search_algo" value="GridSearchCV" /> |
503 <test> | 507 <param name="infile_estimator" value="pipeline09" ftype="h5mlm" /> |
504 <param name="selected_search_scheme" value="GridSearchCV"/> | |
505 <param name="infile_estimator" value="pipeline09" ftype="zip"/> | |
506 <param name="infile_params" value="get_params09.tabular" ftype="tabular"/> | |
507 <repeat name="param_set"> | 508 <repeat name="param_set"> |
508 <param name="sp_list" value=": [sklearn_feature_selection.SelectKBest(), | 509 <param name="sp_list" value=": [sklearn_feature_selection.SelectKBest(), |
509 sklearn_feature_selection.VarianceThreshold(), skrebate_ReliefF(), sklearn_preprocessing.RobustScaler()]"/> | 510 sklearn_feature_selection.VarianceThreshold(), skrebate_ReliefF(), sklearn_preprocessing.RobustScaler()]" /> |
510 <param name="sp_name" value="relieff"/> | 511 <param name="sp_name" value="relieff" /> |
511 </repeat> | 512 </repeat> |
512 <repeat name="param_set"> | 513 <repeat name="param_set"> |
513 <param name="sp_list" value="[10]"/> | 514 <param name="sp_list" value="[10]" /> |
514 <param name="sp_name" value="randomforestregressor__random_state"/> | 515 <param name="sp_name" value="randomforestregressor__random_state" /> |
515 </repeat> | 516 </repeat> |
516 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 517 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
517 <param name="header1" value="true" /> | 518 <param name="header1" value="true" /> |
518 <param name="selected_column_selector_option" value="all_columns"/> | 519 <param name="selected_column_selector_option" value="all_columns" /> |
519 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 520 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
520 <param name="header2" value="true" /> | 521 <param name="header2" value="true" /> |
521 <param name="selected_column_selector_option2" value="all_columns"/> | 522 <param name="selected_column_selector_option2" value="all_columns" /> |
522 <output name="outfile_result"> | 523 <output name="outfile_result"> |
523 <assert_contents> | 524 <assert_contents> |
524 <has_n_columns n="13"/> | 525 <has_n_columns n="15" /> |
525 <has_text text="0.8151250518677202"/> | 526 <has_text text="0.8136054873316014" /> |
526 </assert_contents> | 527 </assert_contents> |
527 </output> | 528 </output> |
528 </test> | 529 </test> |
529 <test> | 530 <test> |
530 <param name="selected_search_scheme" value="GridSearchCV"/> | 531 <param name="selected_search_algo" value="GridSearchCV" /> |
531 <param name="infile_estimator" value="pipeline09" ftype="zip"/> | 532 <param name="infile_estimator" value="pipeline09" ftype="h5mlm" /> |
532 <param name="infile_params" value="get_params09.tabular" ftype="tabular"/> | 533 <repeat name="param_set"> |
533 <repeat name="param_set"> | 534 <param name="sp_list" value=": [None,'sk_prep_all', 7, 13, skrebate_ReliefF(n_features_to_select=12)]" /> |
534 <param name="sp_list" value=": [None,'sk_prep_all', 7, 13, skrebate_ReliefF(n_features_to_select=12)]"/> | 535 <param name="sp_name" value="relieff" /> |
535 <param name="sp_name" value="relieff"/> | 536 </repeat> |
536 </repeat> | 537 <repeat name="param_set"> |
537 <repeat name="param_set"> | 538 <param name="sp_list" value="[10]" /> |
538 <param name="sp_list" value="[10]"/> | 539 <param name="sp_name" value="randomforestregressor__random_state" /> |
539 <param name="sp_name" value="randomforestregressor__random_state"/> | 540 </repeat> |
540 </repeat> | 541 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
541 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 542 <param name="header1" value="true" /> |
542 <param name="header1" value="true" /> | 543 <param name="selected_column_selector_option" value="all_columns" /> |
543 <param name="selected_column_selector_option" value="all_columns"/> | 544 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
544 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 545 <param name="header2" value="true" /> |
545 <param name="header2" value="true" /> | 546 <param name="selected_column_selector_option2" value="all_columns" /> |
546 <param name="selected_column_selector_option2" value="all_columns"/> | 547 <output name="outfile_result"> |
547 <output name="outfile_result"> | 548 <assert_contents> |
548 <assert_contents> | 549 <has_n_columns n="15" /> |
549 <has_n_columns n="13"/> | 550 <has_text text="0.8137203241980131" /> |
550 <has_text text="0.8151250518677202"/> | 551 </assert_contents> |
551 </assert_contents> | 552 </output> |
552 </output> | 553 </test> |
553 </test> | 554 <test> |
554 <test> | 555 <param name="selected_search_algo" value="GridSearchCV" /> |
555 <param name="selected_search_scheme" value="GridSearchCV"/> | 556 <param name="infile_estimator" value="pipeline11" ftype="h5mlm" /> |
556 <param name="infile_estimator" value="pipeline11" ftype="zip"/> | 557 <repeat name="param_set"> |
557 <param name="infile_params" value="get_params11.tabular" ftype="tabular"/> | 558 <param name="sp_list" value="[3,4,5]" /> |
558 <repeat name="param_set"> | 559 <param name="sp_name" value="editednearestneighbours__n_neighbors" /> |
559 <param name="sp_list" value="[3,4,5]"/> | 560 </repeat> |
560 <param name="sp_name" value="editednearestneighbours__n_neighbors"/> | 561 <repeat name="param_set"> |
561 </repeat> | 562 <param name="sp_list" value="[10, 50, 100, 500]" /> |
562 <repeat name="param_set"> | 563 <param name="sp_name" value="randomforestclassifier__n_estimators" /> |
563 <param name="sp_list" value="[10]"/> | 564 </repeat> |
564 <param name="sp_name" value="editednearestneighbours__random_state"/> | 565 <repeat name="param_set"> |
565 </repeat> | 566 <param name="sp_list" value="[10]" /> |
566 <repeat name="param_set"> | 567 <param name="sp_name" value="randomforestclassifier__random_state" /> |
567 <param name="sp_list" value="[10, 50, 100, 500]"/> | 568 </repeat> |
568 <param name="sp_name" value="randomforestclassifier__n_estimators"/> | 569 <param name="primary_scoring" value="f1_macro" /> |
569 </repeat> | 570 <param name="secondary_scoring" value="balanced_accuracy,accuracy" /> |
570 <repeat name="param_set"> | 571 <param name="n_splits" value="5" /> |
571 <param name="sp_list" value="[10]"/> | 572 <param name="infile1" value="imblearn_X.tabular" ftype="tabular" /> |
572 <param name="sp_name" value="randomforestclassifier__random_state"/> | 573 <param name="header1" value="true" /> |
573 </repeat> | 574 <param name="selected_column_selector_option" value="all_columns" /> |
574 <param name="primary_scoring" value="f1_macro"/> | 575 <param name="infile2" value="imblearn_y.tabular" ftype="tabular" /> |
575 <param name="secondary_scoring" value="balanced_accuracy,accuracy"/> | 576 <param name="header2" value="true" /> |
576 <param name="n_splits" value="5"/> | 577 <param name="selected_column_selector_option2" value="all_columns" /> |
577 <param name="infile1" value="imblearn_X.tabular" ftype="tabular"/> | 578 <output name="outfile_result"> |
578 <param name="header1" value="true" /> | 579 <assert_contents> |
579 <param name="selected_column_selector_option" value="all_columns"/> | 580 <has_n_columns n="32" /> |
580 <param name="infile2" value="imblearn_y.tabular" ftype="tabular"/> | 581 <has_text text="0.9945648481554453" /> |
581 <param name="header2" value="true" /> | 582 <has_text text="0.9988888888888889" /> |
582 <param name="selected_column_selector_option2" value="all_columns"/> | 583 <has_text text="0.998" /> |
583 <output name="outfile_result"> | 584 </assert_contents> |
584 <assert_contents> | 585 </output> |
585 <has_n_columns n="33"/> | 586 </test> |
586 <has_text text="0.9945648481554453"/> | 587 <test> |
587 <has_text text="0.9988888888888889"/> | 588 <param name="selected_search_algo" value="GridSearchCV" /> |
588 <has_text text="0.998"/> | 589 <param name="infile_estimator" value="pipeline12" ftype="h5mlm" /> |
589 </assert_contents> | 590 <repeat name="param_set"> |
590 </output> | 591 <param name="sp_list" value="[10, 100, 200]" /> |
591 </test> | 592 <param name="sp_name" value="estimator__n_estimators" /> |
592 <test> | 593 </repeat> |
593 <param name="selected_search_scheme" value="GridSearchCV"/> | 594 <repeat name="param_set"> |
594 <param name="infile_estimator" value="pipeline12" ftype="zip"/> | 595 <param name="sp_list" value="[10, None]" /> |
595 <param name="infile_params" value="get_params12.tabular" ftype="tabular"/> | 596 <param name="sp_name" value="n_features_to_select" /> |
596 <repeat name="param_set"> | 597 </repeat> |
597 <param name="sp_list" value="[10, 100, 200]"/> | 598 <param name="primary_scoring" value="r2" /> |
598 <param name="sp_name" value="estimator__n_estimators"/> | 599 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
599 </repeat> | 600 <param name="header1" value="true" /> |
600 <repeat name="param_set"> | 601 <param name="selected_column_selector_option" value="all_columns" /> |
601 <param name="sp_list" value="[10, None]"/> | 602 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
602 <param name="sp_name" value="n_features_to_select"/> | 603 <param name="header2" value="true" /> |
603 </repeat> | 604 <param name="selected_column_selector_option2" value="all_columns" /> |
604 <param name="primary_scoring" value="r2"/> | 605 <output name="outfile_result"> |
605 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 606 <assert_contents> |
606 <param name="header1" value="true" /> | 607 <has_n_columns n="15" /> |
607 <param name="selected_column_selector_option" value="all_columns"/> | 608 <has_text text="0.78510" /> |
608 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 609 </assert_contents> |
609 <param name="header2" value="true" /> | 610 </output> |
610 <param name="selected_column_selector_option2" value="all_columns"/> | 611 </test> |
611 <output name="outfile_result"> | 612 <test> |
612 <assert_contents> | 613 <conditional name="search_algos"> |
613 <has_n_columns n="13"/> | 614 <param name="selected_search_algo" value="GridSearchCV" /> |
614 <has_text text="0.8149439619875293"/> | |
615 </assert_contents> | |
616 </output> | |
617 </test> | |
618 <test> | |
619 <conditional name="search_schemes"> | |
620 <param name="selected_search_scheme" value="GridSearchCV"/> | |
621 <param name="infile_estimator" value="pipeline05" ftype="zip"/> | |
622 <section name="search_params_builder"> | |
623 <param name="infile_params" value="get_params05.tabular" ftype="tabular"/> | |
624 <repeat name="param_set"> | |
625 <param name="sp_list" value="[10, 50, 100, 300]"/> | |
626 <param name="sp_name" value="n_estimators"/> | |
627 </repeat> | |
628 </section> | |
629 </conditional> | 615 </conditional> |
630 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 616 <param name="infile_estimator" value="pipeline05" ftype="h5mlm" /> |
631 <param name="header1" value="true" /> | 617 <section name="search_params_builder"> |
632 <param name="selected_column_selector_option" value="all_columns"/> | 618 <repeat name="param_set"> |
633 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 619 <param name="sp_list" value="[10, 50, 100, 300]" /> |
634 <param name="header2" value="true" /> | 620 <param name="sp_name" value="n_estimators" /> |
635 <param name="selected_column_selector_option2" value="all_columns"/> | 621 </repeat> |
622 </section> | |
623 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> | |
624 <param name="header1" value="true" /> | |
625 <param name="selected_column_selector_option" value="all_columns" /> | |
626 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> | |
627 <param name="header2" value="true" /> | |
628 <param name="selected_column_selector_option2" value="all_columns" /> | |
636 <conditional name="outer_split"> | 629 <conditional name="outer_split"> |
637 <param name="split_mode" value="nested_cv"/> | 630 <param name="split_mode" value="nested_cv" /> |
638 <conditional name="cv_selector"> | 631 <conditional name="cv_selector"> |
639 <param name='selected_cv' value="KFold"/> | 632 <param name='selected_cv' value="KFold" /> |
640 <param name="n_splits" value="3"/> | 633 <param name="n_splits" value="3" /> |
641 <param name="shuffle" value="true" /> | 634 <param name="shuffle" value="true" /> |
642 <param name="random_state" value="123"/> | 635 <param name="random_state" value="123" /> |
643 </conditional> | 636 </conditional> |
644 </conditional> | 637 </conditional> |
645 <output name="outfile_result"> | 638 <output name="outfile_result"> |
646 <assert_contents> | 639 <assert_contents> |
647 <has_n_columns n="4"/> | 640 <has_n_columns n="4" /> |
648 <has_text text="0.8044418936007722" /> | 641 <has_text text="0.8355084087564864" /> |
649 </assert_contents> | 642 </assert_contents> |
650 </output> | 643 </output> |
651 </test> | 644 </test> |
652 </tests> | 645 </tests> |
653 <help> | 646 <help> |
677 | 670 |
678 - np_arange(0.01, 1, 0.1) | 671 - np_arange(0.01, 1, 0.1) |
679 | 672 |
680 - np_random_choice(list(range(1, 51)) + [None], size=20) | 673 - np_random_choice(list(range(1, 51)) + [None], size=20) |
681 | 674 |
682 - scipy_stats_randin(1, 11) | 675 - scipy_stats_randint(1, 11) |
683 | 676 |
684 **Estimator / Preprocessor search (additional `:` in the front)**:: | 677 **Estimator / Preprocessor search (additional `:` in the front)**:: |
685 | 678 |
686 : [sklearn_tree.DecisionTreeRegressor(), sklearn_tree.ExtraTreeRegressor()] | 679 : [sklearn_tree.DecisionTreeRegressor(), sklearn_tree.ExtraTreeRegressor()] |
687 | 680 |
785 .. _`https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation`: https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation | 778 .. _`https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation`: https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation |
786 | 779 |
787 ]]> | 780 ]]> |
788 </help> | 781 </help> |
789 <expand macro="sklearn_citation"> | 782 <expand macro="sklearn_citation"> |
790 <expand macro="skrebate_citation"/> | 783 <expand macro="skrebate_citation" /> |
791 <expand macro="xgboost_citation"/> | 784 <expand macro="xgboost_citation" /> |
792 <expand macro="imblearn_citation"/> | 785 <expand macro="imblearn_citation" /> |
793 </expand> | 786 </expand> |
794 </tool> | 787 </tool> |