Mercurial > repos > bgruening > sklearn_stacking_ensemble_models
diff stacking_ensembles.xml @ 0:fcc5eaaec401 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ab963ec9498bd05d2fb2f24f75adb2fccae7958c
author | bgruening |
---|---|
date | Wed, 15 May 2019 07:25:29 -0400 |
parents | |
children | 22560cf810b8 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/stacking_ensembles.xml Wed May 15 07:25:29 2019 -0400 @@ -0,0 +1,178 @@ +<tool id="sklearn_stacking_ensemble_models" name="Stacking Ensemble Models" version="0.1.0"> + <description>builds a strong model by stacking multiple algorithms</description> + <macros> + <import>main_macros.xml</import> + </macros> + <expand macro="python_requirements"/> + <expand macro="macro_stdio"/> + <version_command>echo "$version"</version_command> + <command> + <![CDATA[ + #for $i, $base in enumerate($base_est_builder) + #if $i == 0 + #if $base.estimator_selector.selected_module == 'custom_estimator' + bases='${base.estimator_selector.c_estimator}'; + #else + bases='None'; + #end if + #elif $base.estimator_selector.selected_module == 'custom_estimator' + bases="\$bases,${base.estimator_selector.c_estimator}"; + #else + bases="\$bases,None"; + #end if + #end for + python '$__tool_directory__/stacking_ensembles.py' + --inputs '$inputs' + --outfile '$outfile' + --bases "\$bases" + #if $meta_estimator.estimator_selector.selected_module == 'custom_estimator' + --meta '${meta_estimator.estimator_selector.c_estimator}' + #end if + #if $get_params + --outfile_params '$outfile_params' + #end if + ]]> + </command> + <configfiles> + <inputs name="inputs" /> + </configfiles> + <inputs> + <conditional name="algo_selection"> + <param name="estimator_type" type="select" label="Choose the stacking ensemble type"> + <option value="StackingCVClassifier" selected="true">classification -- StackingCVClassifier</option> + <option value="StackingClassifier">classification -- StackingClassifier</option> + <option value="StackingCVRegressor">regression -- StackingCVRegressor</option> + <option value="StackingRegressor">regression -- StackingRegressor</option> + </param> + <when value="StackingCVClassifier"> + <expand macro="stacking_ensemble_inputs"> + <expand macro="cv_reduced"/> + <expand macro="shuffle" label="shuffle"/> + <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data."/> + <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/> + </expand> + </when> + <when value="StackingClassifier"> + <expand macro="stacking_ensemble_inputs"> + <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/> + <param argument="average_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/> + </expand> + </when> + <when value="StackingCVRegressor"> + <expand macro="stacking_ensemble_inputs"> + <expand macro="cv_reduced"/> + <!--TODO support group splitters. Hint: `groups` is a fit_param--> + <expand macro="shuffle" label="shuffle"/> + <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data."/> + <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"/> + </expand> + </when> + <when value="StackingRegressor"> + <expand macro="stacking_ensemble_inputs"> + <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"/> + </expand> + </when> + </conditional> + <repeat name="base_est_builder" min="1" max="20" title="Base Estimator"> + <expand macro="stacking_base_estimator"/> + <!--param name="base_estimator" type="data" format="zip,json" label="Select the dataset containing base estimator" help="One estimator at a time."/--> + </repeat> + <!--param name="meta_estimator" type="data" format="zip,json" label="Select the dataset containing the Meta estimator"/--> + <section name="meta_estimator" title="Meta Estimator" expanded="true"> + <expand macro="stacking_base_estimator"/> + </section> + <param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Output parameters for searchCV?" + help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool."/> + </inputs> + <outputs> + <data format="zip" name="outfile" label="${algo_selection.estimator_type} on ${on_string}"/> + <data format="tabular" name="outfile_params" label="get_params for ${algo_selection.estimator_type}"> + <filter>get_params</filter> + </data> + </outputs> + <tests> + <test> + <conditional name="algo_selection"> + <param name="estimator_type" value="StackingCVRegressor"/> + </conditional> + <repeat name="base_est_builder"> + <conditional name="estimator_selector"> + <param name="selected_module" value="custom_estimator"/> + <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip"/> + </conditional> + </repeat> + <repeat name="base_est_builder"> + <conditional name="estimator_selector"> + <param name="selected_module" value="custom_estimator"/> + <param name="c_estimator" value="XGBRegressor01.zip" ftype="zip"/> + </conditional> + </repeat> + <section name="meta_estimator"> + <conditional name="estimator_selector"> + <param name="selected_module" value="custom_estimator"/> + <param name="c_estimator" value="LinearRegression01.zip" ftype="zip"/> + </conditional> + </section> + <param name="get_params" value="false"/> + <output name="outfile" file="StackingCVRegressor01.zip" compare="sim_size" delta="5"/> + </test> + <test> + <conditional name="algo_selection"> + <param name="estimator_type" value="StackingCVRegressor"/> + </conditional> + <repeat name="base_est_builder"> + <conditional name="estimator_selector"> + <param name="selected_module" value="custom_estimator"/> + <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip"/> + </conditional> + </repeat> + <repeat name="base_est_builder"> + <conditional name="estimator_selector"> + <param name="selected_module" value="xgboost"/> + <param name="selected_estimator" value="XGBRegressor"/> + </conditional> + </repeat> + <section name="meta_estimator"> + <conditional name="estimator_selector"> + <param name="selected_module" value="svm"/> + <param name="selected_estimator" value="SVR"/> + </conditional> + </section> + <param name="get_params" value="false"/> + <output name="outfile" file="StackingCVRegressor02.zip" compare="sim_size" delta="5"/> + </test> + </tests> + <help> + <![CDATA[ +This tool wrapps Stacking Regression, also called Super Learning, in which different base algorithms train +on the original dataset and predict results respectively, a second level of `metalearner` fits on the previous +prediction results to ensemble a strong learner. +Refer to `http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`_. + +.. _`http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`: + http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction + + ]]> + </help> + <expand macro="sklearn_citation"> + <expand macro="skrebate_citation"/> + <expand macro="xgboost_citation"/> + <expand macro="imblearn_citation"/> + <citation type="bibtex"> + @article{raschkas_2018_mlxtend, + author = {Sebastian Raschka}, + title = {MLxtend: Providing machine learning and data science + utilities and extensions to Python’s + scientific computing stack}, + journal = {The Journal of Open Source Software}, + volume = {3}, + number = {24}, + month = apr, + year = 2018, + publisher = {The Open Journal}, + doi = {10.21105/joss.00638}, + url = {http://joss.theoj.org/papers/10.21105/joss.00638} + } + </citation> + </expand> +</tool>