view stacking_ensembles.xml @ 5:3dc6734056e6 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
author bgruening
date Fri, 01 Nov 2019 17:36:17 -0400
parents 22560cf810b8
children a2e4a45c6083
line wrap: on
line source

<tool id="sklearn_stacking_ensemble_models" name="Stacking Ensemble Models" version="@ENSEMBLE_VERSION@.1">
    <description>builds a strong model by stacking multiple algorithms</description>
    <macros>
        <import>main_macros.xml</import>
    </macros>
    <expand macro="python_requirements"/>
    <expand macro="macro_stdio"/>
    <!-- Report the version through the @ENSEMBLE_VERSION@ macro token, the same token used in the tool's version attribute. A shell variable named ENSEMBLE_VERSION is never set, so echoing it printed an empty line. -->
    <version_command>echo "@ENSEMBLE_VERSION@"</version_command>
    <!--
        Command template (Cheetah rendered into a shell command).
        Step 1 builds the shell variable `bases`: one comma-separated entry per
        base_est_builder repeat, holding the rendered c_estimator value when the
        custom_estimator module is selected and the literal None otherwise.
        Step 2 runs stacking_ensembles.py with the inputs config file, the output
        file and that list. The meta option is added only for the non-voting
        ensemble types, and only when the meta estimator is a custom_estimator.
        The outfile_params option is added only when get_params is true.
    -->
    <command>
        <![CDATA[
        #for $i, $base in enumerate($base_est_builder)
        #if $i == 0
            #if $base.estimator_selector.selected_module == 'custom_estimator'
            bases='${base.estimator_selector.c_estimator}';
            #else
            bases='None';
            #end if
        #elif $base.estimator_selector.selected_module == 'custom_estimator'
        bases="\$bases,${base.estimator_selector.c_estimator}";
        #else
        bases="\$bases,None";
        #end if
        #end for
        python '$__tool_directory__/stacking_ensembles.py'
            --inputs '$inputs'
            --outfile '$outfile'
            --bases "\$bases"
            #if $algo_selection.estimator_type not in ('sklearn.ensemble_VotingClassifier', 'sklearn.ensemble_VotingRegressor')
            #if $algo_selection.meta_estimator.estimator_selector.selected_module == 'custom_estimator'
            --meta '${algo_selection.meta_estimator.estimator_selector.c_estimator}'
            #end if
            #end if
            #if $get_params
            --outfile_params '$outfile_params'
            #end if
        ]]>
    </command>
    <!-- Declares the config file that the command template references as $inputs. -->
    <configfiles>
        <inputs name="inputs"/>
    </configfiles>
    <inputs>
        <!--
            Ensemble type selector. The two sklearn voting types expand the
            stacking_voting_weights macro; the four mlxtend stacking types expand
            stacking_ensemble_inputs and add a meta_estimator section.
            Help texts on the boolean options paraphrase the upstream
            scikit-learn / mlxtend parameter documentation.
        -->
        <conditional name="algo_selection">
            <param name="estimator_type" type="select" label="Choose the stacking ensemble type">
                <option value="sklearn.ensemble_VotingClassifier" selected="true">sklearn.ensemble -- VotingClassifier</option>
                <option value="sklearn.ensemble_VotingRegressor">sklearn.ensemble -- VotingRegressor</option>
                <option value="mlxtend.classifier_StackingCVClassifier">mlxtend.classifier -- StackingCVClassifier</option>
                <option value="mlxtend.classifier_StackingClassifier">mlxtend.classifier -- StackingClassifier</option>
                <option value="mlxtend.regressor_StackingCVRegressor">mlxtend.regressor -- StackingCVRegressor</option>
                <option value="mlxtend.regressor_StackingRegressor">mlxtend.regressor -- StackingRegressor</option>
            </param>
            <when value="sklearn.ensemble_VotingClassifier">
                <expand macro="stacking_voting_weights">
                    <param argument="voting" type="select" help="If ‘hard’, uses predicted class labels for majority rule voting. Else if ‘soft’, predicts the class label based on the argmax of the sums of the predicted probabilities, which is recommended for an ensemble of well-calibrated classifiers.">
                        <option value="hard" selected="true">hard</option>
                        <option value="soft">soft</option>
                    </param>
                    <param argument="flatten_transform" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" help="Affects the shape of the transform output only when voting='soft'. If true, transform returns a matrix of shape (n_samples, n_classifiers * n_classes); if false, it returns (n_classifiers, n_samples, n_classes)."/>
                </expand>
            </when>
            <when value="sklearn.ensemble_VotingRegressor">
                <expand macro="stacking_voting_weights"/>
            </when>
            <when value="mlxtend.classifier_StackingCVClassifier">
                <expand macro="stacking_ensemble_inputs">
                    <expand macro="cv_reduced"/>
                    <expand macro="shuffle" label="shuffle"/>
                    <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data."/>
                    <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" help="If true, trains the meta-classifier on the predicted probabilities of the base classifiers instead of their predicted class labels."/>
                </expand>
                <section name="meta_estimator" title="Meta Estimator" expanded="true">
                    <expand macro="stacking_base_estimator"/>
                </section>
            </when>
            <when value="mlxtend.classifier_StackingClassifier">
                <expand macro="stacking_ensemble_inputs">
                    <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" help="If true, trains the meta-classifier on the predicted probabilities of the base classifiers instead of their predicted class labels."/>
                    <param argument="average_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" help="If true, averages the predicted probabilities of the base classifiers to form the meta features. Only relevant when use_probas is true."/>
                </expand>
                <section name="meta_estimator" title="Meta Estimator" expanded="true">
                    <expand macro="stacking_base_estimator"/>
                </section>
            </when>
            <when value="mlxtend.regressor_StackingCVRegressor">
                <expand macro="stacking_ensemble_inputs">
                    <expand macro="cv_reduced"/>
                    <!--TODO support group splitters. Hint: `groups` is a fit_param-->
                    <expand macro="shuffle" label="shuffle"/>
                    <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data."/>
                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" help="If true, the regressors are cloned before fitting; if false, the original estimators are used and refitted when fit is called. Setting this to false is recommended for estimators that are not compatible with scikit-learn's clone function."/>
                </expand>
                <section name="meta_estimator" title="Meta Estimator" expanded="true">
                    <expand macro="stacking_base_estimator"/>
                </section>
            </when>
            <when value="mlxtend.regressor_StackingRegressor">
                <expand macro="stacking_ensemble_inputs">
                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" help="If true, the regressors are cloned before fitting; if false, the original estimators are used and refitted when fit is called. Setting this to false is recommended for estimators that are not compatible with scikit-learn's clone function."/>
                </expand>
                <section name="meta_estimator" title="Meta Estimator" expanded="true">
                    <expand macro="stacking_base_estimator"/>
                </section>
            </when>
        </conditional>
        <!-- Between 1 and 20 base estimators. The command template loops over this repeat to assemble the comma-separated `bases` list. -->
        <repeat name="base_est_builder" min="1" max="20" title="Base Estimator">
            <expand macro="stacking_base_estimator"/>
            <!--param name="base_estimator" type="data" format="zip,json" label="Select the dataset containing base estimator" help="One estimator at a time."/-->
        </repeat>
        <!--param name="meta_estimator" type="data" format="zip,json" label="Select the dataset containing the Meta estimator"/-->
        <!-- When checked, the command template also passes the outfile_params path and the matching tabular output is kept. -->
        <param name="get_params"
               type="boolean"
               checked="true"
               truevalue="booltrue"
               falsevalue="boolfalse"
               label="Output parameters for searchCV?"
               help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool."/>
    </inputs>
    <!-- outfile (zip) is always declared; outfile_params (tabular) is filtered on the get_params checkbox. -->
    <outputs>
        <data name="outfile" format="zip" label="${algo_selection.estimator_type} on ${on_string}"/>
        <data name="outfile_params" format="tabular" label="get_params for ${algo_selection.estimator_type}">
            <filter>get_params</filter>
        </data>
    </outputs>
    <tests>
        <!-- VotingClassifier with weights [1, 2] over two built-in base estimators (svm SVC and xgboost XGBClassifier); get_params is off and the zip output is compared by size (sim_size, delta 5). -->
        <test>
            <conditional name="algo_selection">
                <param name="estimator_type" value="sklearn.ensemble_VotingClassifier"/>
                <section name="options">
                    <param name="weights" value="[1, 2]"/>
                </section>
            </conditional>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="svm"/>
                    <param name="selected_estimator" value="SVC"/>
                </conditional>
            </repeat>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="xgboost"/>
                    <param name="selected_estimator" value="XGBClassifier"/>
                </conditional>
            </repeat>
            <param name="get_params" value="false"/>
            <output name="outfile" file="StackingVoting03.zip" compare="sim_size" delta="5"/>
        </test>
        <!-- StackingCVRegressor where the meta estimator and both base estimators are supplied as custom_estimator zip datasets; get_params is off and the zip output is compared by size (sim_size, delta 5). -->
        <test>
            <conditional name="algo_selection">
                <param name="estimator_type" value="mlxtend.regressor_StackingCVRegressor"/>
                <section name="meta_estimator">
                    <conditional name="estimator_selector">
                        <param name="selected_module" value="custom_estimator"/>
                        <param name="c_estimator" value="LinearRegression01.zip" ftype="zip"/>
                    </conditional>
                </section>
            </conditional>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="custom_estimator"/>
                    <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip"/>
                </conditional>
            </repeat>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="custom_estimator"/>
                    <param name="c_estimator" value="XGBRegressor01.zip" ftype="zip"/>
                </conditional>
            </repeat>
            <param name="get_params" value="false"/>
            <output name="outfile" file="StackingCVRegressor01.zip" compare="sim_size" delta="5"/>
        </test>
        <!-- StackingRegressor mixing estimator sources: a built-in meta estimator (svm SVR), one custom_estimator base (zip dataset) and one built-in base (xgboost XGBRegressor); the zip output is compared by size (sim_size, delta 5). -->
        <test>
            <conditional name="algo_selection">
                <param name="estimator_type" value="mlxtend.regressor_StackingRegressor"/>
                <section name="meta_estimator">
                    <conditional name="estimator_selector">
                        <param name="selected_module" value="svm"/>
                        <param name="selected_estimator" value="SVR"/>
                    </conditional>
                </section>
            </conditional>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="custom_estimator"/>
                    <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip"/>
                </conditional>
            </repeat>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="xgboost"/>
                    <param name="selected_estimator" value="XGBRegressor"/>
                </conditional>
            </repeat>
            <param name="get_params" value="false"/>
            <output name="outfile" file="StackingRegressor02.zip" compare="sim_size" delta="5"/>
        </test>
    </tests>
    <help>
        <![CDATA[
This tool wraps Stacking Regression, also called Super Learning, in which different base algorithms are each
trained on the original dataset and produce their own predictions; a second-level `metalearner` is then fitted
on those predictions to build a stronger ensemble learner.
Refer to `http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`_.

.. _`http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`:
 http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction

        ]]>
    </help>
    <!-- Citations: expands the sklearn_citation macro and passes it the skrebate, xgboost and imblearn citation macros plus an inline BibTeX entry for MLxtend. -->
    <expand macro="sklearn_citation">
        <expand macro="skrebate_citation"/>
        <expand macro="xgboost_citation"/>
        <expand macro="imblearn_citation"/>
        <citation type="bibtex">
            @article{raschkas_2018_mlxtend,
                author       = {Sebastian Raschka},
                title        = {MLxtend: Providing machine learning and data science 
                                                utilities and extensions to Python’s  
                                                scientific computing stack},
                journal      = {The Journal of Open Source Software},
                volume       = {3},
                number       = {24},
                month        = apr,
                year         = 2018,
                publisher    = {The Open Journal},
                doi          = {10.21105/joss.00638},
                url          = {http://joss.theoj.org/papers/10.21105/joss.00638}
            }
        </citation>
    </expand>
</tool>