view stacking_ensembles.xml @ 15:b94babda32e4 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit f031d8ddfb73cec24572648666ac44ee47f08aad
author bgruening
date Thu, 11 Aug 2022 09:11:27 +0000
parents ac40a2fe5750
children
line wrap: on
line source

<tool id="sklearn_stacking_ensemble_models" name="Stacking Ensembles" version="@VERSION@" profile="20.05">
    <!-- One-line summary of what the tool builds (stacking and voting ensembles). -->
    <description>builds stacking, voting ensemble models with numerous base options</description>
    <!-- Every <expand> in this file and the @VERSION@ token are resolved against main_macros.xml. -->
    <macros>
        <import>main_macros.xml</import>
    </macros>
    <expand macro="python_requirements" />
    <expand macro="macro_stdio" />
    <!-- Report the same version token that the tool's version attribute uses.
         The previous command echoed $ENSEMBLE_VERSION, which is not defined anywhere
         in this file, so it would have printed an empty line. -->
    <version_command>echo "@VERSION@"</version_command>
    <!-- Command line for stacking_ensembles.py.
         One entry is collected per base estimator, in repeat order: the dataset path when the
         estimator is a custom (uploaded) one, otherwise the literal None. The entries are joined
         with commas inside the template and passed as a single single-quoted argument, so every
         path is quoted the same way and nothing depends on shell variable state.
         The meta estimator path is only passed for the non-voting ensemble types, and only when
         a custom estimator was selected. The parameter dump path is passed when get_params is set. -->
    <command>
        <![CDATA[
        #set $base_paths = []
        #for $base in $base_est_builder
            #if $base.estimator_selector.selected_module == 'custom_estimator'
                #silent $base_paths.append(str($base.estimator_selector.c_estimator))
            #else
                #silent $base_paths.append('None')
            #end if
        #end for
        #set $bases = ','.join($base_paths)
        python '$__tool_directory__/stacking_ensembles.py'
            --inputs '$inputs'
            --outfile '$outfile'
            --bases '$bases'
            #if $algo_selection.estimator_type not in ('sklearn.ensemble_VotingClassifier', 'sklearn.ensemble_VotingRegressor')
            #if $algo_selection.meta_estimator.estimator_selector.selected_module == 'custom_estimator'
            --meta '${algo_selection.meta_estimator.estimator_selector.c_estimator}'
            #end if
            #end if
            #if $get_params
            --outfile_params '$outfile_params'
            #end if
        ]]>
    </command>
    <!-- Declares the config file referenced as $inputs in the command section, which hands its
         path to stacking_ensembles.py. Per the Galaxy tool schema, an <inputs> config file holds
         the submitted parameter values. -->
    <configfiles>
        <inputs name="inputs" />
    </configfiles>
    <inputs>
        <!-- Selects the ensemble implementation. The two voting types expose only the options from
             the stacking_voting_weights macro; the four mlxtend stacking types additionally carry a
             meta_estimator section, which the command section reads when building the meta argument. -->
        <conditional name="algo_selection">
            <param name="estimator_type" type="select" label="Choose the stacking ensemble type">
                <option value="sklearn.ensemble_VotingClassifier" selected="true">sklearn.ensemble -- VotingClassifier</option>
                <option value="sklearn.ensemble_VotingRegressor">sklearn.ensemble -- VotingRegressor</option>
                <option value="mlxtend.classifier_StackingCVClassifier">mlxtend.classifier -- StackingCVClassifier</option>
                <option value="mlxtend.classifier_StackingClassifier">mlxtend.classifier -- StackingClassifier</option>
                <option value="mlxtend.regressor_StackingCVRegressor">mlxtend.regressor -- StackingCVRegressor</option>
                <option value="mlxtend.regressor_StackingRegressor">mlxtend.regressor -- StackingRegressor</option>
            </param>
            <!-- Voting classifier: shared voting options plus the voting mode and transform layout. -->
            <when value="sklearn.ensemble_VotingClassifier">
                <expand macro="stacking_voting_weights">
                    <param argument="voting" type="select" help="If ‘hard’, uses predicted class labels for majority rule voting. Else if ‘soft’, predicts the class label based on the argmax of the sums of the predicted probabilities, which is recommended for an ensemble of well-calibrated classifiers.">
                        <option value="hard" selected="true">hard</option>
                        <option value="soft">soft</option>
                    </param>
                    <param argument="flatten_transform" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" help="Affects shape of transform output only when voting='soft'. If voting='soft' and flatten_transform=True, transform method returns matrix with shape (n_samples, n_classifiers * n_classes). If flatten_transform=False, it returns (n_classifiers, n_samples, n_classes)." />
                </expand>
            </when>
            <!-- Voting regressor: only the shared voting options. -->
            <when value="sklearn.ensemble_VotingRegressor">
                <expand macro="stacking_voting_weights" />
            </when>
            <!-- Cross-validated stacking classifier: CV, shuffle and seed options plus use_probas. -->
            <when value="mlxtend.classifier_StackingCVClassifier">
                <expand macro="stacking_ensemble_inputs">
                    <expand macro="cv_reduced" />
                    <expand macro="shuffle" label="shuffle" />
                    <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data." />
                    <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
                </expand>
                <section name="meta_estimator" title="Meta Estimator" expanded="true">
                    <expand macro="stacking_base_estimator" />
                </section>
            </when>
            <!-- Plain stacking classifier: use_probas and average_probas, no CV options. -->
            <when value="mlxtend.classifier_StackingClassifier">
                <expand macro="stacking_ensemble_inputs">
                    <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
                    <param argument="average_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" />
                </expand>
                <section name="meta_estimator" title="Meta Estimator" expanded="true">
                    <expand macro="stacking_base_estimator" />
                </section>
            </when>
            <!-- Cross-validated stacking regressor: CV, shuffle and seed options plus refit. -->
            <when value="mlxtend.regressor_StackingCVRegressor">
                <expand macro="stacking_ensemble_inputs">
                    <expand macro="cv_reduced" />
                    <!--TODO support group splitters. Hint: `groups` is a fit_param-->
                    <expand macro="shuffle" label="shuffle" />
                    <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data." />
                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" />
                </expand>
                <section name="meta_estimator" title="Meta Estimator" expanded="true">
                    <expand macro="stacking_base_estimator" />
                </section>
            </when>
            <!-- Plain stacking regressor: refit only, no CV options. -->
            <when value="mlxtend.regressor_StackingRegressor">
                <expand macro="stacking_ensemble_inputs">
                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" />
                </expand>
                <section name="meta_estimator" title="Meta Estimator" expanded="true">
                    <expand macro="stacking_base_estimator" />
                </section>
            </when>
        </conditional>
        <!-- Between 1 and 20 base estimators. The command section emits one entry per repeat
             instance, in order, into the comma-separated list handed to the script. -->
        <repeat name="base_est_builder" min="1" max="20" title="Base Estimator">
            <expand macro="stacking_base_estimator" />
            <!--param name="base_estimator" type="data" format="zip,json" label="Select the dataset containing base estimator" help="One estimator at a time."/-->
        </repeat>
        <!--param name="meta_estimator" type="data" format="zip,json" label="Select the dataset containing the Meta estimator"/-->
        <!-- When checked, the command passes the outfile_params path to the script and the
             outfile_params output dataset is created (see its filter in the outputs section). -->
        <param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Output parameters for searchCV?" help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool." />
    </inputs>
    <!-- outfile: the zip dataset written by the script, labelled with the chosen ensemble type.
         outfile_params: tabular dataset that only exists when the get_params input is true. -->
    <outputs>
        <data format="zip" name="outfile" label="${algo_selection.estimator_type} on ${on_string}" />
        <data format="tabular" name="outfile_params" label="get_params for ${algo_selection.estimator_type}">
            <filter>get_params</filter>
        </data>
    </outputs>
    <tests>
        <!-- VotingClassifier with weights [1, 2] over two built-in base estimators (SVC and
             XGBClassifier), parameter dump disabled. The output zip is compared by size only
             (sim_size with a delta of 5). -->
        <test>
            <conditional name="algo_selection">
                <param name="estimator_type" value="sklearn.ensemble_VotingClassifier" />
                <section name="options">
                    <param name="weights" value="[1, 2]" />
                </section>
            </conditional>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="svm" />
                    <param name="selected_estimator" value="SVC" />
                </conditional>
            </repeat>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="xgboost" />
                    <param name="selected_estimator" value="XGBClassifier" />
                </conditional>
            </repeat>
            <param name="get_params" value="false" />
            <output name="outfile" file="StackingVoting03.zip" compare="sim_size" delta="5" />
        </test>
        <!-- StackingCVRegressor built entirely from uploaded estimators: a custom meta estimator
             (LinearRegression01.zip) and two custom base estimators, parameter dump disabled.
             The output zip is compared by size only (sim_size with a delta of 5). -->
        <test>
            <conditional name="algo_selection">
                <param name="estimator_type" value="mlxtend.regressor_StackingCVRegressor" />
                <section name="meta_estimator">
                    <conditional name="estimator_selector">
                        <param name="selected_module" value="custom_estimator" />
                        <param name="c_estimator" value="LinearRegression01.zip" ftype="zip" />
                    </conditional>
                </section>
            </conditional>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="custom_estimator" />
                    <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip" />
                </conditional>
            </repeat>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="custom_estimator" />
                    <param name="c_estimator" value="XGBRegressor01.zip" ftype="zip" />
                </conditional>
            </repeat>
            <param name="get_params" value="false" />
            <output name="outfile" file="StackingCVRegressor01.zip" compare="sim_size" delta="5" />
        </test>
        <!-- StackingRegressor mixing estimator sources: a built-in SVR meta estimator, one custom
             base estimator (RandomForestRegressor01.zip) and one built-in XGBRegressor base,
             parameter dump disabled. The output zip is compared by size only (sim_size, delta 5). -->
        <test>
            <conditional name="algo_selection">
                <param name="estimator_type" value="mlxtend.regressor_StackingRegressor" />
                <section name="meta_estimator">
                    <conditional name="estimator_selector">
                        <param name="selected_module" value="svm" />
                        <param name="selected_estimator" value="SVR" />
                    </conditional>
                </section>
            </conditional>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="custom_estimator" />
                    <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip" />
                </conditional>
            </repeat>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="xgboost" />
                    <param name="selected_estimator" value="XGBRegressor" />
                </conditional>
            </repeat>
            <param name="get_params" value="false" />
            <output name="outfile" file="StackingRegressor02.zip" compare="sim_size" delta="5" />
        </test>
    </tests>
    <help>
        <![CDATA[
This tool wraps Stacking Regression, also called Super Learning, in which different base algorithms each train
on the original dataset and produce their own predictions; a second-level `metalearner` is then fit on those
predictions to ensemble a strong learner.
Refer to `http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`_.

.. _`http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`:
 http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction

        ]]>
    </help>
    <!-- Citations: the nested expands and the inline BibTeX entry for mlxtend (which provides the
         Stacking* estimator types offered above) are passed into the sklearn_citation macro. -->
    <expand macro="sklearn_citation">
        <expand macro="skrebate_citation" />
        <expand macro="xgboost_citation" />
        <expand macro="imblearn_citation" />
        <citation type="bibtex">
            @article{raschkas_2018_mlxtend,
                author       = {Sebastian Raschka},
                title        = {MLxtend: Providing machine learning and data science 
                                                utilities and extensions to Python’s  
                                                scientific computing stack},
                journal      = {The Journal of Open Source Software},
                volume       = {3},
                number       = {24},
                month        = apr,
                year         = 2018,
                publisher    = {The Open Journal},
                doi          = {10.21105/joss.00638},
                url          = {http://joss.theoj.org/papers/10.21105/joss.00638}
            }
        </citation>
    </expand>
</tool>