view stacking_ensembles.xml @ 0:8e93241d5d28 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7
author bgruening
date Tue, 14 May 2019 18:04:46 -0400
parents
children
line wrap: on
line source

<tool id="stacking_ensemble_models" name="Stacking Ensemble Models" version="0.1.0">
    <!-- Short tagline for the tool. -->
    <description>builds a strong model by stacking multiple algorithms</description>
    <!-- Imports main_macros.xml; the `expand` elements used throughout this file
         refer to macros by name and are presumably resolved from that import. -->
    <macros>
        <import>main_macros.xml</import>
    </macros>
    <expand macro="python_requirements"/>
    <expand macro="macro_stdio"/>
    <!-- NOTE(review): `$version` is not defined anywhere in this file. Confirm where it
         is expected to come from; if nothing sets it, this likely echoes an empty string. -->
    <version_command>echo "$version"</version_command>
    <!--
        Command template: Cheetah directives interleaved with shell.

        1. Loops over the `base_est_builder` repeat and builds the shell variable `bases`
           as a comma-separated list with one entry per base estimator: the selected
           `c_estimator` dataset when the module is `custom_estimator`, otherwise the
           literal string `None`. The first iteration (index 0) initialises the variable;
           every later iteration appends to it.
        2. Runs stacking_ensembles.py from the tool directory, passing the `inputs`
           config file, the `outfile` output and the assembled `bases` list.
        3. Adds the `meta` option only when the meta estimator module is
           `custom_estimator`, and the `outfile_params` option only when `get_params`
           is true.

        The dollar sign in `\$bases` is escaped so that Cheetah leaves the variable for
        the shell to expand.
    -->
    <command>
        <![CDATA[
        #for $i, $base in enumerate($base_est_builder)
        #if $i == 0
            #if $base.estimator_selector.selected_module == 'custom_estimator'
            bases='${base.estimator_selector.c_estimator}';
            #else
            bases='None';
            #end if
        #elif $base.estimator_selector.selected_module == 'custom_estimator'
        bases="\$bases,${base.estimator_selector.c_estimator}";
        #else
        bases="\$bases,None";
        #end if
        #end for
        python '$__tool_directory__/stacking_ensembles.py'
            --inputs '$inputs'
            --outfile '$outfile'
            --bases "\$bases"
            #if $meta_estimator.estimator_selector.selected_module == 'custom_estimator'
            --meta '${meta_estimator.estimator_selector.c_estimator}'
            #end if
            #if $get_params
            --outfile_params '$outfile_params'
            #end if
        ]]>
    </command>
    <!-- Declares the `inputs` config file whose path the command hands to the script
         through `$inputs`. Per the Galaxy tool schema, an `inputs` config file holds
         the tool's parameter values serialised as JSON. -->
    <configfiles>
        <inputs name="inputs" />
    </configfiles>
    <inputs>
        <!--
            Tool form, in display order:
              1. `algo_selection`: picks one of four mlxtend stacking ensemble types; each
                 branch expands the shared `stacking_ensemble_inputs` macro and adds the
                 options that only apply to that type.
              2. `base_est_builder`: 1 to 20 base estimators.
              3. `meta_estimator`: the second-level estimator.
              4. `get_params`: whether to also emit the parameter table output.
            The boolean options below carry help text taken from the mlxtend API
            documentation so that users do not have to guess from the argument name.
        -->
        <conditional name="algo_selection">
            <param name="estimator_type" type="select" label="Choose the stacking ensemble type">
                <option value="StackingCVClassifier" selected="true">classification -- StackingCVClassifier</option>
                <option value="StackingClassifier">classification -- StackingClassifier</option>
                <option value="StackingCVRegressor">regression -- StackingCVRegressor</option>
                <option value="StackingRegressor">regression -- StackingRegressor</option>
            </param>
            <when value="StackingCVClassifier">
                <expand macro="stacking_ensemble_inputs">
                    <expand macro="cv_reduced"/>
                    <expand macro="shuffle" label="shuffle"/>
                    <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data."/>
                    <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"
                            help="If true, the meta-classifier is trained on the class probabilities predicted by the base classifiers instead of their predicted class labels."/>
                </expand>
            </when>
            <when value="StackingClassifier">
                <expand macro="stacking_ensemble_inputs">
                    <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"
                            help="If true, the meta-classifier is trained on the class probabilities predicted by the base classifiers instead of their predicted class labels."/>
                    <param argument="average_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"
                            help="Only relevant when use_probas is enabled. If true, the probabilities of the base classifiers are averaged to form the meta-features."/>
                </expand>
            </when>
            <when value="StackingCVRegressor">
                <expand macro="stacking_ensemble_inputs">
                    <expand macro="cv_reduced"/>
                    <!--TODO support group splitters. Hint: `groups` is a fit_param-->
                    <expand macro="shuffle" label="shuffle"/>
                    <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data."/>
                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"
                            help="If true, the regressors are cloned before fitting; if false, the original estimator objects are used and refitted on the dataset."/>
                </expand>
            </when>
            <when value="StackingRegressor">
                <expand macro="stacking_ensemble_inputs">
                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"
                            help="If true, the regressors are cloned before fitting; if false, the original estimator objects are used and refitted on the dataset."/>
                </expand>
            </when>
        </conditional>
        <repeat name="base_est_builder" min="1" max="20" title="Base Estimator">
            <expand macro="stacking_base_estimator"/>
            <!--param name="base_estimator" type="data" format="zip,json" label="Select the dataset containing base estimator" help="One estimator at a time."/-->
        </repeat>
        <!--param name="meta_estimator" type="data" format="zip,json" label="Select the dataset containing the Meta estimator"/-->
        <section name="meta_estimator" title="Meta Estimator" expanded="true">
            <expand macro="stacking_base_estimator"/>
        </section>
        <param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Output parameters for searchCV?"
                help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool."/>
    </inputs>
    <!-- `outfile`: zip dataset passed to the script as its output path, labelled with the
         chosen ensemble type and the input datasets.
         `outfile_params`: tabular dataset that is only created when the `get_params`
         input is true (see the filter). -->
    <outputs>
        <data format="zip" name="outfile" label="${algo_selection.estimator_type} on ${on_string}"/>
        <data format="tabular" name="outfile_params" label="get_params for ${algo_selection.estimator_type}">
            <filter>get_params</filter>
        </data>
    </outputs>
    <!-- Both tests build a StackingCVRegressor with two base estimators, switch
         `get_params` off, and compare the resulting zip by size only
         (compare="sim_size" with delta="5"). -->
    <tests>
        <!-- Test 1: every estimator (two bases and the meta estimator) is supplied as a
             `custom_estimator` zip dataset. -->
        <test>
            <conditional name="algo_selection">
                <param name="estimator_type" value="StackingCVRegressor"/>
            </conditional>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="custom_estimator"/>
                    <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip"/>
                </conditional>
            </repeat>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="custom_estimator"/>
                    <param name="c_estimator" value="XGBRegressor01.zip" ftype="zip"/>
                </conditional>
            </repeat>
            <section name="meta_estimator">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="custom_estimator"/>
                    <param name="c_estimator" value="LinearRegression01.zip" ftype="zip"/>
                </conditional>
            </section>
            <param name="get_params" value="false"/>
            <output name="outfile" file="StackingCVRegressor01.zip" compare="sim_size" delta="5"/>
        </test>
        <!-- Test 2: mixes one `custom_estimator` zip base with a base selected by module
             and estimator name (xgboost / XGBRegressor); the meta estimator is likewise
             selected by name (svm / SVR). -->
        <test>
            <conditional name="algo_selection">
                <param name="estimator_type" value="StackingCVRegressor"/>
            </conditional>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="custom_estimator"/>
                    <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip"/>
                </conditional>
            </repeat>
            <repeat name="base_est_builder">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="xgboost"/>
                    <param name="selected_estimator" value="XGBRegressor"/>
                </conditional>
            </repeat>
            <section name="meta_estimator">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="svm"/>
                    <param name="selected_estimator" value="SVR"/>
                </conditional>
            </section>
            <param name="get_params" value="false"/>
            <output name="outfile" file="StackingCVRegressor02.zip" compare="sim_size" delta="5"/>
        </test>
    </tests>
    <help>
        <![CDATA[
This tool wraps stacking ensemble models for classification and regression (stacking is also called Super Learning):
several base algorithms are trained on the original dataset and each produces its own predictions, and a second-level
`metalearner` is then fitted on those predictions to build a stronger learner.
Refer to `http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`_.

.. _`http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`:
 http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction

        ]]>
    </help>
    <!-- Citations: the `sklearn_citation` macro is expanded with three further citation
         macros plus an inline BibTeX entry for MLxtend (Raschka, 2018). -->
    <expand macro="sklearn_citation">
        <expand macro="skrebate_citation"/>
        <expand macro="xgboost_citation"/>
        <expand macro="imblearn_citation"/>
        <citation type="bibtex">
            @article{raschkas_2018_mlxtend,
                author       = {Sebastian Raschka},
                title        = {MLxtend: Providing machine learning and data science 
                                                utilities and extensions to Python’s  
                                                scientific computing stack},
                journal      = {The Journal of Open Source Software},
                volume       = {3},
                number       = {24},
                month        = apr,
                year         = 2018,
                publisher    = {The Open Journal},
                doi          = {10.21105/joss.00638},
                url          = {http://joss.theoj.org/papers/10.21105/joss.00638}
            }
        </citation>
    </expand>
</tool>