Mercurial > repos > bgruening > stacking_ensemble_models

diff stacking_ensembles.xml @ 0:8e93241d5d28 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7
author: bgruening
date: Tue, 14 May 2019 18:04:46 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/stacking_ensembles.xml	Tue May 14 18:04:46 2019 -0400
@@ -0,0 +1,178 @@
+<tool id="stacking_ensemble_models" name="Stacking Ensemble Models" version="0.1.0">
+    <description>builds a strong model by stacking multiple algorithms</description>
+    <macros>
+        <import>main_macros.xml</import>
+    </macros>
+    <expand macro="python_requirements"/>
+    <expand macro="macro_stdio"/>
+    <version_command>echo "0.1.0"</version_command><!-- Literal on purpose: no "version" shell variable is set by this wrapper, so echoing one prints an empty line. Keep in sync with the version attribute on the tool element. -->
+    <command>
+        <![CDATA[
+        ## Build the shell variable "bases": one comma-separated entry per base estimator, a dataset path for a custom estimator and the literal None otherwise.
+        #for $idx, $base_est in enumerate($base_est_builder)
+            #if $base_est.estimator_selector.selected_module == 'custom_estimator'
+                #set $base_item = str($base_est.estimator_selector.c_estimator)
+            #else
+                #set $base_item = 'None'
+            #end if
+            #if $idx == 0
+        bases='${base_item}';
+            #else
+        bases="\$bases,${base_item}";
+            #end if
+        #end for
+        python '$__tool_directory__/stacking_ensembles.py'
+            --inputs '$inputs'
+            --outfile '$outfile'
+            --bases "\$bases"
+            #if $meta_estimator.estimator_selector.selected_module == 'custom_estimator'
+            --meta '${meta_estimator.estimator_selector.c_estimator}'
+            #end if
+            #if $get_params
+            --outfile_params '$outfile_params'
+            #end if
+        ]]>
+    </command>
+    <configfiles><!-- The file declared here is what the command template references as $inputs; per the Galaxy docs an "inputs" configfile is a JSON dump of the tool parameters. -->
+        <inputs name="inputs" />
+    </configfiles>
+    <inputs><!-- Ensemble type with its options, then 1 to 20 base estimators, one meta estimator, and the get_params switch. -->
+        <conditional name="algo_selection"><!-- The options offered depend on the ensemble type chosen in estimator_type. -->
+            <param name="estimator_type" type="select" label="Choose the stacking ensemble type">
+                <option value="StackingCVClassifier" selected="true">classification -- StackingCVClassifier</option>
+                <option value="StackingClassifier">classification -- StackingClassifier</option>
+                <option value="StackingCVRegressor">regression -- StackingCVRegressor</option>
+                <option value="StackingRegressor">regression -- StackingRegressor</option>
+            </param>
+            <when value="StackingCVClassifier"><!-- Adds the cv_reduced, shuffle and random_state macros plus use_probas to the shared stacking_ensemble_inputs macro. -->
+                <expand macro="stacking_ensemble_inputs">
+                    <expand macro="cv_reduced"/>
+                    <expand macro="shuffle" label="shuffle"/>
+                    <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data."/>
+                    <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/>
+                </expand>
+            </when>
+            <when value="StackingClassifier"><!-- Adds use_probas and average_probas to the shared stacking_ensemble_inputs macro. -->
+                <expand macro="stacking_ensemble_inputs">
+                    <param argument="use_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/>
+                    <param argument="average_probas" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/>
+                </expand>
+            </when>
+            <when value="StackingCVRegressor"><!-- Adds the cv_reduced, shuffle and random_state macros plus refit to the shared stacking_ensemble_inputs macro. -->
+                <expand macro="stacking_ensemble_inputs">
+                    <expand macro="cv_reduced"/>
+                    <!--TODO support group splitters. Hint: `groups` is a fit_param-->
+                    <expand macro="shuffle" label="shuffle"/>
+                    <expand macro="random_state" default_value="" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data."/>
+                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"/>
+                </expand>
+            </when>
+            <when value="StackingRegressor"><!-- Adds refit to the shared stacking_ensemble_inputs macro. -->
+                <expand macro="stacking_ensemble_inputs">
+                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true"/>
+                </expand>
+            </when>
+        </conditional>
+        <repeat name="base_est_builder" min="1" max="20" title="Base Estimator"><!-- Between 1 and 20 entries (min/max); each entry is an estimator chosen through the stacking_base_estimator macro. -->
+            <expand macro="stacking_base_estimator"/>
+            <!--param name="base_estimator" type="data" format="zip,json" label="Select the dataset containing base estimator" help="One estimator at a time."/-->
+        </repeat>
+        <!--param name="meta_estimator" type="data" format="zip,json" label="Select the dataset containing the Meta estimator"/-->
+        <section name="meta_estimator" title="Meta Estimator" expanded="true"><!-- Uses the same stacking_base_estimator macro as each base estimator entry. -->
+            <expand macro="stacking_base_estimator"/>
+        </section>
+        <param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Output parameters for searchCV?"
+                help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool."/><!-- The same flag filters the outfile_params output and is tested in the command template. -->
+    </inputs>
+    <outputs><!-- outfile (zip) is always declared; outfile_params (tabular) is kept only when get_params is checked. -->
+        <data format="zip" name="outfile" label="${algo_selection.estimator_type} on ${on_string}"/>
+        <data format="tabular" name="outfile_params" label="get_params for ${algo_selection.estimator_type}">
+            <filter>get_params</filter>
+        </data>
+    </outputs>
+    <tests><!-- Both tests build a StackingCVRegressor with two base estimators and get_params switched off; only outfile is checked, using compare="sim_size" with delta 5. -->
+        <test><!-- All three estimators (two bases and the meta estimator) come from prebuilt zip datasets. -->
+            <conditional name="algo_selection">
+                <param name="estimator_type" value="StackingCVRegressor"/>
+            </conditional>
+            <repeat name="base_est_builder">
+                <conditional name="estimator_selector">
+                    <param name="selected_module" value="custom_estimator"/>
+                    <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip"/>
+                </conditional>
+            </repeat>
+            <repeat name="base_est_builder">
+                <conditional name="estimator_selector">
+                    <param name="selected_module" value="custom_estimator"/>
+                    <param name="c_estimator" value="XGBRegressor01.zip" ftype="zip"/>
+                </conditional>
+            </repeat>
+            <section name="meta_estimator">
+                <conditional name="estimator_selector">
+                    <param name="selected_module" value="custom_estimator"/>
+                    <param name="c_estimator" value="LinearRegression01.zip" ftype="zip"/>
+                </conditional>
+            </section>
+            <param name="get_params" value="false"/>
+            <output name="outfile" file="StackingCVRegressor01.zip" compare="sim_size" delta="5"/>
+        </test>
+        <test><!-- Mixed sources: first base estimator from a zip dataset, second base picked as xgboost XGBRegressor, meta estimator picked as svm SVR. -->
+            <conditional name="algo_selection">
+                <param name="estimator_type" value="StackingCVRegressor"/>
+            </conditional>
+            <repeat name="base_est_builder">
+                <conditional name="estimator_selector">
+                    <param name="selected_module" value="custom_estimator"/>
+                    <param name="c_estimator" value="RandomForestRegressor01.zip" ftype="zip"/>
+                </conditional>
+            </repeat>
+            <repeat name="base_est_builder">
+                <conditional name="estimator_selector">
+                    <param name="selected_module" value="xgboost"/>
+                    <param name="selected_estimator" value="XGBRegressor"/>
+                </conditional>
+            </repeat>
+            <section name="meta_estimator">
+                <conditional name="estimator_selector">
+                    <param name="selected_module" value="svm"/>
+                    <param name="selected_estimator" value="SVR"/>
+                </conditional>
+            </section>
+            <param name="get_params" value="false"/>
+            <output name="outfile" file="StackingCVRegressor02.zip" compare="sim_size" delta="5"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+This tool wraps stacking ensembles for classification and regression (stacking is also called Super Learning), in which different base algorithms are trained
+on the original dataset and each produce predictions; a second-level `metalearner` is then fitted on those
+predictions to combine them into a strong learner.
+Refer to `http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`_.
+
+.. _`http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction`:
+ http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/stacked-ensembles.html#introduction
+
+        ]]>
+    </help>
+    <expand macro="sklearn_citation">
+        <expand macro="skrebate_citation"/>
+        <expand macro="xgboost_citation"/>
+        <expand macro="imblearn_citation"/>
+        <citation type="bibtex">
+            @article{raschkas_2018_mlxtend,
+                author       = {Sebastian Raschka},
+                title        = {MLxtend: Providing machine learning and data science 
+                                                utilities and extensions to Python’s  
+                                                scientific computing stack},
+                journal      = {The Journal of Open Source Software},
+                volume       = {3},
+                number       = {24},
+                month        = apr,
+                year         = 2018,
+                publisher    = {The Open Journal},
+                doi          = {10.21105/joss.00638},
+                url          = {http://joss.theoj.org/papers/10.21105/joss.00638}
+            }
+        </citation>
+    </expand>
+</tool>
author	bgruening
date	Tue, 14 May 2019 18:04:46 -0400
parents
children