Mercurial > repos > bgruening > sklearn_model_fit

<tool id="sklearn_model_fit" name="Fit a Pipeline, Ensemble" version="@VERSION@" profile="20.05">
    <!-- Text displayed together with the tool name to describe what the tool does. -->
    <description>or other models using a labeled dataset</description>
    <!-- Macros expanded throughout this file are defined in the imported files below, which are not part of this
         file. NOTE(review): the @VERSION@ token is presumably defined there as well; confirm. -->
    <macros>
        <import>main_macros.xml</import>
        <import>keras_macros.xml</import>
    </macros>
    <!-- Both expansions come from the imported macro files; judging by their names they supply the requirements
         and stdio handling sections. TODO confirm against main_macros.xml. -->
    <expand macro="python_requirements" />
    <expand macro="macro_stdio" />
    <!-- Reports the same @VERSION@ token that is used for the tool's version attribute. -->
    <version_command>echo "@VERSION@"</version_command>
    <!-- Command template: sets HDF5_USE_FILE_LOCKING to 'FALSE' in the job environment, then runs
         simple_model_fit.py from the tool directory with the estimator archive, the two input datasets selected
         in the input_options conditional, and the path of the fitted-model output. The out_weights option is
         appended only when is_deep_learning renders as 'booltrue', i.e. the truevalue declared on that
         parameter. -->
    <command>
        <![CDATA[
        export HDF5_USE_FILE_LOCKING='FALSE';
        python '$__tool_directory__/simple_model_fit.py'
            --inputs '$inputs'
            --infile_estimator '$infile_estimator'
            --infile1 '$input_options.infile1'
            --infile2 '$input_options.infile2'
            --out_object '$out_object'
            #if $is_deep_learning == 'booltrue'
            --out_weights '$out_weights'
            #end if
        ]]>
    </command>
    <!-- Declares the config file referenced as $inputs in the command above. NOTE(review): per the Galaxy tool
         XML documentation this is a dump of the tool parameters; confirm the format expected by the script. -->
    <configfiles>
        <inputs name="inputs" />
    </configfiles>
    <!-- User-facing parameters: the estimator archive, a deep-learning switch, and the training data selection. -->
    <inputs>
        <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator" />
        <!-- Rendered as 'booltrue' / 'boolfalse' in the command template; unchecked by default. This parameter is
             also used by the filter on the out_weights output. -->
        <param name="is_deep_learning" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Is the estimator a deep learning model?" />
        <!-- Two branches, 'tabular' and 'sparse'. The selector and the per-branch parameters are produced by
             macros defined outside this file; the command template expects infile1 and infile2 to exist in
             whichever branch is active. -->
        <conditional name="input_options">
            <expand macro="data_input_options" />
            <when value="tabular">
                <expand macro="samples_tabular" label1="Choose the training dataset containing features" multiple1="true" multiple2="false" />
            </when>
            <when value="sparse">
                <expand macro="sparse_target" />
            </when>
        </conditional>
    </inputs>
    <!-- Outputs: the fitted model archive is always produced; the HDF5 weights file is produced only when the
         is_deep_learning parameter is set (see the filter below). -->
    <outputs>
        <data format="zip" name="out_object" label="Fitted model (skeleton) on ${on_string}" />
        <data format="h5" name="out_weights" label="Weights trained on ${on_string}">
            <!-- Evaluated against the tool parameters; the output is kept only when the expression is true. -->
            <filter>is_deep_learning</filter>
        </data>
    </outputs>
    <tests>
        <!-- Fits the pipeline05 estimator on the tabular regression data; only the model archive is expected. -->
        <test>
            <param name="infile_estimator" value="pipeline05" ftype="zip" />
            <param name="infile1" value="regression_X.tabular" ftype="tabular" />
            <param name="header1" value="true" />
            <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17" />
            <param name="infile2" value="regression_y.tabular" ftype="tabular" />
            <param name="header2" value="true" />
            <param name="col2" value="1" />
            <output name="out_object" file="model_fit01" compare="sim_size" delta="50" />
        </test>
        <!-- Same data with is_deep_learning enabled and the keras_model04 estimator, so the weights file is
             checked as well. The estimator is uploaded with an explicit zip datatype, matching the declared
             input format and the first test. -->
        <test>
            <param name="infile_estimator" value="keras_model04" ftype="zip" />
            <param name="is_deep_learning" value="true" />
            <param name="infile1" value="regression_X.tabular" ftype="tabular" />
            <param name="header1" value="true" />
            <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17" />
            <param name="infile2" value="regression_y.tabular" ftype="tabular" />
            <param name="header2" value="true" />
            <param name="col2" value="1" />
            <output name="out_object" file="model_fit02" compare="sim_size" delta="50" />
            <output name="out_weights" file="model_fit02.h5" compare="sim_size" delta="50" />
        </test>
    </tests>
    <help>
        <![CDATA[
**What it does**

This tool takes a pre-built model and fits it on the provided labeled dataset by calling the scikit-learn `fit` API. The model can be built with the `build_pipeline` tool, the `stacking_ensemble` tool, or elsewhere. A limited set of deep learning models is also supported.

The supported labeled dataset formats are the following:

- tabular

- sparse


**Output**

- fitted model, a pickled python object in zip format.
- optional hdf5 file containing weights for deep learning models.

        ]]>
    </help>
    <!-- Citation section: the sklearn_citation macro is expanded with the keras and selene citation macros
         nested inside it. All three macros are defined outside this file. -->
    <expand macro="sklearn_citation">
        <expand macro="keras_citation" />
        <expand macro="selene_citation" />
    </expand>
</tool>
author	bgruening
date	Tue, 13 Apr 2021 22:46:15 +0000
parents	26decbf4bdb8
children	f903c8cf1455