view pipeline.xml @ 18:9c3e1d3235c8 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2afb24f3c81d625312186750a714d702363012b5"
author bgruening
date Fri, 02 Oct 2020 08:49:25 +0000
parents 3f3c6dc38f3e
children 4de3d598c116
line wrap: on
line source

<tool id="sklearn_build_pipeline" name="Pipeline Builder" version="@VERSION@">
    <description>an all-in-one platform to build pipeline, single estimator, preprocessor and custom wrappers</description>
    <macros>
        <import>main_macros.xml</import>
    </macros>
    <expand macro="python_requirements"/>
    <expand macro="macro_stdio"/>
    <version_command>echo "@VERSION@"</version_command>
    <command>
        <![CDATA[
        python "$sklearn_pipeline_script" '$inputs'
        ]]>
    </command>
    <configfiles>
        <inputs name="inputs" />
        <configfile name="sklearn_pipeline_script">
            <![CDATA[
import imblearn
import json
import pandas as pd
import pickle
import pprint
import skrebate
import sys
import warnings
from sklearn import (
    cluster, compose, decomposition, ensemble, feature_extraction,
    feature_selection, gaussian_process, kernel_approximation, metrics,
    model_selection, naive_bayes, neighbors, pipeline, preprocessing,
    svm, linear_model, tree, discriminant_analysis)
from sklearn.pipeline import make_pipeline
from imblearn.pipeline import make_pipeline as imb_make_pipeline
from galaxy_ml.utils import (SafeEval, feature_selector, get_estimator,
                             try_get_attr, get_search_params, load_model)

## TODO remove following imports after scikit-learn v0.22
from sklearn.experimental import enable_hist_gradient_boosting


## Number of parallel jobs: read from the GALAXY_SLOTS environment variable,
## falling back to 1 when the variable is not set.
N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))

## Silence every Python warning for the remainder of the script.
warnings.filterwarnings('ignore')

## Restricted evaluator used below to parse the free-text parameter fields.
safe_eval = SafeEval()

## The first command-line argument is the path of the JSON file holding the
## tool parameters (the command section passes the 'inputs' configfile).
input_json_path = sys.argv[1]
with open(input_json_path, 'r') as param_handler:
    params = json.load(param_handler)

## The two template-guarded assignments below are only emitted for the matching
## estimator module. Each overwrites the JSON value with the string rendered by
## the template -- presumably the path of the selected dataset (TODO confirm).
#if $final_estimator.estimator_selector.selected_module == 'custom_estimator':
params['final_estimator']['estimator_selector']['c_estimator'] =\
        '$final_estimator.estimator_selector.c_estimator'
#end if

#if $final_estimator.estimator_selector.selected_module == 'binarize_target':
params['final_estimator']['estimator_selector']['wrapped_estimator'] =\
        '$final_estimator.estimator_selector.wrapped_estimator'
#end if

## Ordered list of components; pre-processing steps are appended first and the
## final estimator (if any) last.
pipeline_steps = []

def _apply_text_params(obj, text_params):
    """Apply a free-text ``key=value, ...`` parameter string to ``obj``.

    The text is stripped, wrapped as ``dict(...)`` and evaluated with the
    module-level ``safe_eval``; the resulting mapping is forwarded to
    ``obj.set_params``. A blank string leaves ``obj`` unchanged.
    """
    text_params = text_params.strip()
    if text_params != '':
        obj.set_params(**safe_eval('dict(' + text_params + ')'))
    return obj


def get_component(input_json, check_none=False):
    """Build one pre-processing component from its JSON description.

    Parameters
    ----------
    input_json : dict
        The ``component_selector`` mapping of one pipeline step. Its
        ``component_type`` entry selects which branch builds the object.
    check_none : bool, default False
        When true, a ``component_type`` of ``'None'`` aborts the script
        instead of yielding an empty step.

    Returns
    -------
    tuple
        ``(obj, is_imblearn)``: the instantiated component (``None`` for the
        ``'None'`` type) and a flag that is true only for the ``'imblearn'``
        component type.

    Raises
    ------
    SystemExit
        If the type is ``'None'`` while ``check_none`` is true, if the IRAPS
        core parameters make ``safe_eval`` raise ``ValueError``, or if the
        component type is not one of the supported values.
    """
    component_type = input_json['component_type']
    is_imblearn = False

    if component_type == 'None':
        if check_none:
            sys.exit("The pre-processing component type can't be None "
                     "when the number of components is greater than 1.")
        return None, False

    if component_type == 'pre_processor':
        selection = input_json['pre_processors']
        ## Work on a copy so that the caller's parameters are not modified.
        options = dict(selection['options'])
        if 'feature_range' in options:
            feature_range = safe_eval(options['feature_range'].strip())
            if not feature_range:
                feature_range = (0, 1)
            options['feature_range'] = feature_range
        klass = getattr(preprocessing, selection['selected_pre_processor'])
        obj = klass(**options)
    elif component_type == 'feature_selection':
        obj = feature_selector(input_json['fs_algorithm_selector'])
    elif component_type == 'decomposition':
        selection = input_json['matrix_decomposition_selector']
        obj = getattr(decomposition, selection['select_algorithm'])()
        _apply_text_params(obj, selection['text_params'])
    elif component_type == 'kernel_approximation':
        selection = input_json['kernel_approximation_selector']
        obj = getattr(kernel_approximation, selection['select_algorithm'])()
        _apply_text_params(obj, selection['text_params'])
    elif component_type == 'FeatureAgglomeration':
        selection = input_json['FeatureAgglomeration_selector']
        obj = getattr(cluster, selection['select_algorithm'])()
        _apply_text_params(obj, selection['text_params'])
    elif component_type == 'skrebate':
        selection = input_json['skrebate_selector']
        algorithm = selection['select_algorithm']
        if algorithm == 'TuRF':
            ## TuRF is built with an explicit core algorithm.
            obj = getattr(skrebate, algorithm)(core_algorithm='ReliefF')
        else:
            obj = getattr(skrebate, algorithm)()
        _apply_text_params(obj, selection['text_params'])
    elif component_type == 'imblearn':
        is_imblearn = True
        selection = input_json['imblearn_selector']
        algorithm = selection['select_algorithm']
        if algorithm == 'over_sampling.SMOTENC':
            ## Resolve through the imported imblearn package; the bare name
            ## over_sampling is never imported by this script.
            obj = imblearn.over_sampling.SMOTENC(categorical_features=[])
        elif algorithm == 'Z_RandomOverSampler':
            Z_RandomOverSampler = try_get_attr('galaxy_ml.preprocessors',
                                               'Z_RandomOverSampler')
            obj = Z_RandomOverSampler()
        else:
            ## The value has the form 'submodule.ClassName'.
            name_parts = algorithm.split('.')
            mod, klass = name_parts[0], name_parts[1]
            obj = getattr(getattr(imblearn, mod), klass)()
        _apply_text_params(obj, selection['text_params'])
    elif component_type == 'IRAPS':
        iraps_core = try_get_attr('galaxy_ml.iraps_classifier', 'IRAPSCore')()
        core_params = input_json['text_params'].strip()
        if core_params != '':
            try:
                core_kwargs = safe_eval('dict(' + core_params + ')')
            except ValueError:
                sys.exit("Unsupported parameter input: `%s`" % core_params)
            iraps_core.set_params(**core_kwargs)
        ## Only forward the thresholds that were actually provided.
        options = {
            key: input_json[key]
            for key in ('p_thres', 'fc_thres', 'occurrence', 'discretize')
            if input_json[key] is not None
        }
        IRAPSClassifier = try_get_attr('galaxy_ml.iraps_classifier',
                                       'IRAPSClassifier')
        obj = IRAPSClassifier(iraps_core, **options)
    elif component_type == 'preprocessors':
        ## Copy before popping so that the caller's parameters stay intact.
        encoder_selection = dict(input_json['encoder_selection'])
        encoder_type = encoder_selection.pop('encoder_type')
        klass = try_get_attr('galaxy_ml.preprocessors', encoder_type)
        obj = klass(**encoder_selection)
    else:
        sys.exit("Unsupported component type: `%s`" % component_type)

    ## Components exposing n_jobs run with the number of slots in N_JOBS.
    if 'n_jobs' in obj.get_params():
        obj.set_params(n_jobs=N_JOBS)
    return obj, is_imblearn

## Instantiate every configured pre-processing step and record whether any of
## them is an imblearn sampler. A lone step may be 'None' and is then skipped;
## with several steps a 'None' entry aborts inside get_component.
has_imblearn = False
component_list = params['pipeline_component']
if len(component_list) != 1:
    for entry in component_list:
        step_obj, is_imblearn = get_component(entry['component_selector'], check_none=True)
        pipeline_steps.append(step_obj)
        has_imblearn = has_imblearn or is_imblearn
else:
    step_obj, is_imblearn = get_component(component_list[0]['component_selector'])
    if step_obj:
        pipeline_steps.append(step_obj)
        has_imblearn = has_imblearn or is_imblearn

## Set up final estimator and add to pipeline.
estimator_json = params['final_estimator']['estimator_selector']
if estimator_json['selected_module'] == 'none':
    ## Without a final estimator at least one pre-processing step is required.
    if len(pipeline_steps) == 0:
        sys.exit("No pipeline steps specified!")
    ## else:  turn the last pre-process component to final estimator
elif estimator_json['selected_module'] == 'sklearn.compose':
    ## The template guard below only emits the two path assignments when this
    ## module is selected, so the placeholders are never rendered otherwise.
    #if $final_estimator.estimator_selector.selected_module == 'sklearn.compose':
    regressor_path = '$final_estimator.estimator_selector.regressor'
    transformer_path = '$final_estimator.estimator_selector.transformer'
    #end if
    ## Load the wrapped regressor and the target transformer, then combine
    ## them into a TransformedTargetRegressor used as the final step.
    with open(regressor_path, 'rb') as f:
        regressor = load_model(f)
    with open(transformer_path, 'rb') as f:
        transformer = load_model(f)
    estimator = compose.TransformedTargetRegressor(regressor=regressor, transformer=transformer)
    pipeline_steps.append( estimator )
else:
    ## Every other module is resolved by get_estimator from the JSON settings.
    estimator = get_estimator(estimator_json)
    pipeline_steps.append( estimator )

## A single component is output as-is; several are chained into a pipeline,
## using the imblearn builder when any step is an imblearn sampler.
if len(pipeline_steps) != 1:
    build_pipeline = imb_make_pipeline if has_imblearn else make_pipeline
    out_obj = build_pipeline(*pipeline_steps)
    pprint.pprint(out_obj.named_steps)
else:
    out_obj = pipeline_steps[-1]
    print(out_obj)

## Pickle the resulting object into the main output file.
with open('$outfile', 'wb') as out_handler:
    pickle.dump(out_obj, out_handler, pickle.HIGHEST_PROTOCOL)

## Optional tab-separated table of the parameters reported by get_search_params.
#if $get_params
df = pd.DataFrame(get_search_params(out_obj),
                  columns=['', 'Parameter', 'Value'])
df.to_csv('$outfile_params', sep='\t', index=False)
#end if
            ]]>
        </configfile>
    </configfiles>
    <inputs>
        <repeat name="pipeline_component" min="1" max="5" title="Pre-processing step">
            <conditional name="component_selector">
                <param name="component_type" type="select" label="Choose the type of transformation:">
                    <option value="None" selected="true">None</option>
                    <option value="pre_processor">Sklearn Preprocessor</option>
                    <option value="feature_selection">Feature Selection</option>
                    <option value="decomposition">Matrix Decomposition</option>
                    <option value="kernel_approximation">Kernel Approximation</option>
                    <option value="FeatureAgglomeration">Agglomerate Features</option>
                    <option value="skrebate">SK-rebate Feature Selection</option>
                    <option value="imblearn">Imbalanced-learn Sampling</option>
                    <option value="IRAPS">IRAPS -- feature selector and classifier</option>
                    <option value="preprocessors">Bio-sequence Encoders</option>
                </param>
                <when value="None"/>
                <when value="pre_processor">
                    <conditional name="pre_processors">
                        <expand macro="sparse_preprocessors_ext" />
                        <expand macro="sparse_preprocessor_options_ext" />
                    </conditional>
                </when>
                <when value="feature_selection">
                    <expand macro="feature_selection_pipeline"/>
                </when>
                <when value="decomposition">
                    <expand macro="matrix_decomposition_all"/>
                </when>
                <when value="kernel_approximation">
                    <expand macro="kernel_approximation_all"/>
                </when>
                <when value="FeatureAgglomeration">
                    <expand macro="FeatureAgglomeration"/>
                </when>
                <when value="skrebate">
                    <expand macro="skrebate"/>
                </when>
                <when value="imblearn">
                    <expand macro="imbalanced_learn_sampling"/>
                </when>
                <when value="IRAPS">
                    <expand macro="estimator_params_text"
                        label="Type in parameter settings for IRAPSCore if different from default:"
                        help="Default(=blank): n_iter=1000, responsive_thres=-1, resistant_thres=0, random_state=None. No double quotes"/>
                    <param argument="p_thres" type="float" value="0.001" label="P value threshold" help="Float. default=0.001"/>
                    <param argument="fc_thres" type="float" value="0.1" label="fold change threshold" help="Float. default=0.1"/>
                    <param argument="occurrence" type="float" value="0.7" label="reservation factor" help="Float. default=0.7"/>
                    <param argument="discretize" type="float" value="-1" label="The z_score threshold to discretize target value" help="Float. default=-1"/>
                </when>
                <when value="preprocessors">
                    <expand macro="preprocessors_sequence_encoders"/>
                </when>
            </conditional>
        </repeat>
        <section name="final_estimator" title="Final Estimator" expanded="true">
            <conditional name="estimator_selector">
                <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >
                    <expand macro="estimator_module_options">
                        <option value="sklearn.compose">sklearn.compose</option>
                        <option value="binarize_target">Binarize Target Classifier or Regressor</option>
                        <option value="custom_estimator">Load a custom estimator</option>
                        <option value="none">none -- The last component of pre-processing step will turn to a final estimator</option>
                    </expand>
                </param>
                <expand macro="estimator_suboptions">
                    <when value="sklearn.compose">
                        <param name="selected_estimator" type="select" label="Choose estimator class:">
                            <option value="TransformedTargetRegressor" selected="true">TransformedTargetRegressor</option>
                        </param>
                        <param name="regressor" type="data" format="zip" label="Choose the dataset containing the wrapped regressor"/>
                        <param name="transformer" type="data" format="zip" label="Choose the dataset containing transformer"/>
                    </when>
                    <when value="binarize_target">
                        <param name="clf_or_regr" type="select" label="Classifier or Regressor:">
                            <option value="BinarizeTargetClassifier">BinarizeTargetClassifier</option>
                            <option value="BinarizeTargetRegressor">BinarizeTargetRegressor</option>
                        </param>
                        <param name="wrapped_estimator" type="data" format="zip" label="Choose the dataset containing the wrapped estimator or pipeline"/>
                        <param name='z_score' type="float" value="-1" optional="false" label="Discretize target values using z_score"/>
                        <param name='value' type="float" value="" optional="true" label="Discretize target values using a fixed value instead" help="Optional. default: None."/>
                        <param name="less_is_positive" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Are the detecting values smaller than others?"/>
                    </when>
                    <when value="custom_estimator">
                        <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the custom estimator or pipeline"/>
                    </when>
                    <when value="none"/>
                </expand>
            </conditional>
        </section>
        <!--param name="output_type" type="select" label="Output the final estimator instead?">
            <option value="Pipeline_Builder" selected="true">Pipeline</option>
            <option value="Final_Estimator_Builder">Final Estimator</option>
        </param>-->
        <param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Output parameters for searchCV?"
                help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool."/>
    </inputs>
    <outputs>
        <data format="zip" name="outfile" label="New Pipeline/Estimator"/>
        <data format="tabular" name="outfile_params" label="get_params for Pipeline/Estimator">
            <filter>get_params</filter>
        </data>
    </outputs>
    <tests>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="pre_processor"/>
                <conditional name="pre_processors">
                    <param name="selected_pre_processor" value="QuantileTransformer"/>
                    <section name="options">
                        <param name="random_state" value="10"/>
                    </section>
                </conditional>
            </conditional>
            <section name="final_estimator">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="none"/>
                </conditional>
            </section>
            <output name="outfile" file="pipeline17" compare="sim_size" delta="5"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="pre_processor"/>
                <conditional name="pre_processors">
                    <param name="selected_pre_processor" value="PowerTransformer"/>
                </conditional>
            </conditional>
            <section name="final_estimator">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="sklearn.compose"/>
                    <param name="regressor" value="RandomForestRegressor01.zip" ftype="zip"/>
                    <param name="transformer" value="pipeline17" ftype="zip"/>
                </conditional>
            </section>
            <param name="get_params" value="true"/>
            <output name="outfile_params" file="pipeline_params18" ftype="tabular"/>
        </test>
        <test>
            <repeat name="pipeline_component">
                <conditional name="component_selector">
                    <param name="component_type" value="pre_processor"/>
                    <conditional name="pre_processors">
                        <param name="selected_pre_processor" value="RobustScaler"/>
                    </conditional>
                </conditional>
            </repeat>
            <repeat name="pipeline_component">
                <conditional name="component_selector">
                    <param name="component_type" value="feature_selection"/>
                    <conditional name="fs_algorithm_selector">
                        <param name="selected_algorithm" value="SelectKBest"/>
                        <param name="score_func" value="f_classif"/>
                    </conditional>
                </conditional>
            </repeat>
            <param name="selected_module" value="svm"/>
            <param name="selected_estimator" value="SVR"/>
            <param name="text_params" value="kernel='linear'"/>
            <output name="outfile" file="pipeline01" compare="sim_size" delta="5"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="pre_processor"/>
                <conditional name="pre_processors">
                    <param name="selected_pre_processor" value="RobustScaler"/>
                </conditional>
            </conditional>
            <param name="selected_module" value="linear_model"/>
            <param name="selected_estimator" value="LassoCV"/>
            <output name="outfile" file="pipeline02" compare="sim_size" delta="5"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="pre_processor"/>
                <conditional name="pre_processors">
                    <param name="selected_pre_processor" value="RobustScaler"/>
                </conditional>
            </conditional>
            <param name="selected_module" value="xgboost"/>
            <param name="selected_estimator" value="XGBClassifier"/>
            <output name="outfile" file="pipeline03" compare="sim_size" delta="5"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="feature_selection"/>
                <conditional name="fs_algorithm_selector">
                    <param name="selected_algorithm" value="SelectFromModel"/>
                    <conditional name="model_inputter">
                        <conditional name="estimator_selector">
                            <param name="selected_module" value="ensemble"/>
                            <param name="selected_estimator" value="AdaBoostClassifier"/>
                        </conditional>
                    </conditional>
                </conditional>
            </conditional>
            <section name="final_estimator">
                <param name="selected_module" value="svm"/>
                <param name="selected_estimator" value="LinearSVC"/>
            </section>
            <output name="outfile" file="pipeline04" compare="sim_size" delta="5"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="None"/>
            </conditional>
            <param name="selected_module" value="ensemble"/>
            <param name="selected_estimator" value="RandomForestRegressor"/>
            <param name="text_params" value="n_estimators=100, random_state=42"/>
            <param name="get_params" value="true"/>
            <output name="outfile" file="pipeline05" compare="sim_size" delta="5"/>
            <output name="outfile_params" file="pipeline_params05.tabular" ftype="tabular"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="decomposition"/>
                    <conditional name="matrix_decomposition_selector">
                        <param name="select_algorithm" value="PCA"/>
                    </conditional>
            </conditional>
            <param name="selected_module" value="ensemble"/>
            <param name="selected_estimator" value="AdaBoostRegressor"/>
            <output name="outfile" file="pipeline06" compare="sim_size" delta="5"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="kernel_approximation"/>
                    <conditional name="kernel_approximation_selector">
                        <param name="select_algorithm" value="RBFSampler"/>
                        <param name="text_params" value="n_components=10, gamma=2.0"/>
                    </conditional>
            </conditional>
            <param name="selected_module" value="ensemble"/>
            <param name="selected_estimator" value="AdaBoostClassifier"/>
            <output name="outfile" file="pipeline07" compare="sim_size" delta="5"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="FeatureAgglomeration"/>
                    <conditional name="FeatureAgglomeration_selector">
                        <param name="select_algorithm" value="FeatureAgglomeration"/>
                        <param name="text_params" value="n_clusters=3, affinity='euclidean'"/>
                    </conditional>
            </conditional>
            <param name="selected_module" value="ensemble"/>
            <param name="selected_estimator" value="AdaBoostClassifier"/>
            <output name="outfile" file="pipeline08" compare="sim_size" delta="20"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="skrebate"/>
                    <conditional name="skrebate_selector">
                        <param name="select_algorithm" value="ReliefF"/>
                        <param name="text_params" value="n_features_to_select=3, n_neighbors=100"/>
                    </conditional>
            </conditional>
            <param name="selected_module" value="ensemble"/>
            <param name="selected_estimator" value="RandomForestRegressor"/>
            <output name="outfile" file="pipeline09" compare="sim_size" delta="5"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="imblearn"/>
                <conditional name="imblearn_selector">
                    <param name="select_algorithm" value="under_sampling.EditedNearestNeighbours"/>
                </conditional>
            </conditional>
            <param name="selected_module" value="ensemble"/>
            <param name="selected_estimator" value="RandomForestClassifier"/>
            <output name="outfile" file="pipeline11" compare="sim_size" delta="5"/>
        </test>
        <test expect_failure="true">
            <conditional name="component_selector">
                <param name="component_type" value="None"/>
            </conditional>
            <param name="selected_module" value="ensemble"/>
            <param name="selected_estimator" value="RandomForestRegressor"/>
            <param name="text_params" value="n_estimators=__import__('os').system('ls ~')"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="feature_selection"/>
                <conditional name="fs_algorithm_selector">
                    <param name="selected_algorithm" value="RFE"/>
                    <conditional name="estimator_selector">
                        <param name="selected_module" value="xgboost"/>
                        <param name="selected_estimator" value="XGBRegressor"/>
                        <param name="text_params" value="random_state=0"/>
                    </conditional>
                </conditional>
            </conditional>
            <section name="final_estimator">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="none"/>
                </conditional>
            </section>
            <output name="outfile" file="pipeline12" compare="sim_size" delta="5"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="None"/>
            </conditional>
            <param name="selected_module" value="ensemble"/>
            <param name="selected_estimator" value="RandomForestClassifier"/>
            <output name="outfile" file="RandomForestClassifier.zip" compare="sim_size" delta="5"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="IRAPS"/>
            </conditional>
            <section name="final_estimator">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="none"/>
                </conditional>
            </section>
            <output name="outfile" file="pipeline14" compare="sim_size" delta="5"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="None"/>
            </conditional>
            <section name="final_estimator">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="binarize_target"/>
                    <param name="clf_or_regr" value="BinarizeTargetClassifier"/>
                    <param name="wrapped_estimator" value="RandomForestClassifier.zip" ftype="zip"/>
                </conditional>
            </section>
            <output name="outfile" file="pipeline15" compare="sim_size" delta="5"/>
        </test>
        <test>
            <conditional name="component_selector">
                <param name="component_type" value="preprocessors"/>
                <conditional name="encoder_selection">
                    <param name="encoder_type" value="GenomeOneHotEncoder"/>
                    <param name="seq_length" value="1000"/>
                    <param name="padding" value="True"/>
                </conditional>
            </conditional>
            <section name="final_estimator">
                <conditional name="estimator_selector">
                    <param name="selected_module" value="custom_estimator"/>
                    <param name="c_estimator" value="keras_model02" ftype="zip"/>
                </conditional>
            </section>
            <output name="outfile" file="pipeline16" compare="sim_size" delta="5"/>
        </test>
    </tests>
    <help>
        <![CDATA[
**What it does**
This tool builds not only a scikit-learn pipeline object, but also a single main estimator or a single preprocessing component. The type of the output object depends on the number of pipeline steps. When there is only one step (choose `None` for the others), either a main estimator or a preprocessor, that component is output directly instead of being wrapped in a pipeline object.

A typical pipeline chains one or more preprocessing steps plus a final main estimator, for example, [VarianceThreshold, StandardScaler, SGDClassifier], which is composed of a feature selector, a preprocessing scaler and a main estimator.
For more information, please refer to `Scikit-learn pipeline Pipeline`_.

**Pre-processing components** come from `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_, `cluster.FeatureAgglomeration`_, `skrebate`_ and more.

**Final Estimator** supports estimators from `xgboost`_ and many scikit-learn modules, including `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_, `neighbors`_ and so on.

**Custom estimators**

- `GenomeOneHotEncoder`_

- `ProteinOnehotEncoder`_

- `IRAPSClassifier`_

- `BinarizeTargetClassifier`_

- `BinarizeTargetRegressor`_

**Output**

- Pickled pipeline/estimator object

- Hyperparameters of the object (optional)


.. _`Scikit-learn pipeline Pipeline`: http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html
.. _`svm`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.svm
.. _`linear_model`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.linear_model
.. _`ensemble`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.ensemble
.. _`naive_bayes`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.naive_bayes
.. _`tree`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.tree
.. _`neighbors`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.neighbors
.. _`xgboost`: https://xgboost.readthedocs.io/en/latest/python/python_api.html

.. _`sklearn.preprocessing`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing
.. _`feature_selection`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection
.. _`decomposition`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition
.. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation
.. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html
.. _`skrebate`: https://epistasislab.github.io/scikit-rebate/using/

.. _`GenomeOneHotEncoder`: https://goeckslab.github.io/Galaxy-ML/APIs/preprocessors/#genomeonehotencoder
.. _`ProteinOnehotEncoder`: https://goeckslab.github.io/Galaxy-ML/APIs/preprocessors/#proteinonehotencoder
.. _`IRAPSClassifier`: https://goeckslab.github.io/Galaxy-ML/APIs/iraps-classifier/#irapsclassifier
.. _`BinarizeTargetClassifier`: https://goeckslab.github.io/Galaxy-ML/APIs/binarize-target/#binarizetargetclassifier
.. _`BinarizeTargetRegressor`: https://goeckslab.github.io/Galaxy-ML/APIs/binarize-target/#binarizetargetregressor

        ]]>
    </help>
    <expand macro="sklearn_citation">
        <expand macro="skrebate_citation"/>
        <expand macro="xgboost_citation"/>
        <expand macro="imblearn_citation"/>
    </expand>
</tool>