Mercurial > repos > bgruening > sklearn_searchcv

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/feature_selectors.py	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,357 @@
+"""
+DyRFE
+DyRFECV
+MyPipeline
+MyimbPipeline
+check_feature_importances
+"""
+import numpy as np
+
+from imblearn import under_sampling, over_sampling, combine
+from imblearn.pipeline import Pipeline as imbPipeline
+from sklearn import (cluster, compose, decomposition, ensemble,
+                     feature_extraction, feature_selection,
+                     gaussian_process, kernel_approximation,
+                     metrics, model_selection, naive_bayes,
+                     neighbors, pipeline, preprocessing,
+                     svm, linear_model, tree, discriminant_analysis)
+
+from sklearn.base import BaseEstimator
+from sklearn.base import MetaEstimatorMixin, clone, is_classifier
+from sklearn.feature_selection.rfe import _rfe_single_fit, RFE, RFECV
+from sklearn.model_selection import check_cv
+from sklearn.metrics.scorer import check_scoring
+from sklearn.utils import check_X_y, safe_indexing, safe_sqr
+from sklearn.utils._joblib import Parallel, delayed, effective_n_jobs
+
+
+class DyRFE(RFE):
+    """
+    Mainly used with DyRFECV
+
+    Parameters
+    ----------
+    estimator : object
+        A supervised learning estimator with a ``fit`` method that provides
+        information about feature importance either through a ``coef_``
+        attribute or through a ``feature_importances_`` attribute.
+    n_features_to_select : int or None (default=None)
+        The number of features to select. If `None`, half of the features
+        are selected.
+    step : int, float or list, optional (default=1)
+        If greater than or equal to 1, then ``step`` corresponds to the
+        (integer) number of features to remove at each iteration.
+        If within (0.0, 1.0), then ``step`` corresponds to the percentage
+        (rounded down) of features to remove at each iteration.
+        If list, one step per iteration, applied in order; the last entry
+        is reused until ``n_features_to_select`` features remain.
+    verbose : int, (default=0)
+        Controls verbosity of output.
+
+    """
+    def __init__(self, estimator, n_features_to_select=None, step=1,
+                 verbose=0):
+        super(DyRFE, self).__init__(estimator, n_features_to_select,
+                                    step, verbose)
+
+    def _fit(self, X, y, step_score=None):
+        """Fit; a list ``step`` uses the dynamic schedule, else plain RFE."""
+        if not isinstance(self.step, list):
+            return super(DyRFE, self)._fit(X, y, step_score)
+
+        # dynamic step
+        X, y = check_X_y(X, y, "csc")
+        # Initialization
+        n_features = X.shape[1]
+        if self.n_features_to_select is None:
+            n_features_to_select = n_features // 2
+        else:
+            n_features_to_select = self.n_features_to_select
+
+        step = []
+        for s in self.step:
+            if s <= 0:
+                raise ValueError("Step must be >0")
+            if 0.0 < s < 1.0:
+                step.append(int(max(1, s * n_features)))
+            else:
+                step.append(int(s))
+
+        support_ = np.ones(n_features, dtype=bool)
+        ranking_ = np.ones(n_features, dtype=int)
+
+        if step_score:
+            self.scores_ = []
+
+        step_i = 0
+        # Elimination
+        while np.sum(support_) > n_features_to_select and step_i < len(step):
+
+            # on the last listed step, repeat it so elimination can go on
+            if step_i == len(step) - 1 and step[step_i] != 0:
+                step.append(step[step_i])
+
+            # Remaining features
+            features = np.arange(n_features)[support_]
+
+            # Rank the remaining features
+            estimator = clone(self.estimator)
+            if self.verbose > 0:
+                print("Fitting estimator with %d features." % np.sum(support_))
+
+            estimator.fit(X[:, features], y)
+
+            # Get coefs
+            if hasattr(estimator, 'coef_'):
+                coefs = estimator.coef_
+            else:
+                coefs = getattr(estimator, 'feature_importances_', None)
+            if coefs is None:
+                raise RuntimeError('The classifier does not expose '
+                                   '"coef_" or "feature_importances_" '
+                                   'attributes')
+
+            # Get ranks
+            if coefs.ndim > 1:
+                ranks = np.argsort(safe_sqr(coefs).sum(axis=0))
+            else:
+                ranks = np.argsort(safe_sqr(coefs))
+
+            # for sparse case ranks is matrix
+            ranks = np.ravel(ranks)
+
+            # Eliminate the worse features
+            threshold =\
+                min(step[step_i], np.sum(support_) - n_features_to_select)
+
+            # Compute step score on the previous selection iteration
+            # because 'estimator' must use features
+            # that have not been eliminated yet
+            if step_score:
+                self.scores_.append(step_score(estimator, features))
+            support_[features[ranks][:threshold]] = False
+            ranking_[np.logical_not(support_)] += 1
+
+            step_i += 1
+
+        # Set final attributes
+        features = np.arange(n_features)[support_]
+        self.estimator_ = clone(self.estimator)
+        self.estimator_.fit(X[:, features], y)
+
+        # Compute step score when only n_features_to_select features left
+        if step_score:
+            self.scores_.append(step_score(self.estimator_, features))
+        self.n_features_ = support_.sum()
+        self.support_ = support_
+        self.ranking_ = ranking_
+
+        return self
+
+
+class DyRFECV(RFECV, MetaEstimatorMixin):
+    """
+    Compared with RFECV, DyRFECV offers a flexible `step` for eliminating
+    features, given as a list, while RFECV supports only a fixed number
+    of `step`.
+
+    Parameters
+    ----------
+    estimator : object
+        A supervised learning estimator with a ``fit`` method that provides
+        information about feature importance either through a ``coef_``
+        attribute or through a ``feature_importances_`` attribute.
+    step : int, float or list, optional (default=1)
+        If greater than or equal to 1, then ``step`` corresponds to the
+        (integer) number of features to remove at each iteration.
+        If within (0.0, 1.0), then ``step`` corresponds to the percentage
+        (rounded down) of features to remove at each iteration.
+        If list, one step per iteration, applied in order; the last entry is
+        reused once the list is exhausted.
+        Note that the last iteration may remove fewer than ``step`` features in
+        order to reach ``min_features_to_select``.
+    min_features_to_select : int, (default=1)
+        The minimum number of features to be selected. This number of features
+        will always be scored, even if the difference between the original
+        feature count and ``min_features_to_select`` isn't divisible by
+        ``step``.
+    cv : int, cross-validation generator or an iterable, optional
+        Determines the cross-validation splitting strategy.
+        Possible inputs for cv are:
+        - None, to use the default 3-fold cross-validation,
+        - integer, to specify the number of folds.
+        - :term:`CV splitter`,
+        - An iterable yielding (train, test) splits as arrays of indices.
+        For integer/None inputs, if ``y`` is binary or multiclass,
+        :class:`sklearn.model_selection.StratifiedKFold` is used. If the
+        estimator is a classifier or if ``y`` is neither binary nor multiclass,
+        :class:`sklearn.model_selection.KFold` is used.
+        Refer :ref:`User Guide <cross_validation>` for the various
+        cross-validation strategies that can be used here.
+        .. versionchanged:: 0.20
+            ``cv`` default value of None will change from 3-fold to 5-fold
+            in v0.22.
+    scoring : string, callable or None, optional, (default=None)
+        A string (see model evaluation documentation) or
+        a scorer callable object / function with signature
+        ``scorer(estimator, X, y)``.
+    verbose : int, (default=0)
+        Controls verbosity of output.
+    n_jobs : int or None, optional (default=None)
+        Number of cores to run in parallel while fitting across folds.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+    """
+    def __init__(self, estimator, step=1, min_features_to_select=1, cv='warn',
+                 scoring=None, verbose=0, n_jobs=None):
+        super(DyRFECV, self).__init__(
+            estimator, step=step,
+            min_features_to_select=min_features_to_select,
+            cv=cv, scoring=scoring, verbose=verbose,
+            n_jobs=n_jobs)
+
+    def fit(self, X, y, groups=None):
+        """Fit the RFE model and automatically tune the number of selected
+           features.
+        Parameters
+        ----------
+        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+            Training vector, where `n_samples` is the number of samples and
+            `n_features` is the total number of features.
+        y : array-like, shape = [n_samples]
+            Target values (integers for classification, real numbers for
+            regression).
+        groups : array-like, shape = [n_samples], optional
+            Group labels for the samples used while splitting the dataset into
+            train/test set.
+        """
+        if not isinstance(self.step, list):
+            return super(DyRFECV, self).fit(X, y, groups)
+
+        X, y = check_X_y(X, y, "csr")
+
+        # Initialization
+        cv = check_cv(self.cv, y, is_classifier(self.estimator))
+        scorer = check_scoring(self.estimator, scoring=self.scoring)
+        n_features = X.shape[1]
+
+        step = []
+        for s in self.step:
+            if s <= 0:
+                raise ValueError("Step must be >0")
+            if 0.0 < s < 1.0:
+                step.append(int(max(1, s * n_features)))
+            else:
+                step.append(int(s))
+
+        # Build an RFE object, which will evaluate and score each possible
+        # feature count, down to self.min_features_to_select
+        rfe = DyRFE(estimator=self.estimator,
+                    n_features_to_select=self.min_features_to_select,
+                    step=self.step, verbose=self.verbose)
+
+        # Determine the number of subsets of features by fitting across
+        # the train folds and choosing the "features_to_select" parameter
+        # that gives the least averaged error across all folds.
+
+        # Note that joblib raises a non-picklable error for bound methods
+        # even if n_jobs is set to 1 with the default multiprocessing
+        # backend.
+        # This branching is done so that to
+        # make sure that user code that sets n_jobs to 1
+        # and provides bound methods as scorers is not broken with the
+        # addition of n_jobs parameter in version 0.18.
+
+        if effective_n_jobs(self.n_jobs) == 1:
+            parallel, func = list, _rfe_single_fit
+        else:
+            parallel = Parallel(n_jobs=self.n_jobs)
+            func = delayed(_rfe_single_fit)
+
+        scores = parallel(
+            func(rfe, self.estimator, X, y, train, test, scorer)
+            for train, test in cv.split(X, y, groups))
+
+        scores = np.sum(scores, axis=0)
+        diff = int(scores.shape[0]) - len(step)
+        if diff > 0:
+            step = np.r_[step, [step[-1]] * diff]
+        scores_rev = scores[::-1]
+        argmax_idx = len(scores) - np.argmax(scores_rev) - 1
+        n_features_to_select = max(
+            n_features - sum(step[:argmax_idx]),
+            self.min_features_to_select)
+
+        # Re-execute an elimination with best_k over the whole set
+        rfe = DyRFE(estimator=self.estimator,
+                    n_features_to_select=n_features_to_select, step=self.step,
+                    verbose=self.verbose)
+
+        rfe.fit(X, y)
+
+        # Set final attributes
+        self.support_ = rfe.support_
+        self.n_features_ = rfe.n_features_
+        self.ranking_ = rfe.ranking_
+        self.estimator_ = clone(self.estimator)
+        self.estimator_.fit(self.transform(X), y)
+
+        # Fixing a normalization error, n is equal to get_n_splits(X, y) - 1
+        # here, the scores are normalized by get_n_splits(X, y)
+        self.grid_scores_ = scores[::-1] / cv.get_n_splits(X, y, groups)
+        return self
+
+
+class MyPipeline(pipeline.Pipeline):
+    """
+    Pipeline subclass exposing ``feature_importances_`` after ``fit``.
+    """
+    def fit(self, X, y=None, **fit_params):
+        super(MyPipeline, self).fit(X, y, **fit_params)
+        final_step = self.steps[-1][-1]
+        # ``coef_`` wins when present, else ``feature_importances_``
+        importances = (
+            final_step.coef_ if hasattr(final_step, 'coef_')
+            else getattr(final_step, 'feature_importances_', None))
+        if importances is None:
+            raise RuntimeError('The estimator in the pipeline does not expose '
+                               '"coef_" or "feature_importances_" '
+                               'attributes')
+        self.feature_importances_ = importances
+        return self
+
+
+class MyimbPipeline(imbPipeline):
+    """
+    imblearn pipeline subclass exposing ``feature_importances_`` after fit.
+    """
+    def fit(self, X, y=None, **fit_params):
+        super(MyimbPipeline, self).fit(X, y, **fit_params)
+        final_step = self.steps[-1][-1]
+        # ``coef_`` wins when present, else ``feature_importances_``
+        importances = (
+            final_step.coef_ if hasattr(final_step, 'coef_')
+            else getattr(final_step, 'feature_importances_', None))
+        if importances is None:
+            raise RuntimeError('The estimator in the pipeline does not expose '
+                               '"coef_" or "feature_importances_" '
+                               'attributes')
+        self.feature_importances_ = importances
+        return self
+
+
+def check_feature_importances(estimator):
+    """
+    Wrap a sklearn/imblearn pipeline in MyPipeline/MyimbPipeline (same
+    steps) so that fitting attaches the last estimator's
+    feature_importances_; any other estimator is returned unchanged.
+    """
+    if estimator.__class__.__module__ == 'sklearn.pipeline':
+        pipeline_steps = estimator.get_params()['steps']
+        estimator = MyPipeline(pipeline_steps)
+    elif estimator.__class__.__module__ == 'imblearn.pipeline':
+        pipeline_steps = estimator.get_params()['steps']
+        estimator = MyimbPipeline(pipeline_steps)
+    # every branch returns; the pipeline branches used to yield None
+    return estimator
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/iraps_classifier.py	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,569 @@
+"""
+class IRAPSCore
+class IRAPSClassifier
+class BinarizeTargetClassifier
+class BinarizeTargetRegressor
+class _BinarizeTargetScorer
+class _BinarizeTargetProbaScorer
+
+binarize_auc_scorer
+binarize_average_precision_scorer
+
+binarize_accuracy_scorer
+binarize_balanced_accuracy_scorer
+binarize_precision_scorer
+binarize_recall_scorer
+"""
+
+
+import numpy as np
+import random
+import warnings
+
+from abc import ABCMeta
+from scipy.stats import ttest_ind
+from sklearn import metrics
+from sklearn.base import BaseEstimator, clone, RegressorMixin
+from sklearn.externals import six
+from sklearn.feature_selection.univariate_selection import _BaseFilter
+from sklearn.metrics.scorer import _BaseScorer
+from sklearn.pipeline import Pipeline
+from sklearn.utils import as_float_array, check_X_y
+from sklearn.utils._joblib import Parallel, delayed
+from sklearn.utils.validation import (check_array, check_is_fitted,
+                                      check_memory, column_or_1d)
+
+
+VERSION = '0.1.1'
+
+
+class IRAPSCore(six.with_metaclass(ABCMeta, BaseEstimator)):
+    """
+    Base class of IRAPSClassifier
+    From sklearn BaseEstimator:
+        get_params()
+        set_params()
+
+    Parameters
+    ----------
+    n_iter : int
+        sample count
+
+    positive_thres : float
+        z_score threshold to discretize positive target values
+
+    negative_thres : float
+        z_score threshold to discretize negative target values
+
+    verbose : int
+        0 or greater, if not 0, print progress
+
+    n_jobs : int, default=1
+        The number of CPUs to use to do the computation.
+
+    pre_dispatch : int, or string.
+        Controls the number of jobs that get dispatched during parallel
+        execution. Reducing this number can be useful to avoid an
+        explosion of memory consumption when more jobs get dispatched
+        than CPUs can process. This parameter can be:
+            - None, in which case all the jobs are immediately
+              created and spawned. Use this for lightweight and
+              fast-running jobs, to avoid delays due to on-demand
+              spawning of the jobs
+            - An int, giving the exact number of total jobs that are
+              spawned
+            - A string, giving an expression as a function of n_jobs,
+              as in '2*n_jobs'
+
+    random_state : int or None
+    """
+
+    def __init__(self, n_iter=1000, positive_thres=-1, negative_thres=0,
+                 verbose=0, n_jobs=1, pre_dispatch='2*n_jobs',
+                 random_state=None):
+        """
+        IRAPS turns towards general Anomaly Detection
+        It compares positive_thres with negative_thres,
+        and decides which portion is the positive target.
+        e.g.:
+        (positive_thres=-1, negative_thres=0)
+                 => positive = Z_score of target < -1
+        (positive_thres=1, negative_thres=0)
+                 => positive = Z_score of target > 1
+
+        Note: The positive targets here are always the
+            abnormal minority group.
+        """
+        self.n_iter = n_iter
+        self.positive_thres = positive_thres
+        self.negative_thres = negative_thres
+        self.verbose = verbose
+        self.n_jobs = n_jobs
+        self.pre_dispatch = pre_dispatch
+        self.random_state = random_state
+
+    def fit(self, X, y):
+        """
+        X: array-like (n_samples x n_features)
+        y: 1-d array-like (n_samples)
+        """
+        X, y = check_X_y(X, y, ['csr', 'csc'], multi_output=False)
+
+        def _stochastic_sampling(X, y, random_state=None, positive_thres=-1,
+                                 negative_thres=0):
+            # each iteration select a random number of random subset of
+            # training samples. this is somewhat different from the original
+            # IRAPS method, but effect is almost the same.
+            SAMPLE_SIZE = [0.25, 0.75]
+            n_samples = X.shape[0]
+
+            if random_state is None:
+                n_select = random.randint(int(n_samples * SAMPLE_SIZE[0]),
+                                          int(n_samples * SAMPLE_SIZE[1]))
+                index = random.sample(range(n_samples), n_select)
+            else:
+                n_select = random.Random(random_state).randint(
+                                    int(n_samples * SAMPLE_SIZE[0]),
+                                    int(n_samples * SAMPLE_SIZE[1]))
+                index = random.Random(random_state).sample(
+                                    range(n_samples), n_select)
+
+            X_selected, y_selected = X[index], y[index]
+
+            # Splitting by z_scores.
+            y_selected = (y_selected - y_selected.mean()) / y_selected.std()
+            if positive_thres < negative_thres:
+                X_selected_positive = X_selected[y_selected < positive_thres]
+                X_selected_negative = X_selected[y_selected > negative_thres]
+            else:
+                X_selected_positive = X_selected[y_selected > positive_thres]
+                X_selected_negative = X_selected[y_selected < negative_thres]
+
+            # Skip (return None) any draw with fewer than 5 positives
+            if X_selected_positive.shape[0] < 5:
+                warnings.warn("Warning: fewer than 5 positives were selected!")
+                return
+
+            # p_values
+            _, p = ttest_ind(X_selected_positive, X_selected_negative,
+                             axis=0, equal_var=False)
+
+            # fold_change == mean change?
+            # TODO implement other normalization method
+            positive_mean = X_selected_positive.mean(axis=0)
+            negative_mean = X_selected_negative.mean(axis=0)
+            mean_change = positive_mean - negative_mean
+            # mean_change = np.select(
+            #       [positive_mean > negative_mean,
+            #           positive_mean < negative_mean],
+            #       [positive_mean / negative_mean,
+            #           -negative_mean / positive_mean])
+            # mean_change could be adjusted by power of 2
+            # mean_change = 2**mean_change \
+            #       if mean_change>0 else -2**abs(mean_change)
+
+            return p, mean_change, negative_mean
+
+        parallel = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
+                            pre_dispatch=self.pre_dispatch)
+        if self.random_state is None:
+            res = parallel(delayed(_stochastic_sampling)(
+                    X, y, random_state=None,
+                    positive_thres=self.positive_thres,
+                    negative_thres=self.negative_thres)
+                        for i in range(self.n_iter))
+        else:
+            res = parallel(delayed(_stochastic_sampling)(
+                    X, y, random_state=seed,
+                    positive_thres=self.positive_thres,
+                    negative_thres=self.negative_thres)
+                        for seed in range(self.random_state,
+                                          self.random_state+self.n_iter))
+        res = [r for r in res if r is not None]
+        if len(res) < 50:
+            raise ValueError("too few (%d) valid feature lists "
+                             "were generated!" % len(res))
+        pvalues = np.vstack([x[0] for x in res])
+        fold_changes = np.vstack([x[1] for x in res])
+        base_values = np.vstack([x[2] for x in res])
+
+        self.pvalues_ = np.asarray(pvalues)
+        self.fold_changes_ = np.asarray(fold_changes)
+        self.base_values_ = np.asarray(base_values)
+
+        return self
+
+
+def _iraps_core_fit(iraps_core, X, y):  # target of memory.cache()
+    return iraps_core.fit(X, y)
+
+
+class IRAPSClassifier(six.with_metaclass(ABCMeta, _BaseFilter,
+                                         BaseEstimator, RegressorMixin)):
+    """
+    Extend the bases of both sklearn feature_selector and classifier.
+    From sklearn BaseEstimator:
+        get_params()
+        set_params()
+    From sklearn _BaseFilter:
+        get_support()
+        fit_transform(X)
+        transform(X)
+    From sklearn RegressorMixin:
+        score(X, y): R2
+    New:
+        predict(X)
+        predict_label(X)
+        get_signature()
+    Properties:
+        discretize_value
+
+    Parameters
+    ----------
+    iraps_core: object
+    p_thres: float, threshold for p_values
+    fc_thres: float, threshold for fold change or mean difference
+    occurrence: float, occurrence rate selected by set of p_thres and fc_thres
+    discretize: float, threshold of z_score to discretize target value
+    memory: None, str or joblib.Memory object
+    min_signature_features: int, the minimum number of features in a signature
+    """
+
+    def __init__(self, iraps_core, p_thres=1e-4, fc_thres=0.1,
+                 occurrence=0.8, discretize=-1, memory=None,
+                 min_signature_features=1):
+        self.iraps_core = iraps_core
+        self.p_thres = p_thres
+        self.fc_thres = fc_thres
+        self.occurrence = occurrence
+        self.discretize = discretize
+        self.memory = memory
+        self.min_signature_features = min_signature_features
+
+    def fit(self, X, y):
+        memory = check_memory(self.memory)
+        cached_fit = memory.cache(_iraps_core_fit)
+        iraps_core = clone(self.iraps_core)
+        # allow pre-fitted iraps_core; NOTE(review): verify clone() keeps it
+        if not hasattr(iraps_core, 'pvalues_'):
+            iraps_core = cached_fit(iraps_core, X, y)
+        self.iraps_core_ = iraps_core
+
+        pvalues = as_float_array(iraps_core.pvalues_, copy=True)
+        # NaN p-values become the dtype max so they fail the p_thres test
+        pvalues[np.isnan(pvalues)] = np.finfo(pvalues.dtype).max
+
+        fold_changes = as_float_array(iraps_core.fold_changes_, copy=True)
+        fold_changes[np.isnan(fold_changes)] = 0.0
+
+        base_values = as_float_array(iraps_core.base_values_, copy=True)
+
+        p_thres = self.p_thres
+        fc_thres = self.fc_thres
+        occurrence = self.occurrence
+
+        mask_0 = np.zeros(pvalues.shape, dtype=np.int32)
+        # mark p_values less than or equal to the threshold
+        mask_0[pvalues <= p_thres] = 1
+        # unmark entries whose absolute fold change is below the threshold
+        mask_0[abs(fold_changes) < fc_thres] = 0
+
+        # count the occurrence and mask greater than the threshold
+        counts = mask_0.sum(axis=0)
+        occurrence_thres = int(occurrence * iraps_core.n_iter)
+        mask = np.zeros(counts.shape, dtype=bool)
+        mask[counts >= occurrence_thres] = 1
+
+        # generate signature
+        fold_changes[mask_0 == 0] = 0.0
+        signature = fold_changes[:, mask].sum(axis=0) / counts[mask]
+        signature = np.vstack((signature, base_values[:, mask].mean(axis=0)))
+        # It's not clear whether min_size could impact prediction
+        # performance
+        if signature is None\
+                or signature.shape[1] < self.min_signature_features:
+            raise ValueError("The classifier got None signature or the number "
+                             "of sinature feature is less than minimum!")
+
+        self.signature_ = np.asarray(signature)
+        self.mask_ = mask
+        # TODO: support other discretize method: fixed value, upper
+        # third quartile, etc.
+        self.discretize_value = y.mean() + y.std() * self.discretize
+        if iraps_core.negative_thres > iraps_core.positive_thres:
+            self.less_is_positive = True
+        else:
+            self.less_is_positive = False
+
+        return self
+
+    def _get_support_mask(self):
+        """
+        return mask of feature selection indices
+        """
+        check_is_fitted(self, 'mask_')
+
+        return self.mask_
+
+    def get_signature(self):
+        """
+        return signature
+        """
+        check_is_fitted(self, 'signature_')
+
+        return self.signature_
+
+    def predict(self, X):
+        """
+        compute the correlation coefficient with iraps signature
+        """
+        signature = self.get_signature()
+
+        X = as_float_array(X)
+        X_transformed = self.transform(X) - signature[1]
+        corrcoef = np.array(
+            [np.corrcoef(signature[0], e)[0][1] for e in X_transformed])
+        corrcoef[np.isnan(corrcoef)] = np.finfo(np.float32).min
+
+        return corrcoef
+
+    def predict_label(self, X, clf_cutoff=0.4):
+        return self.predict(X) >= clf_cutoff
+
+
+class BinarizeTargetClassifier(BaseEstimator, RegressorMixin):
+    """
+    Convert continuous target to binary labels (True and False)
+    and apply a classification estimator.
+
+    Parameters
+    ----------
+    classifier: object
+        Estimator object such as derived from sklearn `ClassifierMixin`.
+
+    z_score: float, default=-1.0
+        Threshold value based on z_score. Will be ignored when
+        ``value`` is set
+
+    value: float, default=None
+        Threshold value
+
+    less_is_positive: boolean, default=True
+        When target is less than the threshold value, it will be converted
+        to True, False otherwise.
+
+    Attributes
+    ----------
+    classifier_: object
+        Fitted classifier
+
+    discretize_value: float
+        The threshold value used to discretize True and False targets
+    """
+
+    def __init__(self, classifier, z_score=-1, value=None,
+                 less_is_positive=True):
+        self.classifier = classifier
+        self.z_score = z_score
+        self.value = value
+        self.less_is_positive = less_is_positive
+
+    def fit(self, X, y, sample_weight=None):
+        """
+        Convert y to True and False labels and then fit the classifier
+        with X and new y
+
+        Returns
+        ------
+        self: object
+        """
+        y = check_array(y, accept_sparse=False, force_all_finite=True,
+                        ensure_2d=False, dtype='numeric')
+        y = column_or_1d(y)
+
+        if self.value is None:
+            discretize_value = y.mean() + y.std() * self.z_score
+        else:
+            discretize_value = self.value
+        self.discretize_value = discretize_value
+
+        if self.less_is_positive:
+            y_trans = y < discretize_value
+        else:
+            y_trans = y > discretize_value
+
+        self.classifier_ = clone(self.classifier)
+
+        if sample_weight is not None:
+            self.classifier_.fit(X, y_trans, sample_weight=sample_weight)
+        else:
+            self.classifier_.fit(X, y_trans)
+
+        if hasattr(self.classifier_, 'feature_importances_'):
+            self.feature_importances_ = self.classifier_.feature_importances_
+        if hasattr(self.classifier_, 'coef_'):
+            self.coef_ = self.classifier_.coef_
+        if hasattr(self.classifier_, 'n_outputs_'):
+            self.n_outputs_ = self.classifier_.n_outputs_
+        if hasattr(self.classifier_, 'n_features_'):
+            self.n_features_ = self.classifier_.n_features_
+
+        return self
+
+    def predict(self, X):
+        """
+        Predict class probabilities of X.
+        """
+        check_is_fitted(self, 'classifier_')
+        proba = self.classifier_.predict_proba(X)
+        return proba[:, 1]
+
+    def predict_label(self, X):
+        """Predict class label of X
+        """
+        check_is_fitted(self, 'classifier_')
+        return self.classifier_.predict(X)
+
+
+class _BinarizeTargetProbaScorer(_BaseScorer):
+    """
+    Scorer for estimators trained on a continuous target: ``y`` is
+    binarized with the estimator's ``discretize_value`` and then passed,
+    together with ``clf.predict(X)``, to the wrapped score function.
+    """
+
+    def __call__(self, clf, X, y, sample_weight=None):
+        kind = clf.__class__.__name__
+        # Find the estimator that carries the binarization settings:
+        # pipeline -> last step; stacking -> meta learner; else clf itself
+        # TODO support nested pipeline/stacking estimators
+        if isinstance(clf, Pipeline):
+            core = clf.steps[-1][-1]
+        elif kind in ('StackingCVClassifier', 'StackingClassifier'):
+            core = clf.meta_clf_
+        elif kind in ('StackingCVRegressor', 'StackingRegressor'):
+            core = clf.meta_regr_
+        else:
+            core = clf
+
+        threshold = core.discretize_value
+        positive_below = core.less_is_positive
+
+        # True marks the positive class on the chosen side of the threshold
+        y_true = (y < threshold) if positive_below else (y > threshold)
+
+        y_pred = clf.predict(X)
+        if sample_weight is None:
+            return self._sign * self._score_func(y_true, y_pred,
+                                                 **self._kwargs)
+        # forward the weights only when the caller supplied them
+        return self._sign * self._score_func(y_true, y_pred,
+                                             sample_weight=sample_weight,
+                                             **self._kwargs)
+
+
+# ROC AUC computed against the binarized target (sign 1, no kwargs)
+binarize_auc_scorer = _BinarizeTargetProbaScorer(
+    metrics.roc_auc_score, 1, {})
+
+# average precision computed against the binarized target
+binarize_average_precision_scorer = _BinarizeTargetProbaScorer(
+    metrics.average_precision_score, 1, {})
+
+# IRAPS-named alias of the ROC AUC scorer
+iraps_auc_scorer = binarize_auc_scorer
+
+# IRAPS-named alias of the average precision scorer
+iraps_average_precision_scorer = binarize_average_precision_scorer
+
+
+class BinarizeTargetRegressor(BaseEstimator, RegressorMixin):
+    """
+    Extend regression estimator to have discretize_value
+
+    Parameters
+    ----------
+    regressor: object
+        Estimator object such as derived from sklearn `RegressionMixin`.
+
+    z_score: float, default=-1.0
+        Threshold value based on z_score. Will be ignored when
+        ``value`` is set
+
+    value: float, default=None
+        Threshold value
+
+    less_is_positive: boolean, default=True
+        When target is less than the threshold value, it will be converted
+        to True, False otherwise.
+
+    Attributes
+    ----------
+    regressor_: object
+        Fitted regressor
+
+    discretize_value: float
+        The threshold value used to discretize True and False targets
+    """
+
+    def __init__(self, regressor, z_score=-1, value=None,
+                 less_is_positive=True):
+        self.regressor = regressor
+        self.z_score = z_score
+        self.value = value
+        self.less_is_positive = less_is_positive
+
+    def fit(self, X, y, sample_weight=None):
+        """
+        Calculate the discretize_value and fit the regressor with training data
+
+        Returns
+        ------
+        self: object
+        """
+        y = check_array(y, accept_sparse=False, force_all_finite=True,
+                        ensure_2d=False, dtype='numeric')
+        y = column_or_1d(y)
+
+        if self.value is None:
+            discretize_value = y.mean() + y.std() * self.z_score
+        else:
+            discretize_value = self.value
+        self.discretize_value = discretize_value
+
+        self.regressor_ = clone(self.regressor)
+
+        if sample_weight is not None:
+            self.regressor_.fit(X, y, sample_weight=sample_weight)
+        else:
+            self.regressor_.fit(X, y)
+
+        # mirror selected fitted attributes of the regressor on the wrapper
+        if hasattr(self.regressor_, 'feature_importances_'):
+            self.feature_importances_ = self.regressor_.feature_importances_
+        if hasattr(self.regressor_, 'coef_'):
+            self.coef_ = self.regressor_.coef_
+        if hasattr(self.regressor_, 'n_outputs_'):
+            self.n_outputs_ = self.regressor_.n_outputs_
+        if hasattr(self.regressor_, 'n_features_'):
+            self.n_features_ = self.regressor_.n_features_
+
+        return self
+
+    def predict(self, X):
+        """Predict target value of X
+        """
+        check_is_fitted(self, 'regressor_')
+        y_pred = self.regressor_.predict(X)
+        if not np.all((y_pred >= 0) & (y_pred <= 1)):
+            y_pred = (y_pred - y_pred.min()) / (y_pred.max() - y_pred.min())
+        if self.less_is_positive:
+            y_pred = 1 - y_pred
+        return y_pred
+
+
+# regression-named alias of binarize_auc_scorer
+regression_auc_scorer = binarize_auc_scorer
+
+# regression-named alias of binarize_average_precision_scorer
+regression_average_precision_scorer = binarize_average_precision_scorer
--- a/main_macros.xml	Sun Dec 30 01:51:27 2018 -0500
+++ b/main_macros.xml	Tue May 14 18:05:43 2019 -0400
@@ -1,14 +1,17 @@
 <macros>
-  <token name="@VERSION@">1.0</token>
+  <token name="@VERSION@">1.0.0.4</token>

   <xml name="python_requirements">
       <requirements>
           <requirement type="package" version="3.6">python</requirement>
-          <requirement type="package" version="0.20.2">scikit-learn</requirement>
-          <requirement type="package" version="0.23.4">pandas</requirement>
+          <requirement type="package" version="0.20.3">scikit-learn</requirement>
+          <requirement type="package" version="0.24.2">pandas</requirement>
           <requirement type="package" version="0.80">xgboost</requirement>
           <requirement type="package" version="0.9.13">asteval</requirement>
-          <yield />
+          <requirement type="package" version="0.6">skrebate</requirement>
+          <requirement type="package" version="0.4.2">imbalanced-learn</requirement>
+          <requirement type="package" version="0.16.0">mlxtend</requirement>
+          <yield/>
       </requirements>
   </xml>

@@ -352,10 +355,10 @@
       <option value="all_columns">All columns</option>
     </param>
     <when value="by_index_number">
-      <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
+      <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" use_header_names="true" data_ref="@INFILE@" label="Select target column(s):"/>
     </when>
     <when value="all_but_by_index_number">
-      <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
+      <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" use_header_names="true" data_ref="@INFILE@" label="Select target column(s):"/>
     </when>
     <when value="by_header_name">
       <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2"/>
@@ -428,7 +431,7 @@
           <option value="sparse">sparse matrix</option>
       </param>
       <when value="tabular">
-          <expand macro="samples_tabular" multiple1="true"/>
+          <expand macro="samples_tabular" multiple1="true" multiple2="false"/>
       </when>
       <when value="sparse">
           <expand macro="sparse_target"/>
@@ -823,6 +826,8 @@
     <option value="StratifiedShuffleSplit">StratifiedShuffleSplit</option>
     <option value="TimeSeriesSplit">TimeSeriesSplit</option>
     <option value="PredefinedSplit">PredefinedSplit</option>
+    <option value="OrderedKFold">OrderedKFold</option>
+    <option value="RepeatedOrderedKFold">RepeatedOrderedKFold</option>
     <yield/>
   </xml>

@@ -872,6 +877,16 @@
     <when value="PredefinedSplit">
       <param argument="test_fold" type="text" value="" area="true" label="test_fold" help="List, e.g., [0, 1, -1, 1], represents two test sets, [X[0]] and [X[1], X[3]], X[2] is excluded from any test set due to '-1'."/>
     </when>
+    <when value="OrderedKFold">
+      <expand macro="cv_n_splits"/>
+      <expand macro="cv_shuffle"/>
+      <expand macro="random_state"/>
+    </when>
+    <when value="RepeatedOrderedKFold">
+      <expand macro="cv_n_splits"/>
+      <param argument="n_repeats" type="integer" value="5"/>
+      <expand macro="random_state"/>
+    </when>
     <yield/>
   </xml>

@@ -929,7 +944,13 @@
   </xml>

   <xml name="cv_groups" >
-    <param argument="groups" type="text" value="" area="true" label="Groups" help="Group lables in a list. e.g., [1, 1, 2, 2, 3, 3, 3]"/>
+    <section name="groups_selector" title="Groups column selector" expanded="true">
+      <param name="infile_g" type="data" format="tabular" label="Choose dataset containing groups info:"/>
+      <param name="header_g" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
+      <conditional name="column_selector_options_g">
+        <expand macro="samples_column_selector_options" column_option="selected_column_selector_option_g" col_name="col_g" multiple="False" infile="infile_g"/>
+      </conditional>
+    </section>
   </xml>

   <xml name="feature_selection_algorithms">
@@ -943,6 +964,7 @@
     <option value="SelectFromModel">SelectFromModel - Meta-transformer for selecting features based on importance weights</option>
     <option value="RFE">RFE - Feature ranking with recursive feature elimination</option>
     <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option>
+    <yield/>
   </xml>

   <xml name="feature_selection_algorithm_details">
@@ -991,7 +1013,7 @@
     </when>
     <when value="VarianceThreshold">
       <section name="options" title="Options" expanded="False">
-        <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/>
+        <param argument="threshold" type="float" value="0.0" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/>
       </section>
     </when>
   </xml>
@@ -1047,13 +1069,47 @@
     </when>
   </xml>

-  <xml name="feature_selection_RFECV">
+  <xml name="feature_selection_RFECV_fs">
+    <when value="RFECV">
+      <yield/>
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
+        <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected"/>
+        <expand macro="cv"/>
+        <expand macro="scoring_selection"/>
+        <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
+      </section>
+    </when>
+  </xml>
+
+  <xml name="feature_selection_RFECV_pipeline">
     <when value="RFECV">
       <yield/>
       <section name="options" title="Advanced Options" expanded="False">
         <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
         <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected"/>
         <expand macro="cv_reduced"/>
+        <!-- TODO: group splitter support-->
+        <expand macro="scoring_selection"/>
+        <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
+      </section>
+    </when>
+  </xml>
+
+  <xml name="feature_selection_DyRFECV_fs">
+    <when value="DyRFECV">
+      <yield/>
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="step" type="text" size="30" value="1" label="step" optional="true" help="Default = 1. Support float, int and list." >
+          <sanitizer>
+            <valid initial="default">
+              <add value="["/>
+              <add value="]"/>
+            </valid>
+          </sanitizer>
+        </param>
+        <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected"/>
+        <expand macro="cv"/>
         <expand macro="scoring_selection"/>
         <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
       </section>
@@ -1061,7 +1117,7 @@
   </xml>

   <xml name="feature_selection_pipeline">
-    <!--compare to `feature_selection_fs`, no fitted estimator for SelectFromModel and no customer estimator for RFE and RFECV-->
+    <!--compare to `feature_selection_fs`, no fitted estimator for SelectFromModel and no custom estimator for RFE and RFECV-->
     <conditional name="fs_algorithm_selector">
       <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
         <expand macro="feature_selection_algorithms"/>
@@ -1071,23 +1127,29 @@
       <expand macro="feature_selection_RFE">
         <expand macro="estimator_selector_all"/>
       </expand>
-      <expand macro="feature_selection_RFECV">
+      <expand macro="feature_selection_RFECV_pipeline">
         <expand macro="estimator_selector_all"/>
       </expand>
+      <!-- TODO: add DyRFECV to pipeline-->
     </conditional>
   </xml>

   <xml name="feature_selection_fs">
     <conditional name="fs_algorithm_selector">
       <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
-        <expand macro="feature_selection_algorithms"/>
+        <expand macro="feature_selection_algorithms">
+          <option value="DyRFECV">DyRFECV - Extended RFECV with changeable steps</option>
+        </expand>
       </param>
       <expand macro="feature_selection_algorithm_details"/>
       <expand macro="feature_selection_SelectFromModel"/>
       <expand macro="feature_selection_RFE">
         <expand macro="estimator_selector_fs"/>
       </expand>
-      <expand macro="feature_selection_RFECV">
+      <expand macro="feature_selection_RFECV_fs">
+        <expand macro="estimator_selector_fs"/>
+      </expand>
+      <expand macro="feature_selection_DyRFECV_fs">
         <expand macro="estimator_selector_fs"/>
       </expand>
     </conditional>
@@ -1105,7 +1167,7 @@

   <xml name="model_validation_common_options">
     <expand macro="cv"/>
-    <expand macro="verbose"/>
+    <!-- expand macro="verbose"/> -->
     <yield/>
   </xml>

@@ -1139,6 +1201,8 @@
         <option value="neg_mean_squared_log_error">Regression -- 'neg_mean_squared_log_error'</option>
         <option value="neg_median_absolute_error">Regression -- 'neg_median_absolute_error'</option>
         <option value="r2">Regression -- 'r2'</option>
+        <option value="binarize_auc_scorer">anomaly detection -- binarize_auc_scorer</option>
+        <option value="binarize_average_precision_scorer">anomaly detection -- binarize_average_precision_scorer</option>
       </param>
       <when value="default"/>
       <when value="accuracy"><expand macro="secondary_scoring_selection_classification"/></when>
@@ -1167,6 +1231,8 @@
       <when value="neg_mean_squared_log_error"><expand macro="secondary_scoring_selection_regression"/></when>
       <when value="neg_median_absolute_error"><expand macro="secondary_scoring_selection_regression"/></when>
       <when value="r2"><expand macro="secondary_scoring_selection_regression"/></when>
+      <when value="binarize_auc_scorer"><expand macro="secondary_scoring_selection_anormaly"/></when>
+      <when value="binarize_average_precision_scorer"><expand macro="secondary_scoring_selection_anormaly"/></when>
     </conditional>
   </xml>

@@ -1206,63 +1272,48 @@
     </param>
   </xml>

+  <xml name="secondary_scoring_selection_anormaly">
+    <param name="secondary_scoring" type="select" multiple="true" label="Additional scoring used in multi-metric mode:" help="If the same metric with the primary is chosen, the metric will be ignored.">
+      <option value="binarize_auc_scorer">anomaly detection -- binarize_auc_scorer</option>
+      <option value="binarize_average_precision_scorer">anomaly detection -- binarize_average_precision_scorer</option>
+    </param>
+  </xml>
+
   <xml name="pre_dispatch" token_type="hidden" token_default_value="all" token_help="Number of predispatched jobs for parallel execution">
     <param argument="pre_dispatch" type="@TYPE@" value="@DEFAULT_VALUE@" optional="true" label="pre_dispatch" help="@HELP@"/>
   </xml>

   <xml name="search_cv_estimator">
-    <param name="infile_pipeline" type="data" format="zip" label="Choose the dataset containing pipeline object:"/>
+    <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/>
     <section name="search_params_builder" title="Search parameters Builder" expanded="true">
-      <repeat name="param_set" min="1" max="20" title="Parameter setting for search:">
-        <conditional name="search_param_selector">
-          <param name="selected_param_type" type="select" label="Choose the transformation the parameter belongs to">
-            <option value="final_estimator_p" selected="true">Final estimator</option>
-            <option value="prep_1_p">Pre-processing step #1</option>
-            <option value="prep_2_p">Pre-processing step #2</option>
-            <option value="prep_3_p">Pre-processing step #3</option>
-            <option value="prep_4_p">Pre-processing step #4</option>
-            <option value="prep_5_p">Pre-processing step #5</option>
+      <param name="infile_params" type="data" format="tabular" label="Choose the dataset containing parameter names"/>
+      <repeat name="param_set" min="1" max="30" title="Parameter settings for search:">
+          <param name="sp_name" type="select" label="Choose a parameter name (with current value)">
+            <options from_dataset="infile_params" startswith="@">
+              <column name="name" index="2"/>
+              <column name="value" index="1"/>
+              <filter type="unique_value" name="unique_param" column="1"/>
+              <filter type="sort_by" name="sorted_param" column="2"/>
+            </options>
           </param>
-          <when value="final_estimator_p">
-            <expand macro="search_param_input" />
-          </when>
-          <when value="prep_1_p">
-            <expand macro="search_param_input" label="Pre_processing component #1  parameter:" help="One parameter per box. For example: with_centering: [True, False]."/>
-          </when>
-          <when value="prep_2_p">
-            <expand macro="search_param_input" label="Pre_processing component #2 parameter:" help="One parameter per box. For example: k: [3, 5, 7, 9]. See bottom for more examples"/>
-          </when>
-          <when value="prep_3_p">
-            <expand macro="search_param_input" label="Pre_processing component #3 parameter:" help="One parameter per box. For example: n_components: [1, 10, 100, 1000]. See bottom for more examples"/>
-          </when>
-          <when value="prep_4_p">
-            <expand macro="search_param_input" label="Pre_processing component #4 parameter:" help="One parameter per box. For example: n_components: [1, 10, 100, 1000]. See bottom for more examples"/>
-          </when>
-          <when value="prep_5_p">
-            <expand macro="search_param_input" label="Pre_processing component #5 parameter:" help="One parameter per box. For example: affinity: ['euclidean', 'l1', 'l2', 'manhattan']. See bottom for more examples"/>
-          </when>
-        </conditional>
+          <param name="sp_list" type="text" value="" optional="true" label="Search list" help="list or array-like, for example: [1, 10, 100, 1000], [True, False] and ['auto', 'sqrt', None]. See `help` section for more examples">
+            <sanitizer>
+              <valid initial="default">
+                <add value="&apos;"/>
+                <add value="&quot;"/>
+                <add value="["/>
+                <add value="]"/>
+              </valid>
+            </sanitizer>
+          </param>
       </repeat>
     </section>
   </xml>

-  <xml name="search_param_input" token_label="Estimator parameter:" token_help="One parameter per box. For example: C: [1, 10, 100, 1000]. See bottom for more examples">
-    <param name="search_p" type="text" value="" optional="true" label="@LABEL@" help="@HELP@">
-      <sanitizer>
-        <valid initial="default">
-          <add value="&apos;"/>
-          <add value="&quot;"/>
-          <add value="["/>
-          <add value="]"/>
-        </valid>
-      </sanitizer>
-    </param>
-  </xml>
-
   <xml name="search_cv_options">
       <expand macro="scoring_selection"/>
       <expand macro="model_validation_common_options"/>
-      <expand macro="pre_dispatch" value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/>
+      <!--expand macro="pre_dispatch" default_value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/-->
       <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="If True, data is identically distributed across the folds"/>
       <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset."/>
       <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to NaN if an error occurs in estimator fitting and FitFailedWarning is raised."/>
@@ -1403,12 +1454,12 @@
     <conditional name="estimator_selector">
       <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >
         <expand macro="estimator_module_options">
-            <option value="customer_estimator">Load a customer estimator</option>
+            <option value="custom_estimator">Load a custom estimator</option>
         </expand>
       </param>
       <expand macro="estimator_suboptions">
-        <when value="customer_estimator">
-            <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the customer estimator or pipeline:"/>
+        <when value="custom_estimator">
+            <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the custom estimator or pipeline:"/>
         </when>
       </expand>
     </conditional>
@@ -1591,6 +1642,7 @@
         <option value="over_sampling.SMOTENC">over_sampling.SMOTENC</option>
         <option value="combine.SMOTEENN">combine.SMOTEENN</option>
         <option value="combine.SMOTETomek">combine.SMOTETomek</option>
+        <option value="Z_RandomOverSampler">Z_RandomOverSampler - for regression</option>
       </param>
       <when value="under_sampling.ClusterCentroids">
         <expand macro="estimator_params_text"
@@ -1668,6 +1720,33 @@
         <expand macro="estimator_params_text"
               help="Default(=blank): sampling_strategy='auto', random_state=None, smote=None, tomek=None."/>
       </when>
+      <when value="Z_RandomOverSampler">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, negative_thres=0, positive_thres=-1."/>
+      </when>
+    </conditional>
+  </xml>
+
+  <xml name="stacking_ensemble_inputs">
+    <section name="options" title="Advanced Options" expanded="false">
+        <yield/>
+        <param argument="use_features_in_secondary" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/>
+        <param argument="store_train_meta_features" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false"/>
+    </section>
+  </xml>
+
+  <xml name="stacking_base_estimator">
+    <conditional name="estimator_selector">
+        <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >
+            <expand macro="estimator_module_options">
+                <option value="custom_estimator">Load a custom estimator</option>
+            </expand>
+        </param>
+        <expand macro="estimator_suboptions">
+            <when value="custom_estimator">
+                <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the custom estimator or pipeline"/>
+            </when>
+        </expand>
     </conditional>
   </xml>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/model_validations.py	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,252 @@
+"""
+class
+-----
+OrderedKFold
+RepeatedOrderedKFold
+
+
+function
+--------
+train_test_split
+"""
+
+import numpy as np
+import warnings
+
+from itertools import chain
+from math import ceil, floor
+from sklearn.model_selection import (GroupShuffleSplit, ShuffleSplit,
+                                     StratifiedShuffleSplit)
+from sklearn.model_selection._split import _BaseKFold, _RepeatedSplits
+from sklearn.utils import check_random_state, indexable, safe_indexing
+from sklearn.utils.validation import _num_samples, check_array
+
+
+def _validate_shuffle_split(n_samples, test_size, train_size,
+                            default_test_size=None):
+    """
+    Validation helper to check if the train/test sizes are meaningful wrt to
+    the size of the data (n_samples)
+
+    Parameters
+    ----------
+    n_samples : int
+        Number of items the split sizes are checked against.
+    test_size : float, int or None
+        Fraction in (0, 1) or absolute count of test items.
+    train_size : float, int or None
+        Fraction in (0, 1) or absolute count of train items.
+    default_test_size : float, int or None (default=None)
+        Used as ``test_size`` when both sizes are None.
+
+    Returns
+    -------
+    n_train, n_test : int
+        Absolute train and test set sizes.
+
+    Raises
+    ------
+    ValueError
+        If a size has an unsupported type or is out of range, if the two
+        sizes together exceed the data, or if the train set would be empty.
+    """
+    if test_size is None and train_size is None:
+        test_size = default_test_size
+
+    # dtype kind is 'i' for integers and 'f' for floats; anything else
+    # (other than None) is rejected further below.
+    test_size_type = np.asarray(test_size).dtype.kind
+    train_size_type = np.asarray(train_size).dtype.kind
+
+    if (test_size_type == 'i' and (test_size >= n_samples or test_size <= 0)
+       or test_size_type == 'f' and (test_size <= 0 or test_size >= 1)):
+        raise ValueError('test_size={0} should be either positive and smaller'
+                         ' than the number of samples {1} or a float in the '
+                         '(0, 1) range'.format(test_size, n_samples))
+
+    if (train_size_type == 'i' and (train_size >= n_samples or train_size <= 0)
+       or train_size_type == 'f' and (train_size <= 0 or train_size >= 1)):
+        raise ValueError('train_size={0} should be either positive and smaller'
+                         ' than the number of samples {1} or a float in the '
+                         '(0, 1) range'.format(train_size, n_samples))
+
+    if train_size is not None and train_size_type not in ('i', 'f'):
+        raise ValueError("Invalid value for train_size: {}".format(train_size))
+    if test_size is not None and test_size_type not in ('i', 'f'):
+        raise ValueError("Invalid value for test_size: {}".format(test_size))
+
+    if (train_size_type == 'f' and test_size_type == 'f' and
+            train_size + test_size > 1):
+        raise ValueError(
+            'The sum of test_size and train_size = {}, should be in the (0, 1)'
+            ' range. Reduce test_size and/or train_size.'
+            .format(train_size + test_size))
+
+    # Fractions become counts: the test count is rounded up, the train
+    # count is rounded down.
+    if test_size_type == 'f':
+        n_test = ceil(test_size * n_samples)
+    elif test_size_type == 'i':
+        n_test = float(test_size)
+
+    if train_size_type == 'f':
+        n_train = floor(train_size * n_samples)
+    elif train_size_type == 'i':
+        n_train = float(train_size)
+
+    # A size that was not given is the complement of the other one.
+    if train_size is None:
+        n_train = n_samples - n_test
+    elif test_size is None:
+        n_test = n_samples - n_train
+
+    if n_train + n_test > n_samples:
+        raise ValueError('The sum of train_size and test_size = %d, '
+                         'should be smaller than the number of '
+                         'samples %d. Reduce test_size and/or '
+                         'train_size.' % (n_train + n_test, n_samples))
+
+    n_train, n_test = int(n_train), int(n_test)
+
+    if n_train == 0:
+        raise ValueError(
+            'With n_samples={}, test_size={} and train_size={}, the '
+            'resulting train set will be empty. Adjust any of the '
+            'aforementioned parameters.'.format(n_samples, test_size,
+                                                train_size)
+        )
+
+    return n_train, n_test
+
+
+def train_test_split(*arrays, **options):
+    """Extend sklearn.model_selection.train_test_slit to have group split.
+
+    Parameters
+    ----------
+    *arrays : sequence of indexables with same length / shape[0]
+        Allowed inputs are lists, numpy arrays, scipy-sparse
+        matrices or pandas dataframes.
+
+    test_size : float, int or None, optional (default=None)
+        If float, should be between 0.0 and 1.0 and represent the proportion
+        of the dataset to include in the test split. If int, represents the
+        absolute number of test samples. If None, the value is set to the
+        complement of the train size. If ``train_size`` is also None, it will
+        be set to 0.25.
+
+    train_size : float, int, or None, (default=None)
+        If float, should be between 0.0 and 1.0 and represent the
+        proportion of the dataset to include in the train split. If
+        int, represents the absolute number of train samples. If None,
+        the value is automatically set to the complement of the test size.
+
+    random_state : int, RandomState instance or None, optional (default=None)
+        If int, random_state is the seed used by the random number generator;
+        If RandomState instance, random_state is the random number generator;
+        If None, the random number generator is the RandomState instance used
+        by `np.random`.
+
+    shuffle : None or str (default='simple')
+        How to shuffle the data before splitting.
+        None, no shuffle.
+        For str, one of 'simple', 'stratified' and 'group', corresponding to
+        `ShuffleSplit`, `StratifiedShuffleSplit` and `GroupShuffleSplit`,
+        respectively.
+
+    labels : array-like or None (default=None)
+        Ignored if shuffle is None or 'simple'.
+        When shuffle='stratified', this array is used as class labels.
+        When shuffle='group', this array is used as groups.
+
+    Returns
+    -------
+    splitting : list, length=2 * len(arrays)
+        List containing train-test split of inputs.
+
+    """
+    n_arrays = len(arrays)
+    if n_arrays == 0:
+        raise ValueError("At least one array required as input")
+    test_size = options.pop('test_size', None)
+    train_size = options.pop('train_size', None)
+    random_state = options.pop('random_state', None)
+    shuffle = options.pop('shuffle', 'simple')
+    labels = options.pop('labels', None)
+
+    if options:
+        raise TypeError("Invalid parameters passed: %s" % str(options))
+
+    arrays = indexable(*arrays)
+
+    n_samples = _num_samples(arrays[0])
+    if shuffle == 'group':
+        if labels is None:
+            raise ValueError("When shuffle='group', "
+                             "labels should not be None!")
+        labels = check_array(labels, ensure_2d=False, dtype=None)
+        uniques = np.unique(labels)
+        n_samples = uniques.size
+
+    n_train, n_test = _validate_shuffle_split(n_samples, test_size, train_size,
+                                              default_test_size=0.25)
+
+    shuffle_options = dict(test_size=n_test,
+                           train_size=n_train,
+                           random_state=random_state)
+
+    if shuffle is None:
+        if labels is not None:
+            warnings.warn("The `labels` is ignored for "
+                          "shuffle being None!")
+
+        train = np.arange(n_train)
+        test = np.arange(n_train, n_train + n_test)
+
+    elif shuffle == 'simple':
+        if labels is not None:
+            warnings.warn("The `labels` is not needed and therefore "
+                          "ignored for ShuffleSplit, as shuffle='simple'!")
+
+        cv = ShuffleSplit(**shuffle_options)
+        train, test = next(cv.split(X=arrays[0], y=None))
+
+    elif shuffle == 'stratified':
+        cv = StratifiedShuffleSplit(**shuffle_options)
+        train, test = next(cv.split(X=arrays[0], y=labels))
+
+    elif shuffle == 'group':
+        cv = GroupShuffleSplit(**shuffle_options)
+        train, test = next(cv.split(X=arrays[0], y=None, groups=labels))
+
+    else:
+        raise ValueError("The argument `shuffle` only supports None, "
+                         "'simple', 'stratified' and 'group', but got `%s`!"
+                         % shuffle)
+
+    return list(chain.from_iterable((safe_indexing(a, train),
+                                    safe_indexing(a, test)) for a in arrays))
+
+
+class OrderedKFold(_BaseKFold):
+    """
+    Split into K fold based on ordered target value
+
+    Parameters
+    ----------
+    n_splits : int, default=3
+        Number of folds. Must be at least 2.
+    shuffle: bool
+    random_state: None or int
+    """
+
+    def __init__(self, n_splits=3, shuffle=False, random_state=None):
+        super(OrderedKFold, self).__init__(n_splits, shuffle, random_state)
+
+    def _iter_test_indices(self, X, y, groups=None):
+        n_samples = _num_samples(X)
+        n_splits = self.n_splits
+        y = np.asarray(y)
+        sorted_index = np.argsort(y)
+        if self.shuffle:
+            current = 0
+            rng = check_random_state(self.random_state)
+            for i in range(n_samples // int(n_splits)):
+                start, stop = current, current + n_splits
+                rng.shuffle(sorted_index[start:stop])
+                current = stop
+            rng.shuffle(sorted_index[current:])
+
+        for i in range(n_splits):
+            yield sorted_index[i:n_samples:n_splits]
+
+
+class RepeatedOrderedKFold(_RepeatedSplits):
+    """ Repeated OrderedKFold runs mutiple times with different randomization.
+
+    Parameters
+    ----------
+    n_splits : int, default=5
+        Number of folds. Must be at least 2.
+
+    n_repeats : int, default=5
+        Number of times cross-validator to be repeated.
+
+    random_state: int, RandomState instance or None. Optional
+    """
+    def __init__(self, n_splits=5, n_repeats=5, random_state=None):
+        super(RepeatedOrderedKFold, self).__init__(
+            OrderedKFold, n_repeats, random_state, n_splits=n_splits)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pk_whitelist.json	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,768 @@
+{ "SK_NAMES": [
+    "sklearn._ASSUME_FINITE", "sklearn._isotonic._inplace_contiguous_isotonic_regression",
+    "sklearn._isotonic._make_unique", "sklearn.base.BaseEstimator",
+    "sklearn.base.BiclusterMixin", "sklearn.base.ClassifierMixin",
+    "sklearn.base.ClusterMixin", "sklearn.base.DensityMixin",
+    "sklearn.base.MetaEstimatorMixin", "sklearn.base.RegressorMixin",
+    "sklearn.base.TransformerMixin", "sklearn.base._first_and_last_element",
+    "sklearn.base._pprint", "sklearn.base.clone",
+    "sklearn.base.is_classifier", "sklearn.base.is_regressor",
+    "sklearn.clone", "sklearn.cluster.AffinityPropagation",
+    "sklearn.cluster.AgglomerativeClustering", "sklearn.cluster.Birch",
+    "sklearn.cluster.DBSCAN", "sklearn.cluster.FeatureAgglomeration",
+    "sklearn.cluster.KMeans", "sklearn.cluster.MeanShift",
+    "sklearn.cluster.MiniBatchKMeans", "sklearn.cluster.SpectralBiclustering",
+    "sklearn.cluster.SpectralClustering", "sklearn.cluster.SpectralCoclustering",
+    "sklearn.cluster._dbscan_inner.dbscan_inner", "sklearn.cluster._feature_agglomeration.AgglomerationTransform",
+    "sklearn.cluster._hierarchical.WeightedEdge", "sklearn.cluster._hierarchical._get_parents",
+    "sklearn.cluster._hierarchical._hc_get_descendent", "sklearn.cluster._hierarchical.average_merge",
+    "sklearn.cluster._hierarchical.compute_ward_dist", "sklearn.cluster._hierarchical.hc_get_heads",
+    "sklearn.cluster._hierarchical.max_merge", "sklearn.cluster._k_means._assign_labels_array",
+    "sklearn.cluster._k_means._assign_labels_csr", "sklearn.cluster._k_means._centers_dense",
+    "sklearn.cluster._k_means._centers_sparse", "sklearn.cluster._k_means._mini_batch_update_csr",
+    "sklearn.cluster._k_means_elkan.k_means_elkan", "sklearn.cluster.affinity_propagation",
+    "sklearn.cluster.affinity_propagation_.AffinityPropagation", "sklearn.cluster.affinity_propagation_.affinity_propagation",
+    "sklearn.cluster.bicluster.BaseSpectral", "sklearn.cluster.bicluster.SpectralBiclustering",
+    "sklearn.cluster.bicluster.SpectralCoclustering", "sklearn.cluster.bicluster._bistochastic_normalize",
+    "sklearn.cluster.bicluster._log_normalize", "sklearn.cluster.bicluster._scale_normalize",
+    "sklearn.cluster.birch.Birch", "sklearn.cluster.birch._CFNode",
+    "sklearn.cluster.birch._CFSubcluster", "sklearn.cluster.birch._iterate_sparse_X",
+    "sklearn.cluster.birch._split_node", "sklearn.cluster.dbscan",
+    "sklearn.cluster.dbscan_.DBSCAN", "sklearn.cluster.dbscan_.dbscan",
+    "sklearn.cluster.estimate_bandwidth", "sklearn.cluster.get_bin_seeds",
+    "sklearn.cluster.hierarchical.AgglomerativeClustering", "sklearn.cluster.hierarchical.FeatureAgglomeration",
+    "sklearn.cluster.hierarchical._TREE_BUILDERS", "sklearn.cluster.hierarchical._average_linkage",
+    "sklearn.cluster.hierarchical._complete_linkage", "sklearn.cluster.hierarchical._fix_connectivity",
+    "sklearn.cluster.hierarchical._hc_cut", "sklearn.cluster.hierarchical.linkage_tree",
+    "sklearn.cluster.hierarchical.ward_tree", "sklearn.cluster.k_means",
+    "sklearn.cluster.k_means_.FLOAT_DTYPES", "sklearn.cluster.k_means_.KMeans",
+    "sklearn.cluster.k_means_.MiniBatchKMeans", "sklearn.cluster.k_means_._init_centroids",
+    "sklearn.cluster.k_means_._k_init", "sklearn.cluster.k_means_._kmeans_single_elkan",
+    "sklearn.cluster.k_means_._kmeans_single_lloyd", "sklearn.cluster.k_means_._labels_inertia",
+    "sklearn.cluster.k_means_._labels_inertia_precompute_dense", "sklearn.cluster.k_means_._mini_batch_convergence",
+    "sklearn.cluster.k_means_._mini_batch_step", "sklearn.cluster.k_means_._tolerance",
+    "sklearn.cluster.k_means_._validate_center_shape", "sklearn.cluster.k_means_.k_means",
+    "sklearn.cluster.k_means_.string_types", "sklearn.cluster.linkage_tree",
+    "sklearn.cluster.mean_shift", "sklearn.cluster.mean_shift_.MeanShift",
+    "sklearn.cluster.mean_shift_._mean_shift_single_seed", "sklearn.cluster.mean_shift_.estimate_bandwidth",
+    "sklearn.cluster.mean_shift_.get_bin_seeds", "sklearn.cluster.mean_shift_.mean_shift",
+    "sklearn.cluster.spectral.SpectralClustering", "sklearn.cluster.spectral.discretize",
+    "sklearn.cluster.spectral.spectral_clustering", "sklearn.cluster.spectral_clustering",
+    "sklearn.cluster.ward_tree", "sklearn.config_context", "sklearn.compose.TransformedTargetRegressor",
+    "sklearn.compose._target.TransformedTargetRegressor", "sklearn.compose.ColumnTransformer",
+    "sklearn.compose._column_transformer.ColumnTransformer", "sklearn.compose.make_column_transformer",
+    "sklearn.compose._column_transformer.make_column_transformer",
+    "sklearn.covariance.EllipticEnvelope", "sklearn.covariance.EmpiricalCovariance",
+    "sklearn.covariance.GraphLasso", "sklearn.covariance.GraphLassoCV",
+    "sklearn.covariance.LedoitWolf", "sklearn.covariance.MinCovDet",
+    "sklearn.covariance.OAS", "sklearn.covariance.ShrunkCovariance",
+    "sklearn.covariance.empirical_covariance", "sklearn.covariance.empirical_covariance_.EmpiricalCovariance",
+    "sklearn.covariance.empirical_covariance_.empirical_covariance", "sklearn.covariance.empirical_covariance_.log_likelihood",
+    "sklearn.covariance.fast_mcd", "sklearn.covariance.graph_lasso",
+    "sklearn.covariance.graph_lasso_.GraphLasso", "sklearn.covariance.graph_lasso_.GraphLassoCV",
+    "sklearn.covariance.graph_lasso_._dual_gap", "sklearn.covariance.graph_lasso_._objective",
+    "sklearn.covariance.graph_lasso_.alpha_max", "sklearn.covariance.graph_lasso_.graph_lasso",
+    "sklearn.covariance.graph_lasso_.graph_lasso_path", "sklearn.covariance.ledoit_wolf",
+    "sklearn.covariance.ledoit_wolf_shrinkage", "sklearn.covariance.log_likelihood",
+    "sklearn.covariance.oas", "sklearn.covariance.outlier_detection.EllipticEnvelope",
+    "sklearn.covariance.robust_covariance.MinCovDet", "sklearn.covariance.robust_covariance._c_step",
+    "sklearn.covariance.robust_covariance.c_step", "sklearn.covariance.robust_covariance.fast_mcd",
+    "sklearn.covariance.robust_covariance.select_candidates", "sklearn.covariance.shrunk_covariance",
+    "sklearn.covariance.shrunk_covariance_.LedoitWolf", "sklearn.covariance.shrunk_covariance_.OAS",
+    "sklearn.covariance.shrunk_covariance_.ShrunkCovariance", "sklearn.covariance.shrunk_covariance_.ledoit_wolf",
+    "sklearn.covariance.shrunk_covariance_.ledoit_wolf_shrinkage", "sklearn.covariance.shrunk_covariance_.oas",
+    "sklearn.covariance.shrunk_covariance_.shrunk_covariance", "sklearn.decomposition.DictionaryLearning",
+    "sklearn.decomposition.FactorAnalysis", "sklearn.decomposition.FastICA",
+    "sklearn.decomposition.IncrementalPCA", "sklearn.decomposition.KernelPCA",
+    "sklearn.decomposition.LatentDirichletAllocation", "sklearn.decomposition.MiniBatchDictionaryLearning",
+    "sklearn.decomposition.MiniBatchSparsePCA", "sklearn.decomposition.NMF",
+    "sklearn.decomposition.PCA", "sklearn.decomposition.RandomizedPCA",
+    "sklearn.decomposition.SparseCoder", "sklearn.decomposition.SparsePCA",
+    "sklearn.decomposition.TruncatedSVD", "sklearn.decomposition._online_lda._dirichlet_expectation_1d",
+    "sklearn.decomposition._online_lda._dirichlet_expectation_2d", "sklearn.decomposition._online_lda.mean_change",
+    "sklearn.decomposition.base._BasePCA", "sklearn.decomposition.cdnmf_fast._update_cdnmf_fast",
+    "sklearn.decomposition.dict_learning", "sklearn.decomposition.dict_learning_online",
+    "sklearn.decomposition.factor_analysis.FactorAnalysis", "sklearn.decomposition.fastica",
+    "sklearn.decomposition.fastica_.FLOAT_DTYPES", "sklearn.decomposition.fastica_.FastICA",
+    "sklearn.decomposition.fastica_._cube", "sklearn.decomposition.fastica_._exp",
+    "sklearn.decomposition.fastica_._gs_decorrelation", "sklearn.decomposition.fastica_._ica_def",
+    "sklearn.decomposition.fastica_._ica_par", "sklearn.decomposition.fastica_._logcosh",
+    "sklearn.decomposition.fastica_._sym_decorrelation", "sklearn.decomposition.fastica_.fastica",
+    "sklearn.decomposition.fastica_.string_types", "sklearn.decomposition.incremental_pca.IncrementalPCA",
+    "sklearn.decomposition.kernel_pca.KernelPCA", "sklearn.decomposition.nmf.EPSILON",
+    "sklearn.decomposition.nmf.INTEGER_TYPES", "sklearn.decomposition.nmf.NMF",
+    "sklearn.decomposition.nmf._beta_divergence", "sklearn.decomposition.nmf._beta_loss_to_float",
+    "sklearn.decomposition.nmf._check_init", "sklearn.decomposition.nmf._check_string_param",
+    "sklearn.decomposition.nmf._compute_regularization", "sklearn.decomposition.nmf._fit_coordinate_descent",
+    "sklearn.decomposition.nmf._fit_multiplicative_update", "sklearn.decomposition.nmf._initialize_nmf",
+    "sklearn.decomposition.nmf._multiplicative_update_h", "sklearn.decomposition.nmf._multiplicative_update_w",
+    "sklearn.decomposition.nmf._special_sparse_dot", "sklearn.decomposition.nmf._update_coordinate_descent",
+    "sklearn.decomposition.nmf.non_negative_factorization", "sklearn.decomposition.nmf.norm",
+    "sklearn.decomposition.nmf.trace_dot", "sklearn.decomposition.non_negative_factorization",
+    "sklearn.decomposition.online_lda.EPS", "sklearn.decomposition.online_lda.LatentDirichletAllocation",
+    "sklearn.decomposition.online_lda._update_doc_distribution", "sklearn.decomposition.online_lda.gammaln",
+    "sklearn.decomposition.pca.PCA", "sklearn.decomposition.pca.RandomizedPCA",
+    "sklearn.decomposition.pca._assess_dimension_", "sklearn.decomposition.pca._infer_dimension_",
+    "sklearn.decomposition.pca.gammaln", "sklearn.decomposition.sparse_encode",
+    "sklearn.decomposition.sparse_pca.MiniBatchSparsePCA", "sklearn.decomposition.sparse_pca.SparsePCA",
+    "sklearn.decomposition.truncated_svd.TruncatedSVD", "sklearn.discriminant_analysis.LinearDiscriminantAnalysis",
+    "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis", "sklearn.discriminant_analysis._class_cov",
+    "sklearn.discriminant_analysis._class_means", "sklearn.discriminant_analysis._cov",
+    "sklearn.discriminant_analysis.string_types", "sklearn.ensemble.AdaBoostClassifier",
+    "sklearn.ensemble.AdaBoostRegressor", "sklearn.ensemble.BaggingClassifier",
+    "sklearn.ensemble.BaggingRegressor", "sklearn.ensemble.BaseEnsemble",
+    "sklearn.ensemble.ExtraTreesClassifier", "sklearn.ensemble.ExtraTreesRegressor",
+    "sklearn.ensemble.GradientBoostingClassifier", "sklearn.ensemble.GradientBoostingRegressor",
+    "sklearn.ensemble.IsolationForest", "sklearn.ensemble.RandomForestClassifier",
+    "sklearn.ensemble.RandomForestRegressor", "sklearn.ensemble.RandomTreesEmbedding",
+    "sklearn.ensemble.VotingClassifier", "sklearn.ensemble._gradient_boosting._partial_dependence_tree",
+    "sklearn.ensemble._gradient_boosting._predict_regression_tree_stages_sparse", "sklearn.ensemble._gradient_boosting._random_sample_mask",
+    "sklearn.ensemble._gradient_boosting.predict_stage", "sklearn.ensemble._gradient_boosting.predict_stages",
+    "sklearn.ensemble.bagging.BaggingClassifier", "sklearn.ensemble.bagging.BaggingRegressor",
+    "sklearn.ensemble.bagging.BaseBagging", "sklearn.ensemble.bagging.MAX_INT",
+    "sklearn.ensemble.bagging._generate_bagging_indices", "sklearn.ensemble.bagging._generate_indices",
+    "sklearn.ensemble.bagging._parallel_build_estimators", "sklearn.ensemble.bagging._parallel_decision_function",
+    "sklearn.ensemble.bagging._parallel_predict_log_proba", "sklearn.ensemble.bagging._parallel_predict_proba",
+    "sklearn.ensemble.bagging._parallel_predict_regression", "sklearn.ensemble.base.BaseEnsemble",
+    "sklearn.ensemble.base.MAX_RAND_SEED", "sklearn.ensemble.base._partition_estimators",
+    "sklearn.ensemble.base._set_random_states", "sklearn.ensemble.forest.BaseForest",
+    "sklearn.ensemble.forest.ExtraTreesClassifier", "sklearn.ensemble.forest.ExtraTreesRegressor",
+    "sklearn.ensemble.forest.ForestClassifier", "sklearn.ensemble.forest.ForestRegressor",
+    "sklearn.ensemble.forest.MAX_INT", "sklearn.ensemble.forest.RandomForestClassifier",
+    "sklearn.ensemble.forest.RandomForestRegressor", "sklearn.ensemble.forest.RandomTreesEmbedding",
+    "sklearn.ensemble.forest._generate_sample_indices", "sklearn.ensemble.forest._generate_unsampled_indices",
+    "sklearn.ensemble.forest._parallel_build_trees", "sklearn.ensemble.forest.accumulate_prediction",
+    "sklearn.ensemble.gradient_boosting.BaseGradientBoosting", "sklearn.ensemble.gradient_boosting.BinomialDeviance",
+    "sklearn.ensemble.gradient_boosting.ClassificationLossFunction", "sklearn.ensemble.gradient_boosting.ExponentialLoss",
+    "sklearn.ensemble.gradient_boosting.GradientBoostingClassifier", "sklearn.ensemble.gradient_boosting.GradientBoostingRegressor",
+    "sklearn.ensemble.gradient_boosting.HuberLossFunction", "sklearn.ensemble.gradient_boosting.INIT_ESTIMATORS",
+    "sklearn.ensemble.gradient_boosting.LOSS_FUNCTIONS", "sklearn.ensemble.gradient_boosting.LeastAbsoluteError",
+    "sklearn.ensemble.gradient_boosting.LeastSquaresError", "sklearn.ensemble.gradient_boosting.LogOddsEstimator",
+    "sklearn.ensemble.gradient_boosting.LossFunction", "sklearn.ensemble.gradient_boosting.MeanEstimator",
+    "sklearn.ensemble.gradient_boosting.MultinomialDeviance", "sklearn.ensemble.gradient_boosting.PriorProbabilityEstimator",
+    "sklearn.ensemble.gradient_boosting.QuantileEstimator", "sklearn.ensemble.gradient_boosting.QuantileLossFunction",
+    "sklearn.ensemble.gradient_boosting.RegressionLossFunction", "sklearn.ensemble.gradient_boosting.ScaledLogOddsEstimator",
+    "sklearn.ensemble.gradient_boosting.TREE_LEAF", "sklearn.ensemble.gradient_boosting.VerboseReporter",
+    "sklearn.ensemble.gradient_boosting.ZeroEstimator", "sklearn.ensemble.gradient_boosting.expit",
+    "sklearn.ensemble.iforest.INTEGER_TYPES", "sklearn.ensemble.iforest.IsolationForest",
+    "sklearn.ensemble.iforest._average_path_length", "sklearn.ensemble.iforest.euler_gamma",
+    "sklearn.ensemble.partial_dependence._grid_from_X", "sklearn.ensemble.partial_dependence.partial_dependence",
+    "sklearn.ensemble.partial_dependence.plot_partial_dependence", "sklearn.ensemble.voting_classifier.VotingClassifier",
+    "sklearn.ensemble.voting_classifier._parallel_fit_estimator", "sklearn.ensemble.weight_boosting.AdaBoostClassifier",
+    "sklearn.ensemble.weight_boosting.AdaBoostRegressor", "sklearn.ensemble.weight_boosting.BaseWeightBoosting",
+    "sklearn.ensemble.weight_boosting._samme_proba", "sklearn.ensemble.weight_boosting.inner1d",
+    "sklearn.feature_extraction.DictVectorizer", "sklearn.feature_extraction.FeatureHasher",
+    "sklearn.feature_extraction._hashing.transform", "sklearn.feature_extraction.dict_vectorizer.DictVectorizer",
+    "sklearn.feature_extraction.dict_vectorizer._tosequence", "sklearn.feature_extraction.grid_to_graph",
+    "sklearn.feature_extraction.hashing.FeatureHasher", "sklearn.feature_extraction.hashing._iteritems",
+    "sklearn.feature_extraction.image.PatchExtractor", "sklearn.feature_extraction.image._compute_gradient_3d",
+    "sklearn.feature_extraction.image._compute_n_patches", "sklearn.feature_extraction.image._make_edges_3d",
+    "sklearn.feature_extraction.image._mask_edges_weights", "sklearn.feature_extraction.image._to_graph",
+    "sklearn.feature_extraction.image.extract_patches", "sklearn.feature_extraction.image.extract_patches_2d",
+    "sklearn.feature_extraction.image.grid_to_graph", "sklearn.feature_extraction.image.img_to_graph",
+    "sklearn.feature_extraction.image.reconstruct_from_patches_2d", "sklearn.feature_extraction.img_to_graph",
+    "sklearn.feature_extraction.stop_words.ENGLISH_STOP_WORDS", "sklearn.feature_extraction.text.CountVectorizer",
+    "sklearn.feature_extraction.text.ENGLISH_STOP_WORDS", "sklearn.feature_extraction.text.HashingVectorizer",
+    "sklearn.feature_extraction.text.TfidfTransformer", "sklearn.feature_extraction.text.TfidfVectorizer",
+    "sklearn.feature_extraction.text.VectorizerMixin", "sklearn.feature_extraction.text._check_stop_list",
+    "sklearn.feature_extraction.text._document_frequency", "sklearn.feature_extraction.text._make_int_array",
+    "sklearn.feature_extraction.text.strip_accents_ascii", "sklearn.feature_extraction.text.strip_accents_unicode",
+    "sklearn.feature_extraction.text.strip_tags", "sklearn.feature_selection.GenericUnivariateSelect",
+    "sklearn.feature_selection.RFE", "sklearn.feature_selection.RFECV",
+    "sklearn.feature_selection.SelectFdr", "sklearn.feature_selection.SelectFpr",
+    "sklearn.feature_selection.SelectFromModel", "sklearn.feature_selection.SelectFwe",
+    "sklearn.feature_selection.SelectKBest", "sklearn.feature_selection.SelectPercentile",
+    "sklearn.feature_selection.VarianceThreshold", "sklearn.feature_selection.base.SelectorMixin",
+    "sklearn.feature_selection.chi2", "sklearn.feature_selection.f_classif",
+    "sklearn.feature_selection.f_oneway", "sklearn.feature_selection.f_regression",
+    "sklearn.feature_selection.from_model.SelectFromModel", "sklearn.feature_selection.from_model._calculate_threshold",
+    "sklearn.feature_selection.from_model._get_feature_importances", "sklearn.feature_selection.mutual_info_._compute_mi",
+    "sklearn.feature_selection.mutual_info_._compute_mi_cc", "sklearn.feature_selection.mutual_info_._compute_mi_cd",
+    "sklearn.feature_selection.mutual_info_._estimate_mi", "sklearn.feature_selection.mutual_info_._iterate_columns",
+    "sklearn.feature_selection.mutual_info_.digamma", "sklearn.feature_selection.mutual_info_.mutual_info_classif",
+    "sklearn.feature_selection.mutual_info_.mutual_info_regression", "sklearn.feature_selection.mutual_info_classif",
+    "sklearn.feature_selection.mutual_info_regression", "sklearn.feature_selection.rfe.RFE",
+    "sklearn.feature_selection.rfe.RFECV", "sklearn.feature_selection.rfe._rfe_single_fit",
+    "sklearn.feature_selection.univariate_selection.GenericUnivariateSelect", "sklearn.feature_selection.univariate_selection.SelectFdr",
+    "sklearn.feature_selection.univariate_selection.SelectFpr", "sklearn.feature_selection.univariate_selection.SelectFwe",
+    "sklearn.feature_selection.univariate_selection.SelectKBest", "sklearn.feature_selection.univariate_selection.SelectPercentile",
+    "sklearn.feature_selection.univariate_selection._BaseFilter", "sklearn.feature_selection.univariate_selection._chisquare",
+    "sklearn.feature_selection.univariate_selection._clean_nans", "sklearn.feature_selection.univariate_selection.chi2",
+    "sklearn.feature_selection.univariate_selection.f_classif", "sklearn.feature_selection.univariate_selection.f_oneway",
+    "sklearn.feature_selection.univariate_selection.f_regression", "sklearn.feature_selection.variance_threshold.VarianceThreshold",
+    "sklearn.gaussian_process.GaussianProcess", "sklearn.gaussian_process.GaussianProcessClassifier",
+    "sklearn.gaussian_process.GaussianProcessRegressor", "sklearn.gaussian_process.correlation_models.absolute_exponential",
+    "sklearn.gaussian_process.correlation_models.cubic", "sklearn.gaussian_process.correlation_models.generalized_exponential",
+    "sklearn.gaussian_process.correlation_models.linear", "sklearn.gaussian_process.correlation_models.pure_nugget",
+    "sklearn.gaussian_process.correlation_models.squared_exponential", "sklearn.gaussian_process.gaussian_process.GaussianProcess",
+    "sklearn.gaussian_process.gaussian_process.MACHINE_EPSILON", "sklearn.gaussian_process.gaussian_process.l1_cross_distances",
+    "sklearn.gaussian_process.gpc.COEFS", "sklearn.gaussian_process.gpc.GaussianProcessClassifier",
+    "sklearn.gaussian_process.gpc.LAMBDAS", "sklearn.gaussian_process.gpc._BinaryGaussianProcessClassifierLaplace",
+    "sklearn.gaussian_process.gpc.erf", "sklearn.gaussian_process.gpc.expit",
+    "sklearn.gaussian_process.gpr.GaussianProcessRegressor", "sklearn.gaussian_process.kernels.CompoundKernel",
+    "sklearn.gaussian_process.kernels.ConstantKernel", "sklearn.gaussian_process.kernels.DotProduct",
+    "sklearn.gaussian_process.kernels.ExpSineSquared", "sklearn.gaussian_process.kernels.Exponentiation",
+    "sklearn.gaussian_process.kernels.Hyperparameter", "sklearn.gaussian_process.kernels.Kernel",
+    "sklearn.gaussian_process.kernels.KernelOperator", "sklearn.gaussian_process.kernels.Matern",
+    "sklearn.gaussian_process.kernels.NormalizedKernelMixin", "sklearn.gaussian_process.kernels.PairwiseKernel",
+    "sklearn.gaussian_process.kernels.Product", "sklearn.gaussian_process.kernels.RBF",
+    "sklearn.gaussian_process.kernels.RationalQuadratic", "sklearn.gaussian_process.kernels.StationaryKernelMixin",
+    "sklearn.gaussian_process.kernels.Sum", "sklearn.gaussian_process.kernels.WhiteKernel",
+    "sklearn.gaussian_process.kernels._approx_fprime", "sklearn.gaussian_process.kernels._check_length_scale",
+    "sklearn.gaussian_process.kernels.gamma", "sklearn.gaussian_process.kernels.kv",
+    "sklearn.gaussian_process.regression_models.constant", "sklearn.gaussian_process.regression_models.linear",
+    "sklearn.gaussian_process.regression_models.quadratic", "sklearn.get_config",
+    "sklearn.isotonic.IsotonicRegression", "sklearn.isotonic.check_increasing",
+    "sklearn.isotonic.isotonic_regression", "sklearn.kernel_approximation.AdditiveChi2Sampler",
+    "sklearn.kernel_approximation.KERNEL_PARAMS", "sklearn.kernel_approximation.Nystroem",
+    "sklearn.kernel_approximation.RBFSampler", "sklearn.kernel_approximation.SkewedChi2Sampler",
+    "sklearn.kernel_ridge.KernelRidge", "sklearn.linear_model.ARDRegression",
+    "sklearn.linear_model.BayesianRidge", "sklearn.linear_model.ElasticNet",
+    "sklearn.linear_model.ElasticNetCV", "sklearn.linear_model.Hinge",
+    "sklearn.linear_model.Huber", "sklearn.linear_model.HuberRegressor",
+    "sklearn.linear_model.Lars", "sklearn.linear_model.LarsCV",
+    "sklearn.linear_model.Lasso", "sklearn.linear_model.LassoCV",
+    "sklearn.linear_model.LassoLars", "sklearn.linear_model.LassoLarsCV",
+    "sklearn.linear_model.LassoLarsIC", "sklearn.linear_model.LinearRegression",
+    "sklearn.linear_model.Log", "sklearn.linear_model.LogisticRegression",
+    "sklearn.linear_model.LogisticRegressionCV", "sklearn.linear_model.ModifiedHuber",
+    "sklearn.linear_model.MultiTaskElasticNet", "sklearn.linear_model.MultiTaskElasticNetCV",
+    "sklearn.linear_model.MultiTaskLasso", "sklearn.linear_model.MultiTaskLassoCV",
+    "sklearn.linear_model.OrthogonalMatchingPursuit", "sklearn.linear_model.OrthogonalMatchingPursuitCV",
+    "sklearn.linear_model.PassiveAggressiveClassifier", "sklearn.linear_model.PassiveAggressiveRegressor",
+    "sklearn.linear_model.Perceptron", "sklearn.linear_model.RANSACRegressor",
+    "sklearn.linear_model.RandomizedLasso", "sklearn.linear_model.RandomizedLogisticRegression",
+    "sklearn.linear_model.Ridge", "sklearn.linear_model.RidgeCV",
+    "sklearn.linear_model.RidgeClassifier", "sklearn.linear_model.RidgeClassifierCV",
+    "sklearn.linear_model.SGDClassifier", "sklearn.linear_model.SGDRegressor",
+    "sklearn.linear_model.SquaredLoss", "sklearn.linear_model.TheilSenRegressor",
+    "sklearn.linear_model.base.FLOAT_DTYPES", "sklearn.linear_model.base.LinearClassifierMixin",
+    "sklearn.linear_model.base.LinearModel", "sklearn.linear_model.base.LinearRegression",
+    "sklearn.linear_model.base.SPARSE_INTERCEPT_DECAY", "sklearn.linear_model.base.SparseCoefMixin",
+    "sklearn.linear_model.base._pre_fit", "sklearn.linear_model.base._preprocess_data",
+    "sklearn.linear_model.base._rescale_data", "sklearn.linear_model.base.center_data",
+    "sklearn.linear_model.base.make_dataset", "sklearn.linear_model.base.sparse_center_data",
+    "sklearn.linear_model.bayes.ARDRegression", "sklearn.linear_model.bayes.BayesianRidge",
+    "sklearn.linear_model.cd_fast.enet_coordinate_descent", "sklearn.linear_model.cd_fast.enet_coordinate_descent_gram",
+    "sklearn.linear_model.cd_fast.enet_coordinate_descent_multi_task", "sklearn.linear_model.cd_fast.sparse_enet_coordinate_descent",
+    "sklearn.linear_model.coordinate_descent.ElasticNet", "sklearn.linear_model.coordinate_descent.ElasticNetCV",
+    "sklearn.linear_model.coordinate_descent.Lasso", "sklearn.linear_model.coordinate_descent.LassoCV",
+    "sklearn.linear_model.coordinate_descent.LinearModelCV", "sklearn.linear_model.coordinate_descent.MultiTaskElasticNet",
+    "sklearn.linear_model.coordinate_descent.MultiTaskElasticNetCV", "sklearn.linear_model.coordinate_descent.MultiTaskLasso",
+    "sklearn.linear_model.coordinate_descent.MultiTaskLassoCV", "sklearn.linear_model.coordinate_descent._alpha_grid",
+    "sklearn.linear_model.coordinate_descent._path_residuals", "sklearn.linear_model.coordinate_descent.enet_path",
+    "sklearn.linear_model.coordinate_descent.lasso_path", "sklearn.linear_model.enet_path",
+    "sklearn.linear_model.huber.HuberRegressor", "sklearn.linear_model.huber._huber_loss_and_gradient",
+    "sklearn.linear_model.lars_path", "sklearn.linear_model.lasso_path",
+    "sklearn.linear_model.lasso_stability_path", "sklearn.linear_model.least_angle.Lars",
+    "sklearn.linear_model.least_angle.LarsCV", "sklearn.linear_model.least_angle.LassoLars",
+    "sklearn.linear_model.least_angle.LassoLarsCV", "sklearn.linear_model.least_angle.LassoLarsIC",
+    "sklearn.linear_model.least_angle._check_copy_and_writeable", "sklearn.linear_model.least_angle._lars_path_residues",
+    "sklearn.linear_model.least_angle.lars_path", "sklearn.linear_model.least_angle.solve_triangular_args",
+    "sklearn.linear_model.least_angle.string_types", "sklearn.linear_model.logistic.LogisticRegression",
+    "sklearn.linear_model.logistic.LogisticRegressionCV", "sklearn.linear_model.logistic.SCORERS",
+    "sklearn.linear_model.logistic._check_solver_option", "sklearn.linear_model.logistic._intercept_dot",
+    "sklearn.linear_model.logistic._log_reg_scoring_path", "sklearn.linear_model.logistic._logistic_grad_hess",
+    "sklearn.linear_model.logistic._logistic_loss", "sklearn.linear_model.logistic._logistic_loss_and_grad",
+    "sklearn.linear_model.logistic._multinomial_grad_hess", "sklearn.linear_model.logistic._multinomial_loss",
+    "sklearn.linear_model.logistic._multinomial_loss_grad", "sklearn.linear_model.logistic.expit",
+    "sklearn.linear_model.logistic.logistic_regression_path", "sklearn.linear_model.logistic_regression_path",
+    "sklearn.linear_model.omp.OrthogonalMatchingPursuit", "sklearn.linear_model.omp.OrthogonalMatchingPursuitCV",
+    "sklearn.linear_model.omp._cholesky_omp", "sklearn.linear_model.omp._gram_omp",
+    "sklearn.linear_model.omp._omp_path_residues", "sklearn.linear_model.omp.orthogonal_mp",
+    "sklearn.linear_model.omp.orthogonal_mp_gram", "sklearn.linear_model.omp.premature",
+    "sklearn.linear_model.omp.solve_triangular_args", "sklearn.linear_model.orthogonal_mp",
+    "sklearn.linear_model.orthogonal_mp_gram", "sklearn.linear_model.passive_aggressive.DEFAULT_EPSILON",
+    "sklearn.linear_model.passive_aggressive.PassiveAggressiveClassifier", "sklearn.linear_model.passive_aggressive.PassiveAggressiveRegressor",
+    "sklearn.linear_model.perceptron.Perceptron", "sklearn.linear_model.randomized_l1.BaseRandomizedLinearModel",
+    "sklearn.linear_model.randomized_l1.RandomizedLasso", "sklearn.linear_model.randomized_l1.RandomizedLogisticRegression",
+    "sklearn.linear_model.randomized_l1._lasso_stability_path", "sklearn.linear_model.randomized_l1._randomized_lasso",
+    "sklearn.linear_model.randomized_l1._randomized_logistic", "sklearn.linear_model.randomized_l1._resample_model",
+    "sklearn.linear_model.randomized_l1.lasso_stability_path", "sklearn.linear_model.ransac.RANSACRegressor",
+    "sklearn.linear_model.ransac._EPSILON", "sklearn.linear_model.ransac._dynamic_max_trials",
+    "sklearn.linear_model.ridge.Ridge", "sklearn.linear_model.ridge.RidgeCV",
+    "sklearn.linear_model.ridge.RidgeClassifier", "sklearn.linear_model.ridge.RidgeClassifierCV",
+    "sklearn.linear_model.ridge._BaseRidge", "sklearn.linear_model.ridge._BaseRidgeCV",
+    "sklearn.linear_model.ridge._RidgeGCV", "sklearn.linear_model.ridge._solve_cholesky",
+    "sklearn.linear_model.ridge._solve_cholesky_kernel", "sklearn.linear_model.ridge._solve_lsqr",
+    "sklearn.linear_model.ridge._solve_sparse_cg", "sklearn.linear_model.ridge._solve_svd",
+    "sklearn.linear_model.ridge.ridge_regression", "sklearn.linear_model.ridge_regression",
+    "sklearn.linear_model.sag.get_auto_step_size", "sklearn.linear_model.sag.sag",
+    "sklearn.linear_model.sag.sag_solver", "sklearn.linear_model.sag_fast.MultinomialLogLoss",
+    "sklearn.linear_model.sag_fast._multinomial_grad_loss_all_samples", "sklearn.linear_model.sag_fast.sag",
+    "sklearn.linear_model.sgd_fast.Classification", "sklearn.linear_model.sgd_fast.EpsilonInsensitive",
+    "sklearn.linear_model.sgd_fast.Hinge", "sklearn.linear_model.sgd_fast.Huber",
+    "sklearn.linear_model.sgd_fast.Log", "sklearn.linear_model.sgd_fast.LossFunction",
+    "sklearn.linear_model.sgd_fast.ModifiedHuber", "sklearn.linear_model.sgd_fast.Regression",
+    "sklearn.linear_model.sgd_fast.SquaredEpsilonInsensitive", "sklearn.linear_model.sgd_fast.SquaredHinge",
+    "sklearn.linear_model.sgd_fast.SquaredLoss", "sklearn.linear_model.sgd_fast._plain_sgd",
+    "sklearn.linear_model.sgd_fast.average_sgd", "sklearn.linear_model.sgd_fast.plain_sgd",
+    "sklearn.linear_model.stochastic_gradient.BaseSGD", "sklearn.linear_model.stochastic_gradient.BaseSGDClassifier",
+    "sklearn.linear_model.stochastic_gradient.BaseSGDRegressor", "sklearn.linear_model.stochastic_gradient.DEFAULT_EPSILON",
+    "sklearn.linear_model.stochastic_gradient.LEARNING_RATE_TYPES", "sklearn.linear_model.stochastic_gradient.PENALTY_TYPES",
+    "sklearn.linear_model.stochastic_gradient.SGDClassifier", "sklearn.linear_model.stochastic_gradient.SGDRegressor",
+    "sklearn.linear_model.stochastic_gradient._prepare_fit_binary", "sklearn.linear_model.stochastic_gradient.fit_binary",
+    "sklearn.linear_model.theil_sen.TheilSenRegressor", "sklearn.linear_model.theil_sen._EPSILON",
+    "sklearn.linear_model.theil_sen._breakdown_point", "sklearn.linear_model.theil_sen._lstsq",
+    "sklearn.linear_model.theil_sen._modified_weiszfeld_step", "sklearn.linear_model.theil_sen._spatial_median",
+    "sklearn.linear_model.theil_sen.binom", "sklearn.manifold.Isomap",
+    "sklearn.manifold.LocallyLinearEmbedding", "sklearn.manifold.MDS",
+    "sklearn.manifold.SpectralEmbedding", "sklearn.manifold.TSNE",
+    "sklearn.manifold._barnes_hut_tsne.gradient", "sklearn.manifold._utils._binary_search_perplexity",
+    "sklearn.manifold.isomap.Isomap", "sklearn.manifold.locally_linear.FLOAT_DTYPES",
+    "sklearn.manifold.locally_linear.LocallyLinearEmbedding", "sklearn.manifold.locally_linear.barycenter_kneighbors_graph",
+    "sklearn.manifold.locally_linear.barycenter_weights", "sklearn.manifold.locally_linear.locally_linear_embedding",
+    "sklearn.manifold.locally_linear.null_space", "sklearn.manifold.locally_linear_embedding",
+    "sklearn.manifold.mds.MDS", "sklearn.manifold.mds._smacof_single",
+    "sklearn.manifold.mds.smacof", "sklearn.manifold.smacof",
+    "sklearn.manifold.spectral_embedding", "sklearn.manifold.spectral_embedding_.SpectralEmbedding",
+    "sklearn.manifold.spectral_embedding_._graph_connected_component", "sklearn.manifold.spectral_embedding_._graph_is_connected",
+    "sklearn.manifold.spectral_embedding_._set_diag", "sklearn.manifold.spectral_embedding_.spectral_embedding",
+    "sklearn.manifold.t_sne.MACHINE_EPSILON", "sklearn.manifold.t_sne.TSNE",
+    "sklearn.manifold.t_sne._gradient_descent", "sklearn.manifold.t_sne._joint_probabilities",
+    "sklearn.manifold.t_sne._joint_probabilities_nn", "sklearn.manifold.t_sne._kl_divergence",
+    "sklearn.manifold.t_sne._kl_divergence_bh", "sklearn.manifold.t_sne.string_types",
+    "sklearn.manifold.t_sne.trustworthiness", "sklearn.metrics.SCORERS",
+    "sklearn.metrics.accuracy_score", "sklearn.metrics.adjusted_mutual_info_score",
+    "sklearn.metrics.adjusted_rand_score", "sklearn.metrics.auc",
+    "sklearn.metrics.average_precision_score", "sklearn.metrics.base._average_binary_score",
+    "sklearn.metrics.brier_score_loss", "sklearn.metrics.calinski_harabaz_score",
+    "sklearn.metrics.classification._check_binary_probabilistic_predictions", "sklearn.metrics.classification._check_targets",
+    "sklearn.metrics.classification._prf_divide", "sklearn.metrics.classification._weighted_sum",
+    "sklearn.metrics.classification.accuracy_score", "sklearn.metrics.classification.brier_score_loss",
+    "sklearn.metrics.classification.classification_report", "sklearn.metrics.classification.cohen_kappa_score",
+    "sklearn.metrics.classification.confusion_matrix", "sklearn.metrics.classification.f1_score",
+    "sklearn.metrics.classification.fbeta_score", "sklearn.metrics.classification.hamming_loss",
+    "sklearn.metrics.classification.hinge_loss", "sklearn.metrics.classification.jaccard_similarity_score",
+    "sklearn.metrics.classification.log_loss", "sklearn.metrics.classification.matthews_corrcoef",
+    "sklearn.metrics.classification.precision_recall_fscore_support", "sklearn.metrics.classification.precision_score",
+    "sklearn.metrics.classification.recall_score", "sklearn.metrics.classification.zero_one_loss",
+    "sklearn.metrics.classification_report", "sklearn.metrics.cluster.adjusted_mutual_info_score",
+    "sklearn.metrics.cluster.adjusted_rand_score", "sklearn.metrics.cluster.bicluster._check_rows_and_columns",
+    "sklearn.metrics.cluster.bicluster._jaccard", "sklearn.metrics.cluster.bicluster._pairwise_similarity",
+    "sklearn.metrics.cluster.bicluster.consensus_score", "sklearn.metrics.cluster.calinski_harabaz_score",
+    "sklearn.metrics.cluster.completeness_score", "sklearn.metrics.cluster.consensus_score",
+    "sklearn.metrics.cluster.contingency_matrix", "sklearn.metrics.cluster.entropy",
+    "sklearn.metrics.cluster.expected_mutual_info_fast.expected_mutual_information", "sklearn.metrics.cluster.expected_mutual_info_fast.gammaln",
+    "sklearn.metrics.cluster.expected_mutual_information", "sklearn.metrics.cluster.fowlkes_mallows_score",
+    "sklearn.metrics.cluster.homogeneity_completeness_v_measure", "sklearn.metrics.cluster.homogeneity_score",
+    "sklearn.metrics.cluster.mutual_info_score", "sklearn.metrics.cluster.normalized_mutual_info_score",
+    "sklearn.metrics.cluster.silhouette_samples", "sklearn.metrics.cluster.silhouette_score",
+    "sklearn.metrics.cluster.supervised.adjusted_mutual_info_score", "sklearn.metrics.cluster.supervised.adjusted_rand_score",
+    "sklearn.metrics.cluster.supervised.check_clusterings", "sklearn.metrics.cluster.supervised.comb2",
+    "sklearn.metrics.cluster.supervised.completeness_score", "sklearn.metrics.cluster.supervised.contingency_matrix",
+    "sklearn.metrics.cluster.supervised.entropy", "sklearn.metrics.cluster.supervised.fowlkes_mallows_score",
+    "sklearn.metrics.cluster.supervised.homogeneity_completeness_v_measure", "sklearn.metrics.cluster.supervised.homogeneity_score",
+    "sklearn.metrics.cluster.supervised.mutual_info_score", "sklearn.metrics.cluster.supervised.normalized_mutual_info_score",
+    "sklearn.metrics.cluster.supervised.v_measure_score", "sklearn.metrics.cluster.unsupervised.calinski_harabaz_score",
+    "sklearn.metrics.cluster.unsupervised.check_number_of_labels", "sklearn.metrics.cluster.unsupervised.silhouette_samples",
+    "sklearn.metrics.cluster.unsupervised.silhouette_score", "sklearn.metrics.cluster.v_measure_score",
+    "sklearn.metrics.cohen_kappa_score", "sklearn.metrics.completeness_score",
+    "sklearn.metrics.confusion_matrix", "sklearn.metrics.consensus_score",
+    "sklearn.metrics.coverage_error", "sklearn.metrics.euclidean_distances",
+    "sklearn.metrics.explained_variance_score", "sklearn.metrics.f1_score",
+    "sklearn.metrics.fbeta_score", "sklearn.metrics.fowlkes_mallows_score",
+    "sklearn.metrics.get_scorer", "sklearn.metrics.hamming_loss",
+    "sklearn.metrics.hinge_loss", "sklearn.metrics.homogeneity_completeness_v_measure",
+    "sklearn.metrics.homogeneity_score", "sklearn.metrics.jaccard_similarity_score",
+    "sklearn.metrics.label_ranking_average_precision_score", "sklearn.metrics.label_ranking_loss",
+    "sklearn.metrics.log_loss", "sklearn.metrics.make_scorer",
+    "sklearn.metrics.matthews_corrcoef", "sklearn.metrics.mean_absolute_error",
+    "sklearn.metrics.mean_squared_error", "sklearn.metrics.mean_squared_log_error",
+    "sklearn.metrics.median_absolute_error", "sklearn.metrics.mutual_info_score",
+    "sklearn.metrics.normalized_mutual_info_score", "sklearn.metrics.pairwise.KERNEL_PARAMS",
+    "sklearn.metrics.pairwise.PAIRED_DISTANCES", "sklearn.metrics.pairwise.PAIRWISE_BOOLEAN_FUNCTIONS",
+    "sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS", "sklearn.metrics.pairwise.PAIRWISE_KERNEL_FUNCTIONS",
+    "sklearn.metrics.pairwise._VALID_METRICS", "sklearn.metrics.pairwise._chi2_kernel_fast",
+    "sklearn.metrics.pairwise._pairwise_callable", "sklearn.metrics.pairwise._parallel_pairwise",
+    "sklearn.metrics.pairwise._return_float_dtype", "sklearn.metrics.pairwise._sparse_manhattan",
+    "sklearn.metrics.pairwise.additive_chi2_kernel", "sklearn.metrics.pairwise.check_paired_arrays",
+    "sklearn.metrics.pairwise.check_pairwise_arrays", "sklearn.metrics.pairwise.chi2_kernel",
+    "sklearn.metrics.pairwise.cosine_distances", "sklearn.metrics.pairwise.cosine_similarity",
+    "sklearn.metrics.pairwise.distance_metrics", "sklearn.metrics.pairwise.euclidean_distances",
+    "sklearn.metrics.pairwise.kernel_metrics", "sklearn.metrics.pairwise.laplacian_kernel",
+    "sklearn.metrics.pairwise.linear_kernel", "sklearn.metrics.pairwise.manhattan_distances",
+    "sklearn.metrics.pairwise.paired_cosine_distances", "sklearn.metrics.pairwise.paired_distances",
+    "sklearn.metrics.pairwise.paired_euclidean_distances", "sklearn.metrics.pairwise.paired_manhattan_distances",
+    "sklearn.metrics.pairwise.pairwise_distances", "sklearn.metrics.pairwise.pairwise_distances_argmin",
+    "sklearn.metrics.pairwise.pairwise_distances_argmin_min", "sklearn.metrics.pairwise.pairwise_kernels",
+    "sklearn.metrics.pairwise.polynomial_kernel", "sklearn.metrics.pairwise.rbf_kernel",
+    "sklearn.metrics.pairwise.sigmoid_kernel", "sklearn.metrics.pairwise_distances",
+    "sklearn.metrics.pairwise_distances_argmin", "sklearn.metrics.pairwise_distances_argmin_min",
+    "sklearn.metrics.pairwise_fast._chi2_kernel_fast", "sklearn.metrics.pairwise_fast._sparse_manhattan",
+    "sklearn.metrics.pairwise_kernels", "sklearn.metrics.precision_recall_curve",
+    "sklearn.metrics.precision_recall_fscore_support", "sklearn.metrics.precision_score",
+    "sklearn.metrics.r2_score", "sklearn.metrics.ranking._binary_clf_curve",
+    "sklearn.metrics.ranking.auc", "sklearn.metrics.ranking.average_precision_score",
+    "sklearn.metrics.ranking.coverage_error", "sklearn.metrics.ranking.label_ranking_average_precision_score",
+    "sklearn.metrics.ranking.label_ranking_loss", "sklearn.metrics.ranking.precision_recall_curve",
+    "sklearn.metrics.ranking.roc_auc_score", "sklearn.metrics.ranking.roc_curve",
+    "sklearn.metrics.recall_score", "sklearn.metrics.regression._check_reg_targets",
+    "sklearn.metrics.regression.explained_variance_score", "sklearn.metrics.regression.mean_absolute_error",
+    "sklearn.metrics.regression.mean_squared_error", "sklearn.metrics.regression.mean_squared_log_error",
+    "sklearn.metrics.regression.median_absolute_error", "sklearn.metrics.regression.r2_score",
+    "sklearn.metrics.regression.string_types", "sklearn.metrics.roc_auc_score",
+    "sklearn.metrics.roc_curve", "sklearn.metrics.scorer.SCORERS",
+    "sklearn.metrics.scorer._BaseScorer", "sklearn.metrics.scorer._PredictScorer",
+    "sklearn.metrics.scorer._ProbaScorer", "sklearn.metrics.scorer._ThresholdScorer",
+    "sklearn.metrics.scorer._check_multimetric_scoring", "sklearn.metrics.scorer._passthrough_scorer",
+    "sklearn.metrics.scorer.accuracy_scorer", "sklearn.metrics.scorer.adjusted_mutual_info_scorer",
+    "sklearn.metrics.scorer.adjusted_rand_scorer", "sklearn.metrics.scorer.average",
+    "sklearn.metrics.scorer.average_precision_scorer", "sklearn.metrics.scorer.check_scoring",
+    "sklearn.metrics.scorer.completeness_scorer", "sklearn.metrics.scorer.deprecation_msg",
+    "sklearn.metrics.scorer.explained_variance_scorer", "sklearn.metrics.scorer.f1_scorer",
+    "sklearn.metrics.scorer.fowlkes_mallows_scorer", "sklearn.metrics.scorer.get_scorer",
+    "sklearn.metrics.scorer.homogeneity_scorer", "sklearn.metrics.scorer.log_loss_scorer",
+    "sklearn.metrics.scorer.make_scorer", "sklearn.metrics.scorer.mean_absolute_error_scorer",
+    "sklearn.metrics.scorer.mean_squared_error_scorer", "sklearn.metrics.scorer.median_absolute_error_scorer",
+    "sklearn.metrics.scorer.mutual_info_scorer", "sklearn.metrics.scorer.name",
+    "sklearn.metrics.scorer.neg_log_loss_scorer", "sklearn.metrics.scorer.neg_mean_absolute_error_scorer",
+    "sklearn.metrics.scorer.neg_mean_squared_error_scorer", "sklearn.metrics.scorer.neg_mean_squared_log_error_scorer",
+    "sklearn.metrics.scorer.neg_median_absolute_error_scorer", "sklearn.metrics.scorer.normalized_mutual_info_scorer",
+    "sklearn.metrics.scorer.precision_scorer", "sklearn.metrics.scorer.qualified_name",
+    "sklearn.metrics.scorer.r2_scorer", "sklearn.metrics.scorer.recall_scorer",
+    "sklearn.metrics.scorer.roc_auc_scorer", "sklearn.metrics.scorer.v_measure_scorer",
+    "sklearn.metrics.silhouette_samples", "sklearn.metrics.silhouette_score",
+    "sklearn.metrics.v_measure_score", "sklearn.metrics.zero_one_loss",
+    "sklearn.model_selection.BaseCrossValidator", "sklearn.model_selection.GridSearchCV",
+    "sklearn.model_selection.GroupKFold", "sklearn.model_selection.GroupShuffleSplit",
+    "sklearn.model_selection.KFold", "sklearn.model_selection.LeaveOneGroupOut",
+    "sklearn.model_selection.LeaveOneOut", "sklearn.model_selection.LeavePGroupsOut",
+    "sklearn.model_selection.LeavePOut", "sklearn.model_selection.ParameterGrid",
+    "sklearn.model_selection.ParameterSampler", "sklearn.model_selection.PredefinedSplit",
+    "sklearn.model_selection.RandomizedSearchCV", "sklearn.model_selection.RepeatedKFold",
+    "sklearn.model_selection.RepeatedStratifiedKFold", "sklearn.model_selection.ShuffleSplit",
+    "sklearn.model_selection.StratifiedKFold", "sklearn.model_selection.StratifiedShuffleSplit",
+    "sklearn.model_selection.TimeSeriesSplit", "sklearn.model_selection._search.BaseSearchCV",
+    "sklearn.model_selection._search.GridSearchCV", "sklearn.model_selection._search.ParameterGrid",
+    "sklearn.model_selection._search.ParameterSampler", "sklearn.model_selection._search.RandomizedSearchCV",
+    "sklearn.model_selection._search._CVScoreTuple", "sklearn.model_selection._search._check_param_grid",
+    "sklearn.model_selection._search.fit_grid_point", "sklearn.model_selection._search.sp_version",
+    "sklearn.model_selection._split.BaseCrossValidator", "sklearn.model_selection._split.BaseShuffleSplit",
+    "sklearn.model_selection._split.GroupKFold", "sklearn.model_selection._split.GroupShuffleSplit",
+    "sklearn.model_selection._split.KFold", "sklearn.model_selection._split.LeaveOneGroupOut",
+    "sklearn.model_selection._split.LeaveOneOut", "sklearn.model_selection._split.LeavePGroupsOut",
+    "sklearn.model_selection._split.LeavePOut", "sklearn.model_selection._split.PredefinedSplit",
+    "sklearn.model_selection._split.RepeatedKFold", "sklearn.model_selection._split.RepeatedStratifiedKFold",
+    "sklearn.model_selection._split.ShuffleSplit", "sklearn.model_selection._split.StratifiedKFold",
+    "sklearn.model_selection._split.StratifiedShuffleSplit", "sklearn.model_selection._split.TimeSeriesSplit",
+    "sklearn.model_selection._split._BaseKFold", "sklearn.model_selection._split._CVIterableWrapper",
+    "sklearn.model_selection._split._RepeatedSplits", "sklearn.model_selection._split._approximate_mode",
+    "sklearn.model_selection._split._build_repr", "sklearn.model_selection._split._validate_shuffle_split",
+    "sklearn.model_selection._split._validate_shuffle_split_init", "sklearn.model_selection._split.check_cv",
+    "sklearn.model_selection._split.train_test_split", "sklearn.model_selection._validation._aggregate_score_dicts",
+    "sklearn.model_selection._validation._check_is_permutation", "sklearn.model_selection._validation._fit_and_predict",
+    "sklearn.model_selection._validation._fit_and_score", "sklearn.model_selection._validation._incremental_fit_estimator",
+    "sklearn.model_selection._validation._index_param_value", "sklearn.model_selection._validation._multimetric_score",
+    "sklearn.model_selection._validation._permutation_test_score", "sklearn.model_selection._validation._score",
+    "sklearn.model_selection._validation._shuffle", "sklearn.model_selection._validation._translate_train_sizes",
+    "sklearn.model_selection._validation.cross_val_predict", "sklearn.model_selection._validation.cross_val_score",
+    "sklearn.model_selection._validation.cross_validate", "sklearn.model_selection._validation.learning_curve",
+    "sklearn.model_selection._validation.permutation_test_score", "sklearn.model_selection._validation.validation_curve",
+    "sklearn.model_selection.check_cv", "sklearn.model_selection.cross_val_predict",
+    "sklearn.model_selection.cross_val_score", "sklearn.model_selection.cross_validate",
+    "sklearn.model_selection.fit_grid_point", "sklearn.model_selection.learning_curve",
+    "sklearn.model_selection.permutation_test_score", "sklearn.model_selection.train_test_split",
+    "sklearn.model_selection.validation_curve", "sklearn.multiclass.OneVsOneClassifier",
+    "sklearn.multiclass.OneVsRestClassifier", "sklearn.multiclass.OutputCodeClassifier",
+    "sklearn.multiclass._ConstantPredictor", "sklearn.multiclass._check_estimator",
+    "sklearn.multiclass._fit_binary", "sklearn.multiclass._fit_ovo_binary",
+    "sklearn.multiclass._partial_fit_binary", "sklearn.multiclass._partial_fit_ovo_binary",
+    "sklearn.multiclass._predict_binary", "sklearn.naive_bayes.BaseDiscreteNB",
+    "sklearn.naive_bayes.BaseNB", "sklearn.naive_bayes.BernoulliNB",
+    "sklearn.naive_bayes.GaussianNB", "sklearn.naive_bayes.MultinomialNB",
+    "sklearn.naive_bayes._ALPHA_MIN", "sklearn.neighbors.BallTree",
+    "sklearn.neighbors.DistanceMetric", "sklearn.neighbors.KDTree",
+    "sklearn.neighbors.KNeighborsClassifier", "sklearn.neighbors.KNeighborsRegressor",
+    "sklearn.neighbors.KernelDensity", "sklearn.neighbors.LSHForest",
+    "sklearn.neighbors.LocalOutlierFactor", "sklearn.neighbors.NearestCentroid",
+    "sklearn.neighbors.NearestNeighbors", "sklearn.neighbors.RadiusNeighborsClassifier",
+    "sklearn.neighbors.RadiusNeighborsRegressor", "sklearn.neighbors.approximate.GaussianRandomProjectionHash",
+    "sklearn.neighbors.approximate.HASH_DTYPE", "sklearn.neighbors.approximate.LSHForest",
+    "sklearn.neighbors.approximate.MAX_HASH_SIZE", "sklearn.neighbors.approximate.ProjectionToHashMixin",
+    "sklearn.neighbors.approximate._array_of_arrays", "sklearn.neighbors.approximate._find_longest_prefix_match",
+    "sklearn.neighbors.approximate._find_matching_indices", "sklearn.neighbors.ball_tree.BallTree",
+    "sklearn.neighbors.ball_tree.BinaryTree", "sklearn.neighbors.ball_tree.CLASS_DOC",
+    "sklearn.neighbors.ball_tree.DOC_DICT", "sklearn.neighbors.ball_tree.NeighborsHeap",
+    "sklearn.neighbors.ball_tree.NodeData", "sklearn.neighbors.ball_tree.NodeHeap",
+    "sklearn.neighbors.ball_tree.NodeHeapData", "sklearn.neighbors.ball_tree.VALID_METRICS",
+    "sklearn.neighbors.ball_tree.VALID_METRIC_IDS", "sklearn.neighbors.ball_tree.kernel_norm",
+    "sklearn.neighbors.ball_tree.load_heap", "sklearn.neighbors.ball_tree.newObj",
+    "sklearn.neighbors.ball_tree.nodeheap_sort", "sklearn.neighbors.ball_tree.offsets",
+    "sklearn.neighbors.ball_tree.simultaneous_sort", "sklearn.neighbors.base.KNeighborsMixin",
+    "sklearn.neighbors.base.NeighborsBase", "sklearn.neighbors.base.PAIRWISE_DISTANCE_FUNCTIONS",
+    "sklearn.neighbors.base.RadiusNeighborsMixin", "sklearn.neighbors.base.SupervisedFloatMixin",
+    "sklearn.neighbors.base.SupervisedIntegerMixin", "sklearn.neighbors.base.UnsupervisedMixin",
+    "sklearn.neighbors.base.VALID_METRICS", "sklearn.neighbors.base.VALID_METRICS_SPARSE",
+    "sklearn.neighbors.base._check_weights", "sklearn.neighbors.base._get_weights",
+    "sklearn.neighbors.classification.KNeighborsClassifier", "sklearn.neighbors.classification.RadiusNeighborsClassifier",
+    "sklearn.neighbors.dist_metrics.BrayCurtisDistance", "sklearn.neighbors.dist_metrics.CanberraDistance",
+    "sklearn.neighbors.dist_metrics.ChebyshevDistance", "sklearn.neighbors.dist_metrics.DiceDistance",
+    "sklearn.neighbors.dist_metrics.DistanceMetric", "sklearn.neighbors.dist_metrics.EuclideanDistance",
+    "sklearn.neighbors.dist_metrics.HammingDistance", "sklearn.neighbors.dist_metrics.HaversineDistance",
+    "sklearn.neighbors.dist_metrics.JaccardDistance", "sklearn.neighbors.dist_metrics.KulsinskiDistance",
+    "sklearn.neighbors.dist_metrics.METRIC_MAPPING", "sklearn.neighbors.dist_metrics.MahalanobisDistance",
+    "sklearn.neighbors.dist_metrics.ManhattanDistance", "sklearn.neighbors.dist_metrics.MatchingDistance",
+    "sklearn.neighbors.dist_metrics.MinkowskiDistance", "sklearn.neighbors.dist_metrics.PyFuncDistance",
+    "sklearn.neighbors.dist_metrics.RogersTanimotoDistance", "sklearn.neighbors.dist_metrics.RussellRaoDistance",
+    "sklearn.neighbors.dist_metrics.SEuclideanDistance", "sklearn.neighbors.dist_metrics.SokalMichenerDistance",
+    "sklearn.neighbors.dist_metrics.SokalSneathDistance", "sklearn.neighbors.dist_metrics.WMinkowskiDistance",
+    "sklearn.neighbors.dist_metrics.get_valid_metric_ids", "sklearn.neighbors.dist_metrics.newObj",
+    "sklearn.neighbors.graph._check_params", "sklearn.neighbors.graph._query_include_self",
+    "sklearn.neighbors.graph.kneighbors_graph", "sklearn.neighbors.graph.radius_neighbors_graph",
+    "sklearn.neighbors.kd_tree.BinaryTree", "sklearn.neighbors.kd_tree.CLASS_DOC",
+    "sklearn.neighbors.kd_tree.DOC_DICT", "sklearn.neighbors.kd_tree.KDTree",
+    "sklearn.neighbors.kd_tree.NeighborsHeap", "sklearn.neighbors.kd_tree.NodeData",
+    "sklearn.neighbors.kd_tree.NodeHeap", "sklearn.neighbors.kd_tree.NodeHeapData",
+    "sklearn.neighbors.kd_tree.VALID_METRICS", "sklearn.neighbors.kd_tree.VALID_METRIC_IDS",
+    "sklearn.neighbors.kd_tree.kernel_norm", "sklearn.neighbors.kd_tree.load_heap",
+    "sklearn.neighbors.kd_tree.newObj", "sklearn.neighbors.kd_tree.nodeheap_sort",
+    "sklearn.neighbors.kd_tree.offsets", "sklearn.neighbors.kd_tree.simultaneous_sort",
+    "sklearn.neighbors.kde.KernelDensity", "sklearn.neighbors.kde.TREE_DICT",
+    "sklearn.neighbors.kde.VALID_KERNELS", "sklearn.neighbors.kde.gammainc",
+    "sklearn.neighbors.kneighbors_graph", "sklearn.neighbors.lof.LocalOutlierFactor",
+    "sklearn.neighbors.nearest_centroid.NearestCentroid", "sklearn.neighbors.quad_tree.CELL_DTYPE",
+    "sklearn.neighbors.quad_tree._QuadTree", "sklearn.neighbors.radius_neighbors_graph",
+    "sklearn.neighbors.regression.KNeighborsRegressor", "sklearn.neighbors.regression.RadiusNeighborsRegressor",
+    "sklearn.neighbors.unsupervised.NearestNeighbors", "sklearn.pipeline.FeatureUnion",
+    "sklearn.pipeline.Pipeline", "sklearn.pipeline._fit_one_transformer",
+    "sklearn.pipeline._fit_transform_one", "sklearn.pipeline._name_estimators",
+    "sklearn.pipeline._transform_one", "sklearn.pipeline.make_pipeline",
+    "sklearn.pipeline.make_union", "sklearn.preprocessing.Binarizer",
+    "sklearn.preprocessing.FunctionTransformer", "sklearn.preprocessing.Imputer",
+    "sklearn.preprocessing.KernelCenterer", "sklearn.preprocessing.LabelBinarizer",
+    "sklearn.preprocessing.LabelEncoder", "sklearn.preprocessing.MaxAbsScaler",
+    "sklearn.preprocessing.MinMaxScaler", "sklearn.preprocessing.MultiLabelBinarizer",
+    "sklearn.preprocessing.Normalizer", "sklearn.preprocessing.OneHotEncoder",
+    "sklearn.preprocessing.PolynomialFeatures", "sklearn.preprocessing.QuantileTransformer",
+    "sklearn.preprocessing.RobustScaler", "sklearn.preprocessing.StandardScaler",
+    "sklearn.preprocessing._function_transformer.FunctionTransformer", "sklearn.preprocessing._function_transformer._identity",
+    "sklearn.preprocessing._function_transformer.string_types", "sklearn.preprocessing.add_dummy_feature",
+    "sklearn.preprocessing.binarize", "sklearn.preprocessing.data.BOUNDS_THRESHOLD",
+    "sklearn.preprocessing.data.Binarizer", "sklearn.preprocessing.data.FLOAT_DTYPES",
+    "sklearn.preprocessing.data.KernelCenterer", "sklearn.preprocessing.data.MaxAbsScaler",
+    "sklearn.preprocessing.data.MinMaxScaler", "sklearn.preprocessing.data.Normalizer",
+    "sklearn.preprocessing.data.OneHotEncoder", "sklearn.preprocessing.data.PolynomialFeatures",
+    "sklearn.preprocessing.data.QuantileTransformer", "sklearn.preprocessing.data.RobustScaler",
+    "sklearn.preprocessing.data.StandardScaler", "sklearn.preprocessing.data._handle_zeros_in_scale",
+    "sklearn.preprocessing.data._transform_selected", "sklearn.preprocessing.data.add_dummy_feature",
+    "sklearn.preprocessing.data.binarize", "sklearn.preprocessing.data.maxabs_scale",
+    "sklearn.preprocessing.data.minmax_scale", "sklearn.preprocessing.data.normalize",
+    "sklearn.preprocessing.data.quantile_transform", "sklearn.preprocessing.data.robust_scale",
+    "sklearn.preprocessing.data.scale", "sklearn.preprocessing.data.string_types",
+    "sklearn.preprocessing.imputation.FLOAT_DTYPES", "sklearn.preprocessing.imputation.Imputer",
+    "sklearn.preprocessing.imputation._get_mask", "sklearn.preprocessing.imputation._most_frequent",
+    "sklearn.preprocessing.label.LabelBinarizer", "sklearn.preprocessing.label.LabelEncoder",
+    "sklearn.preprocessing.label.MultiLabelBinarizer", "sklearn.preprocessing.label._inverse_binarize_multiclass",
+    "sklearn.preprocessing.label._inverse_binarize_thresholding", "sklearn.preprocessing.label.label_binarize",
+    "sklearn.preprocessing.label_binarize", "sklearn.preprocessing.maxabs_scale",
+    "sklearn.preprocessing.minmax_scale", "sklearn.preprocessing.normalize",
+    "sklearn.preprocessing.quantile_transform", "sklearn.preprocessing.robust_scale",
+    "sklearn.preprocessing.scale", "sklearn.random_projection.BaseRandomProjection",
+    "sklearn.random_projection.GaussianRandomProjection", "sklearn.random_projection.SparseRandomProjection",
+    "sklearn.random_projection._check_density", "sklearn.random_projection._check_input_size",
+    "sklearn.random_projection.gaussian_random_matrix", "sklearn.random_projection.johnson_lindenstrauss_min_dim",
+    "sklearn.random_projection.sparse_random_matrix", "sklearn.set_config",
+    "sklearn.setup_module", "sklearn.svm.LinearSVC",
+    "sklearn.svm.LinearSVR", "sklearn.svm.NuSVC",
+    "sklearn.svm.NuSVR", "sklearn.svm.OneClassSVM",
+    "sklearn.svm.SVC", "sklearn.svm.SVR",
+    "sklearn.svm.base.BaseLibSVM", "sklearn.svm.base.BaseSVC",
+    "sklearn.svm.base.LIBSVM_IMPL", "sklearn.svm.base._fit_liblinear",
+    "sklearn.svm.base._get_liblinear_solver_type", "sklearn.svm.base._one_vs_one_coef",
+    "sklearn.svm.bounds.l1_min_c", "sklearn.svm.classes.LinearSVC",
+    "sklearn.svm.classes.LinearSVR", "sklearn.svm.classes.NuSVC",
+    "sklearn.svm.classes.NuSVR", "sklearn.svm.classes.OneClassSVM",
+    "sklearn.svm.classes.SVC", "sklearn.svm.classes.SVR",
+    "sklearn.svm.l1_min_c", "sklearn.svm.liblinear.set_verbosity_wrap",
+    "sklearn.svm.liblinear.train_wrap", "sklearn.svm.libsvm.LIBSVM_KERNEL_TYPES",
+    "sklearn.svm.libsvm.cross_validation", "sklearn.svm.libsvm.decision_function",
+    "sklearn.svm.libsvm.fit", "sklearn.svm.libsvm.predict",
+    "sklearn.svm.libsvm.predict_proba", "sklearn.svm.libsvm.set_verbosity_wrap",
+    "sklearn.svm.libsvm_sparse.libsvm_sparse_decision_function", "sklearn.svm.libsvm_sparse.libsvm_sparse_predict",
+    "sklearn.svm.libsvm_sparse.libsvm_sparse_predict_proba", "sklearn.svm.libsvm_sparse.libsvm_sparse_train",
+    "sklearn.svm.libsvm_sparse.set_verbosity_wrap", "sklearn.tree.DecisionTreeClassifier",
+    "sklearn.tree.DecisionTreeRegressor", "sklearn.tree.ExtraTreeClassifier",
+    "sklearn.tree.ExtraTreeRegressor", "sklearn.tree._criterion.ClassificationCriterion",
+    "sklearn.tree._criterion.Criterion", "sklearn.tree._criterion.Entropy",
+    "sklearn.tree._criterion.FriedmanMSE", "sklearn.tree._criterion.Gini",
+    "sklearn.tree._criterion.MAE", "sklearn.tree._criterion.MSE",
+    "sklearn.tree._criterion.RegressionCriterion", "sklearn.tree._splitter.BaseDenseSplitter",
+    "sklearn.tree._splitter.BaseSparseSplitter", "sklearn.tree._splitter.BestSparseSplitter",
+    "sklearn.tree._splitter.BestSplitter", "sklearn.tree._splitter.RandomSparseSplitter",
+    "sklearn.tree._splitter.RandomSplitter", "sklearn.tree._splitter.Splitter",
+    "sklearn.tree._tree.BestFirstTreeBuilder", "sklearn.tree._tree.DepthFirstTreeBuilder",
+    "sklearn.tree._tree.NODE_DTYPE", "sklearn.tree._tree.TREE_LEAF",
+    "sklearn.tree._tree.TREE_UNDEFINED", "sklearn.tree._tree.Tree",
+    "sklearn.tree._tree.TreeBuilder", "sklearn.tree._utils.PriorityHeap",
+    "sklearn.tree._utils.Stack", "sklearn.tree._utils.WeightedMedianCalculator",
+    "sklearn.tree._utils.WeightedPQueue", "sklearn.tree._utils._realloc_test",
+    "sklearn.tree.export.SENTINEL", "sklearn.tree.export.Sentinel",
+    "sklearn.tree.export._color_brew", "sklearn.tree.export.export_graphviz",
+    "sklearn.tree.export_graphviz", "sklearn.tree.tree.BaseDecisionTree",
+    "sklearn.tree.tree.CRITERIA_CLF", "sklearn.tree.tree.CRITERIA_REG",
+    "sklearn.tree.tree.DENSE_SPLITTERS", "sklearn.tree.tree.DecisionTreeClassifier",
+    "sklearn.tree.tree.DecisionTreeRegressor", "sklearn.tree.tree.ExtraTreeClassifier",
+    "sklearn.tree.tree.ExtraTreeRegressor", "sklearn.tree.tree.SPARSE_SPLITTERS",
+    "sklearn.utils.Bunch", "sklearn.utils._get_n_jobs",
+    "sklearn.utils._logistic_sigmoid._log_logistic_sigmoid", "sklearn.utils._random._sample_without_replacement_check_input",
+    "sklearn.utils._random._sample_without_replacement_with_pool", "sklearn.utils._random._sample_without_replacement_with_reservoir_sampling",
+    "sklearn.utils._random._sample_without_replacement_with_tracking_selection", "sklearn.utils._random.sample_without_replacement",
+    "sklearn.utils.arrayfuncs.cholesky_delete", "sklearn.utils.arrayfuncs.min_pos",
+    "sklearn.utils.as_float_array", "sklearn.utils.assert_all_finite",
+    "sklearn.utils.axis0_safe_slice", "sklearn.utils.check_X_y",
+    "sklearn.utils.check_array", "sklearn.utils.check_consistent_length",
+    "sklearn.utils.check_random_state", "sklearn.utils.check_symmetric",
+    "sklearn.utils.class_weight.compute_class_weight", "sklearn.utils.class_weight.compute_sample_weight",
+    "sklearn.utils.column_or_1d", "sklearn.utils.compute_class_weight",
+    "sklearn.utils.compute_sample_weight", "sklearn.utils.deprecated",
+    "sklearn.utils.deprecation.DeprecationDict", "sklearn.utils.deprecation._is_deprecated",
+    "sklearn.utils.deprecation.deprecated", "sklearn.utils.extmath._deterministic_vector_sign_flip",
+    "sklearn.utils.extmath._impose_f_order", "sklearn.utils.extmath._incremental_mean_and_var",
+    "sklearn.utils.extmath.cartesian", "sklearn.utils.extmath.density",
+    "sklearn.utils.extmath.fast_dot", "sklearn.utils.extmath.fast_logdet",
+    "sklearn.utils.extmath.log_logistic", "sklearn.utils.extmath.logsumexp",
+    "sklearn.utils.extmath.make_nonnegative", "sklearn.utils.extmath.norm",
+    "sklearn.utils.extmath.np_version", "sklearn.utils.extmath.pinvh",
+    "sklearn.utils.extmath.randomized_range_finder", "sklearn.utils.extmath.randomized_svd",
+    "sklearn.utils.extmath.row_norms", "sklearn.utils.extmath.safe_min",
+    "sklearn.utils.extmath.safe_sparse_dot", "sklearn.utils.extmath.softmax",
+    "sklearn.utils.extmath.squared_norm", "sklearn.utils.extmath.stable_cumsum",
+    "sklearn.utils.extmath.svd_flip", "sklearn.utils.extmath.weighted_mode",
+    "sklearn.utils.fast_dict.IntFloatDict", "sklearn.utils.fast_dict.argmin",
+    "sklearn.utils.fixes._parse_version", "sklearn.utils.fixes.divide",
+    "sklearn.utils.fixes.euler_gamma", "sklearn.utils.fixes.makedirs",
+    "sklearn.utils.fixes.np_version", "sklearn.utils.fixes.parallel_helper",
+    "sklearn.utils.fixes.sp_version", "sklearn.utils.fixes.sparse_min_max",
+    "sklearn.utils.gen_batches", "sklearn.utils.gen_even_slices",
+    "sklearn.utils.graph.connected_components", "sklearn.utils.graph.graph_laplacian",
+    "sklearn.utils.graph.graph_shortest_path", "sklearn.utils.graph.single_source_shortest_path_length",
+    "sklearn.utils.graph_shortest_path.graph_shortest_path", "sklearn.utils.indexable",
+    "sklearn.utils.indices_to_mask", "sklearn.utils.linear_assignment_._HungarianState",
+    "sklearn.utils.linear_assignment_._hungarian", "sklearn.utils.linear_assignment_._step1",
+    "sklearn.utils.linear_assignment_._step3", "sklearn.utils.linear_assignment_._step4",
+    "sklearn.utils.linear_assignment_._step5", "sklearn.utils.linear_assignment_._step6",
+    "sklearn.utils.linear_assignment_.linear_assignment", "sklearn.utils.metaestimators._BaseComposition",
+    "sklearn.utils.metaestimators._IffHasAttrDescriptor", "sklearn.utils.metaestimators._safe_split",
+    "sklearn.utils.metaestimators.if_delegate_has_method", "sklearn.utils.multiclass._FN_UNIQUE_LABELS",
+    "sklearn.utils.multiclass._check_partial_fit_first_call", "sklearn.utils.multiclass._is_integral_float",
+    "sklearn.utils.multiclass._ovr_decision_function", "sklearn.utils.multiclass._unique_indicator",
+    "sklearn.utils.multiclass._unique_multiclass", "sklearn.utils.multiclass.check_classification_targets",
+    "sklearn.utils.multiclass.class_distribution", "sklearn.utils.multiclass.is_multilabel",
+    "sklearn.utils.multiclass.string_types", "sklearn.utils.multiclass.type_of_target",
+    "sklearn.utils.multiclass.unique_labels", "sklearn.utils.murmurhash.murmurhash3_32",
+    "sklearn.utils.murmurhash.murmurhash3_bytes_array_s32", "sklearn.utils.murmurhash.murmurhash3_bytes_array_u32",
+    "sklearn.utils.murmurhash.murmurhash3_bytes_s32", "sklearn.utils.murmurhash.murmurhash3_bytes_u32",
+    "sklearn.utils.murmurhash.murmurhash3_int_s32", "sklearn.utils.murmurhash.murmurhash3_int_u32",
+    "sklearn.utils.murmurhash3_32", "sklearn.utils.optimize._LineSearchError",
+    "sklearn.utils.optimize._cg", "sklearn.utils.optimize._line_search_wolfe12",
+    "sklearn.utils.optimize.newton_cg", "sklearn.utils.random.choice",
+    "sklearn.utils.random.random_choice_csc", "sklearn.utils.resample",
+    "sklearn.utils.safe_indexing", "sklearn.utils.safe_mask",
+    "sklearn.utils.safe_sqr", "sklearn.utils.seq_dataset.ArrayDataset",
+    "sklearn.utils.seq_dataset.CSRDataset", "sklearn.utils.seq_dataset.SequentialDataset",
+    "sklearn.utils.shuffle", "sklearn.utils.sparsefuncs._csc_mean_var_axis0",
+    "sklearn.utils.sparsefuncs._csr_mean_var_axis0", "sklearn.utils.sparsefuncs._get_elem_at_rank",
+    "sklearn.utils.sparsefuncs._get_median", "sklearn.utils.sparsefuncs._incr_mean_var_axis0",
+    "sklearn.utils.sparsefuncs._raise_error_wrong_axis", "sklearn.utils.sparsefuncs._raise_typeerror",
+    "sklearn.utils.sparsefuncs.count_nonzero", "sklearn.utils.sparsefuncs.csc_median_axis_0",
+    "sklearn.utils.sparsefuncs.incr_mean_variance_axis", "sklearn.utils.sparsefuncs.inplace_column_scale",
+    "sklearn.utils.sparsefuncs.inplace_csr_column_scale", "sklearn.utils.sparsefuncs.inplace_csr_row_scale",
+    "sklearn.utils.sparsefuncs.inplace_row_scale", "sklearn.utils.sparsefuncs.inplace_swap_column",
+    "sklearn.utils.sparsefuncs.inplace_swap_row", "sklearn.utils.sparsefuncs.inplace_swap_row_csc",
+    "sklearn.utils.sparsefuncs.inplace_swap_row_csr", "sklearn.utils.sparsefuncs.mean_variance_axis",
+    "sklearn.utils.sparsefuncs.min_max_axis", "sklearn.utils.sparsefuncs_fast._csc_mean_variance_axis0",
+    "sklearn.utils.sparsefuncs_fast._csr_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast._csr_row_norms",
+    "sklearn.utils.sparsefuncs_fast._incr_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast._inplace_csr_row_normalize_l1",
+    "sklearn.utils.sparsefuncs_fast._inplace_csr_row_normalize_l2", "sklearn.utils.sparsefuncs_fast.assign_rows_csr",
+    "sklearn.utils.sparsefuncs_fast.csc_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast.csr_mean_variance_axis0",
+    "sklearn.utils.sparsefuncs_fast.csr_row_norms", "sklearn.utils.sparsefuncs_fast.incr_mean_variance_axis0",
+    "sklearn.utils.sparsefuncs_fast.inplace_csr_row_normalize_l1", "sklearn.utils.sparsefuncs_fast.inplace_csr_row_normalize_l2",
+    "sklearn.utils.stats._weighted_percentile", "sklearn.utils.stats.rankdata",
+    "sklearn.utils.tosequence", "sklearn.utils.validation.FLOAT_DTYPES",
+    "sklearn.utils.validation._assert_all_finite", "sklearn.utils.validation._ensure_sparse_format",
+    "sklearn.utils.validation._is_arraylike", "sklearn.utils.validation._num_samples",
+    "sklearn.utils.validation._shape_repr", "sklearn.utils.validation.as_float_array",
+    "sklearn.utils.validation.assert_all_finite", "sklearn.utils.validation.check_X_y",
+    "sklearn.utils.validation.check_array", "sklearn.utils.validation.check_consistent_length",
+    "sklearn.utils.validation.check_is_fitted", "sklearn.utils.validation.check_memory",
+    "sklearn.utils.validation.check_non_negative", "sklearn.utils.validation.check_random_state",
+    "sklearn.utils.validation.check_symmetric", "sklearn.utils.validation.column_or_1d",
+    "sklearn.utils.validation.has_fit_parameter", "sklearn.utils.validation.indexable",
+    "sklearn.utils.weight_vector.WeightVector"
+],
+
+  "SKR_NAMES": [
+    "skrebate.MultiSURF", "skrebate.MultiSURFstar",
+    "skrebate.ReliefF", "skrebate.SURF",
+    "skrebate.SURFstar", "skrebate.TuRF",
+    "skrebate.multisurf.MultiSURF", "skrebate.multisurfstar.MultiSURFstar",
+    "skrebate.relieff.ReliefF", "skrebate.scoring_utils.MultiSURF_compute_scores",
+    "skrebate.scoring_utils.MultiSURFstar_compute_scores", "skrebate.scoring_utils.ReliefF_compute_scores",
+    "skrebate.scoring_utils.SURF_compute_scores", "skrebate.scoring_utils.SURFstar_compute_scores",
+    "skrebate.scoring_utils.compute_score", "skrebate.scoring_utils.get_row_missing",
+    "skrebate.scoring_utils.ramp_function", "skrebate.surf.SURF",
+    "skrebate.surfstar.SURFstar", "skrebate.turf.TuRF"
+  ],
+
+  "XGB_NAMES": [
+    "xgboost.Booster", "xgboost.DMatrix",
+    "xgboost.VERSION_FILE", "xgboost.XGBClassifier",
+    "xgboost.XGBModel", "xgboost.XGBRegressor",
+    "xgboost.callback._fmt_metric", "xgboost.callback._get_callback_context",
+    "xgboost.callback.early_stop", "xgboost.callback.print_evaluation",
+    "xgboost.callback.record_evaluation", "xgboost.callback.reset_learning_rate",
+    "xgboost.compat.PANDAS_INSTALLED", "xgboost.compat.PY3",
+    "xgboost.compat.SKLEARN_INSTALLED", "xgboost.compat.STRING_TYPES",
+    "xgboost.compat.py_str", "xgboost.core.Booster",
+    "xgboost.core.CallbackEnv", "xgboost.core.DMatrix",
+    "xgboost.core.EarlyStopException", "xgboost.core.PANDAS_DTYPE_MAPPER",
+    "xgboost.core.PANDAS_INSTALLED", "xgboost.core.PY3",
+    "xgboost.core.STRING_TYPES", "xgboost.core.XGBoostError",
+    "xgboost.core._check_call", "xgboost.core._load_lib",
+    "xgboost.core._maybe_pandas_data", "xgboost.core._maybe_pandas_label",
+    "xgboost.core.c_array", "xgboost.core.c_str",
+    "xgboost.core.ctypes2buffer", "xgboost.core.ctypes2numpy",
+    "xgboost.core.from_cstr_to_pystr", "xgboost.core.from_pystr_to_cstr",
+    "xgboost.cv", "xgboost.f",
+    "xgboost.libpath.XGBoostLibraryNotFound", "xgboost.libpath.find_lib_path",
+    "xgboost.plot_importance", "xgboost.plot_tree",
+    "xgboost.plotting._EDGEPAT", "xgboost.plotting._EDGEPAT2",
+    "xgboost.plotting._LEAFPAT", "xgboost.plotting._NODEPAT",
+    "xgboost.plotting._parse_edge", "xgboost.plotting._parse_node",
+    "xgboost.plotting.plot_importance", "xgboost.plotting.plot_tree",
+    "xgboost.plotting.to_graphviz", "xgboost.rabit.DTYPE_ENUM__",
+    "xgboost.rabit.STRING_TYPES", "xgboost.rabit._init_rabit",
+    "xgboost.rabit.allreduce", "xgboost.rabit.broadcast",
+    "xgboost.rabit.finalize", "xgboost.rabit.get_processor_name",
+    "xgboost.rabit.get_rank", "xgboost.rabit.get_world_size",
+    "xgboost.rabit.init", "xgboost.rabit.tracker_print",
+    "xgboost.rabit.version_number", "xgboost.sklearn.SKLEARN_INSTALLED",
+    "xgboost.sklearn.XGBClassifier", "xgboost.sklearn.XGBModel",
+    "xgboost.sklearn.XGBRegressor", "xgboost.sklearn._objective_decorator",
+    "xgboost.to_graphviz", "xgboost.train",
+    "xgboost.training.CVPack", "xgboost.training.SKLEARN_INSTALLED",
+    "xgboost.training.STRING_TYPES", "xgboost.training._train_internal",
+    "xgboost.training.aggcv", "xgboost.training.cv",
+    "xgboost.training.mknfold", "xgboost.training.train"
+  ],
+
+
+  "NUMPY_NAMES": [
+    "numpy.core.multiarray._reconstruct", "numpy.ndarray",
+    "numpy.dtype", "numpy.core.multiarray.scalar", "numpy.random.__RandomState_ctor",
+    "numpy.ma.core._mareconstruct", "numpy.ma.core.MaskedArray"
+  ],
+
+  "IMBLEARN_NAMES":[
+    "imblearn.pipeline.Pipeline", "imblearn.over_sampling._random_over_sampler.RandomOverSampler",
+    "imblearn.under_sampling._prototype_selection._edited_nearest_neighbours.EditedNearestNeighbours"
+  ],
+
+  "MLXTEND_NAMES":[
+    "mlxtend.classifier.stacking_cv_classification.StackingCVClassifier",
+    "mlxtend.classifier.stacking_classification.StackingClassifier",
+    "mlxtend.regressor.stacking_cv_regression.StackingCVRegressor",
+    "mlxtend.regressor.stacking_regression.StackingRegressor"
+  ]
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preprocessors.py	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,184 @@
+"""
+Z_RandomOverSampler
+"""
+
+import imblearn
+import numpy as np
+
+from collections import Counter
+from imblearn.over_sampling.base import BaseOverSampler
+from imblearn.over_sampling import RandomOverSampler
+from imblearn.pipeline import Pipeline as imbPipeline
+from imblearn.utils import check_target_type
+from scipy import sparse
+from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.preprocessing.data import _handle_zeros_in_scale
+from sklearn.utils import check_array, safe_indexing
+from sklearn.utils.fixes import nanpercentile
+from sklearn.utils.validation import (check_is_fitted, check_X_y,
+                                      FLOAT_DTYPES)
+
+
+class Z_RandomOverSampler(BaseOverSampler):
+    """
+    Random over-sampler driven by the z-score of a numeric target.
+
+    The target ``y`` is standardized to z-scores and discretized into
+    classes with two thresholds. A ``RandomOverSampler`` is fit on those
+    classes and the sample indices it selects are applied to the original
+    ``X`` and ``y``.
+
+    Parameters
+    ----------
+    sampling_strategy : default='auto'
+        Forwarded unchanged to ``BaseOverSampler`` and
+        ``RandomOverSampler``.
+    return_indices : bool (default=False)
+        If True, ``_fit_resample`` also returns the selected indices.
+    random_state : default=None
+        Forwarded unchanged to ``RandomOverSampler``.
+    ratio : default=None
+        Forwarded unchanged to ``BaseOverSampler`` and
+        ``RandomOverSampler``.
+    negative_thres : number (default=0)
+        Samples whose z-score is greater than this value get class 0.
+    positive_thres : number (default=-1)
+        Samples whose z-score is less than or equal to this value get
+        class 1.
+
+    Attributes
+    ----------
+    sample_indices_ : ndarray
+        Indices selected by the inner ``RandomOverSampler``.
+    """
+
+    def __init__(self, sampling_strategy='auto',
+                 return_indices=False,
+                 random_state=None,
+                 ratio=None,
+                 negative_thres=0,
+                 positive_thres=-1):
+        super(Z_RandomOverSampler, self).__init__(
+            sampling_strategy=sampling_strategy, ratio=ratio)
+        self.random_state = random_state
+        self.return_indices = return_indices
+        self.negative_thres = negative_thres
+        self.positive_thres = positive_thres
+
+    @staticmethod
+    def _check_X_y(X, y):
+        """
+        Validate ``X`` and ``y``; CSR/CSC sparse ``X`` and any dtype are
+        accepted.
+
+        Returns
+        -------
+        X, y, binarize_y
+            The validated inputs and the flag returned by
+            ``check_target_type(y, indicate_one_vs_all=True)``.
+        """
+        y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
+        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'], dtype=None)
+        return X, y, binarize_y
+
+    def _fit_resample(self, X, y):
+        """
+        Resample ``X`` and ``y`` using classes derived from z-scores of ``y``.
+
+        Returns
+        -------
+        X_resampled, y_resampled[, sample_indices]
+            ``X`` and the original (non-discretized) ``y`` indexed by the
+            selected samples; the indices are appended when
+            ``return_indices`` is True.
+        """
+        # convert y to z_score
+        y_z = (y - y.mean()) / y.std()
+
+        # Build all masks before relabeling so that the class labels
+        # written below never influence the threshold comparisons.
+        negative_mask = y_z > self.negative_thres
+        positive_mask = y_z <= self.positive_thres
+        unclassified_mask = ~(negative_mask | positive_mask)
+
+        # Class 1 is written after class 0, so it wins if both thresholds
+        # match the same sample; samples matching neither get class -1.
+        y_z[negative_mask] = 0
+        y_z[positive_mask] = 1
+        y_z[unclassified_mask] = -1
+
+        ros = RandomOverSampler(
+            sampling_strategy=self.sampling_strategy,
+            random_state=self.random_state,
+            ratio=self.ratio)
+        # Only the selected indices are needed, not the resampled labels.
+        ros.fit_resample(X, y_z)
+        sample_indices = ros.sample_indices_
+
+        print("Before sampler: %s. Total after: %s"
+              % (Counter(y_z), sample_indices.shape))
+
+        self.sample_indices_ = np.array(sample_indices)
+
+        if self.return_indices:
+            return (safe_indexing(X, sample_indices),
+                    safe_indexing(y, sample_indices),
+                    sample_indices)
+        return (safe_indexing(X, sample_indices),
+                safe_indexing(y, sample_indices))
+
+
+def _get_quantiles(X, quantile_range):
+    """
+    Calculate column percentiles for 2d array
+
+    Parameters
+    ----------
+    X : {array-like, sparse matrix}, shape [n_samples, n_features]
+        Sparse input is converted to CSC before the per-column slicing.
+    quantile_range : number or sequence of numbers
+        Percentile(s) forwarded to ``nanpercentile`` for every column.
+
+    Returns
+    -------
+    quantiles : ndarray
+        Transpose of the per-column results, i.e. one row per requested
+        percentile when ``quantile_range`` is a sequence.
+    """
+    is_sparse = sparse.issparse(X)
+    if is_sparse:
+        # Slicing ``data`` with ``indptr`` yields one column only in CSC
+        # layout; other sparse formats would give wrong values or fail.
+        X = X.tocsc()
+
+    quantiles = []
+    for feature_idx in range(X.shape[1]):
+        if is_sparse:
+            column_nnz_data = X.data[
+                X.indptr[feature_idx]: X.indptr[feature_idx + 1]]
+            # Pad the stored values with implicit zeros; their position
+            # in the buffer does not matter for percentiles.
+            column_data = np.zeros(shape=X.shape[0], dtype=X.dtype)
+            column_data[:len(column_nnz_data)] = column_nnz_data
+        else:
+            column_data = X[:, feature_idx]
+        quantiles.append(nanpercentile(column_data, quantile_range))
+
+    quantiles = np.transpose(quantiles)
+
+    return quantiles
+
+
+class TDMScaler(BaseEstimator, TransformerMixin):
+    """
+    Scale features using Training Distribution Matching (TDM) algorithm
+
+    Parameters
+    ----------
+    q_lower : float (default=25.0)
+        Lower percentile, must satisfy ``0 <= q_lower <= q_upper``.
+    q_upper : float (default=75.0)
+        Upper percentile, must satisfy ``q_lower <= q_upper <= 100``.
+
+    Attributes
+    ----------
+    q_lower_, q_upper_ : float
+        Lower and upper percentiles of the training data.
+    iqr_ : float
+        ``q_upper_ - q_lower_``, with zero replaced by
+        ``_handle_zeros_in_scale``.
+    max_, min_ : float
+        Maximum and minimum of the training data (NaN ignored).
+
+    References
+    ----------
+    .. [1] Thompson JA, Tan J and Greene CS (2016) Cross-platform
+           normalization of microarray and RNA-seq data for machine
+           learning applications. PeerJ 4, e1621.
+    """
+
+    def __init__(self, q_lower=25.0, q_upper=75.0):
+        self.q_lower = q_lower
+        self.q_upper = q_upper
+
+    def fit(self, X, y=None):
+        """
+        Compute the training percentiles, range and IQR used for scaling.
+
+        Parameters
+        ----------
+        X : array-like, shape [n_samples, n_features]
+        y : ignored
+
+        Returns
+        -------
+        self
+
+        Raises
+        ------
+        ValueError
+            If not ``0 <= q_lower <= q_upper <= 100``.
+        """
+        X = check_array(X, copy=True, estimator=self, dtype=FLOAT_DTYPES,
+                        force_all_finite=True)
+
+        if not 0 <= self.q_lower <= self.q_upper <= 100:
+            raise ValueError("Invalid quantile parameter values: "
+                             "q_lower %s, q_upper: %s"
+                             % (str(self.q_lower), str(self.q_upper)))
+
+        # TODO sparse data
+        # No axis is passed, so the statistics below are taken over all
+        # entries of X rather than per feature.
+        quantiles = nanpercentile(X, (self.q_lower, self.q_upper))
+        iqr = quantiles[1] - quantiles[0]
+
+        self.q_lower_ = quantiles[0]
+        self.q_upper_ = quantiles[1]
+        self.iqr_ = _handle_zeros_in_scale(iqr, copy=False)
+
+        self.max_ = np.nanmax(X)
+        self.min_ = np.nanmin(X)
+
+        return self
+
+    def transform(self, X):
+        """
+        Rescale X so that its distribution matches the training data.
+
+        Values are clipped to bounds derived from the percentiles of X
+        and the training tail-to-IQR ratios, then mapped linearly onto
+        ``[min_, max_]``.
+
+        Parameters
+        ----------
+        X : array-like, shape [n_samples, n_features]
+            The data to scale. Sparse input is not supported yet.
+
+        Returns
+        -------
+        X : ndarray
+            Scaled copy of the input.
+        """
+        # The attribute names must be passed as one list: a second
+        # positional argument is interpreted as ``msg`` (or rejected),
+        # not as another attribute to check.
+        check_is_fitted(self, ['iqr_', 'max_'])
+        X = check_array(X, copy=True, estimator=self, dtype=FLOAT_DTYPES,
+                        force_all_finite=True)
+
+        # TODO sparse data
+        # Width of the training tails expressed in units of training IQR.
+        train_upper_scale = (self.max_ - self.q_upper_) / self.iqr_
+        train_lower_scale = (self.q_lower_ - self.min_) / self.iqr_
+
+        test_quantiles = nanpercentile(X, (self.q_lower, self.q_upper))
+        test_iqr = _handle_zeros_in_scale(
+            test_quantiles[1] - test_quantiles[0], copy=False)
+
+        test_upper_bound = test_quantiles[1] + train_upper_scale * test_iqr
+        test_lower_bound = test_quantiles[0] - train_lower_scale * test_iqr
+
+        # Never place the lower bound below the observed minimum.
+        test_min = np.nanmin(X)
+        if test_lower_bound < test_min:
+            test_lower_bound = test_min
+
+        X[X > test_upper_bound] = test_upper_bound
+        X[X < test_lower_bound] = test_lower_bound
+
+        # Degenerate data can make both bounds equal; avoid 0/0 so the
+        # output stays finite (such values map to ``min_``).
+        bound_range = _handle_zeros_in_scale(
+            test_upper_bound - test_lower_bound, copy=False)
+
+        X = (X - test_lower_bound) / bound_range\
+            * (self.max_ - self.min_) + self.min_
+
+        return X
+
+    def inverse_transform(self, X):
+        """
+        Scale the data back to the original state
+
+        Raises
+        ------
+        NotImplementedError
+            Always; the inverse transformation is not available.
+        """
+        raise NotImplementedError("Inverse transformation is not implemented!")
--- a/search_model_validation.py	Sun Dec 30 01:51:27 2018 -0500
+++ b/search_model_validation.py	Tue May 14 18:05:43 2019 -0400
@@ -1,7 +1,8 @@
+import argparse
+import collections
 import imblearn
 import json
 import numpy as np
-import os
 import pandas
 import pickle
 import skrebate
@@ -9,93 +10,124 @@
 import sys
 import xgboost
 import warnings
+import iraps_classifier
+import model_validations
+import preprocessors
+import feature_selectors
 from imblearn import under_sampling, over_sampling, combine
-from imblearn.pipeline import Pipeline as imbPipeline
-from sklearn import (cluster, compose, decomposition, ensemble, feature_extraction,
-                    feature_selection, gaussian_process, kernel_approximation, metrics,
-                    model_selection, naive_bayes, neighbors, pipeline, preprocessing,
-                    svm, linear_model, tree, discriminant_analysis)
+from scipy.io import mmread
+from mlxtend import classifier, regressor
+from sklearn import (cluster, compose, decomposition, ensemble,
+                     feature_extraction, feature_selection,
+                     gaussian_process, kernel_approximation, metrics,
+                     model_selection, naive_bayes, neighbors,
+                     pipeline, preprocessing, svm, linear_model,
+                     tree, discriminant_analysis)
 from sklearn.exceptions import FitFailedWarning
 from sklearn.externals import joblib
-from utils import get_cv, get_scoring, get_X_y, load_model, read_columns, SafeEval
+from sklearn.model_selection._validation import _score
+
+from utils import (SafeEval, get_cv, get_scoring, get_X_y,
+                   load_model, read_columns)
+from model_validations import train_test_split


-N_JOBS = int(os.environ.get('GALAXY_SLOTS', 1))
+N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
+CACHE_DIR = './cached'
+NON_SEARCHABLE = ('n_jobs', 'pre_dispatch', 'memory', 'steps',
+                  'nthread', 'verbose')


-def get_search_params(params_builder):
+def _eval_search_params(params_builder):
     search_params = {}
-    safe_eval = SafeEval(load_scipy=True, load_numpy=True)
-    safe_eval_es = SafeEval(load_estimators=True)

     for p in params_builder['param_set']:
-        search_p = p['search_param_selector']['search_p']
-        if search_p.strip() == '':
+        search_list = p['sp_list'].strip()
+        if search_list == '':
             continue
-        param_type = p['search_param_selector']['selected_param_type']
+
+        param_name = p['sp_name']
+        if param_name.lower().endswith(NON_SEARCHABLE):
+            print("Warning: `%s` is not eligible for search and was "
+                  "omitted!" % param_name)
+            continue

-        lst = search_p.split(':')
-        assert (len(lst) == 2), "Error, make sure there is one and only one colon in search parameter input."
-        literal = lst[1].strip()
-        param_name = lst[0].strip()
-        if param_name:
-            if param_name.lower() == 'n_jobs':
-                sys.exit("Parameter `%s` is invalid for search." %param_name)
-            elif not param_name.endswith('-'):
-                ev = safe_eval(literal)
-                if param_type == 'final_estimator_p':
-                    search_params['estimator__' + param_name] = ev
-                else:
-                    search_params['preprocessing_' + param_type[5:6] + '__' + param_name] = ev
-            else:
-                # only for estimator eval, add `-` to the end of param
-                #TODO maybe add regular express check
-                ev = safe_eval_es(literal)
-                for obj in ev:
-                    if 'n_jobs' in obj.get_params():
-                        obj.set_params( n_jobs=N_JOBS )
-                if param_type == 'final_estimator_p':
-                    search_params['estimator__' + param_name[:-1]] = ev
-                else:
-                    search_params['preprocessing_' + param_type[5:6] + '__' + param_name[:-1]] = ev
-        elif param_type != 'final_estimator_p':
-            #TODO regular express check ?
-            ev = safe_eval_es(literal)
-            preprocessors = [preprocessing.StandardScaler(), preprocessing.Binarizer(), preprocessing.Imputer(),
-                            preprocessing.MaxAbsScaler(), preprocessing.Normalizer(), preprocessing.MinMaxScaler(),
-                            preprocessing.PolynomialFeatures(),preprocessing.RobustScaler(),
-                            feature_selection.SelectKBest(), feature_selection.GenericUnivariateSelect(),
-                            feature_selection.SelectPercentile(), feature_selection.SelectFpr(), feature_selection.SelectFdr(),
-                            feature_selection.SelectFwe(), feature_selection.VarianceThreshold(),
-                            decomposition.FactorAnalysis(random_state=0), decomposition.FastICA(random_state=0), decomposition.IncrementalPCA(),
-                            decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS), decomposition.LatentDirichletAllocation(random_state=0, n_jobs=N_JOBS),
-                            decomposition.MiniBatchDictionaryLearning(random_state=0, n_jobs=N_JOBS),
-                            decomposition.MiniBatchSparsePCA(random_state=0, n_jobs=N_JOBS), decomposition.NMF(random_state=0),
-                            decomposition.PCA(random_state=0), decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
-                            decomposition.TruncatedSVD(random_state=0),
-                            kernel_approximation.Nystroem(random_state=0), kernel_approximation.RBFSampler(random_state=0),
-                            kernel_approximation.AdditiveChi2Sampler(), kernel_approximation.SkewedChi2Sampler(random_state=0),
-                            cluster.FeatureAgglomeration(),
-                            skrebate.ReliefF(n_jobs=N_JOBS), skrebate.SURF(n_jobs=N_JOBS), skrebate.SURFstar(n_jobs=N_JOBS),
-                            skrebate.MultiSURF(n_jobs=N_JOBS), skrebate.MultiSURFstar(n_jobs=N_JOBS),
-                            imblearn.under_sampling.ClusterCentroids(random_state=0, n_jobs=N_JOBS),
-                            imblearn.under_sampling.CondensedNearestNeighbour(random_state=0, n_jobs=N_JOBS),
-                            imblearn.under_sampling.EditedNearestNeighbours(random_state=0, n_jobs=N_JOBS),
-                            imblearn.under_sampling.RepeatedEditedNearestNeighbours(random_state=0, n_jobs=N_JOBS),
-                            imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
-                            imblearn.under_sampling.InstanceHardnessThreshold(random_state=0, n_jobs=N_JOBS),
-                            imblearn.under_sampling.NearMiss(random_state=0, n_jobs=N_JOBS),
-                            imblearn.under_sampling.NeighbourhoodCleaningRule(random_state=0, n_jobs=N_JOBS),
-                            imblearn.under_sampling.OneSidedSelection(random_state=0, n_jobs=N_JOBS),
-                            imblearn.under_sampling.RandomUnderSampler(random_state=0),
-                            imblearn.under_sampling.TomekLinks(random_state=0, n_jobs=N_JOBS),
-                            imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
-                            imblearn.over_sampling.RandomOverSampler(random_state=0),
-                            imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
-                            imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
-                            imblearn.over_sampling.BorderlineSMOTE(random_state=0, n_jobs=N_JOBS),
-                            imblearn.over_sampling.SMOTENC(categorical_features=[], random_state=0, n_jobs=N_JOBS),
-                            imblearn.combine.SMOTEENN(random_state=0), imblearn.combine.SMOTETomek(random_state=0)]
+        if not search_list.startswith(':'):
+            safe_eval = SafeEval(load_scipy=True, load_numpy=True)
+            ev = safe_eval(search_list)
+            search_params[param_name] = ev
+        else:
+            # A leading `:` in the search list requests estimator evaluation
+            safe_eval_es = SafeEval(load_estimators=True)
+            search_list = search_list[1:].strip()
+            # TODO maybe add a regular expression check
+            ev = safe_eval_es(search_list)
+            preprocessors = (
+                preprocessing.StandardScaler(), preprocessing.Binarizer(),
+                preprocessing.Imputer(), preprocessing.MaxAbsScaler(),
+                preprocessing.Normalizer(), preprocessing.MinMaxScaler(),
+                preprocessing.PolynomialFeatures(),
+                preprocessing.RobustScaler(), feature_selection.SelectKBest(),
+                feature_selection.GenericUnivariateSelect(),
+                feature_selection.SelectPercentile(),
+                feature_selection.SelectFpr(), feature_selection.SelectFdr(),
+                feature_selection.SelectFwe(),
+                feature_selection.VarianceThreshold(),
+                decomposition.FactorAnalysis(random_state=0),
+                decomposition.FastICA(random_state=0),
+                decomposition.IncrementalPCA(),
+                decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
+                decomposition.LatentDirichletAllocation(
+                    random_state=0, n_jobs=N_JOBS),
+                decomposition.MiniBatchDictionaryLearning(
+                    random_state=0, n_jobs=N_JOBS),
+                decomposition.MiniBatchSparsePCA(
+                    random_state=0, n_jobs=N_JOBS),
+                decomposition.NMF(random_state=0),
+                decomposition.PCA(random_state=0),
+                decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
+                decomposition.TruncatedSVD(random_state=0),
+                kernel_approximation.Nystroem(random_state=0),
+                kernel_approximation.RBFSampler(random_state=0),
+                kernel_approximation.AdditiveChi2Sampler(),
+                kernel_approximation.SkewedChi2Sampler(random_state=0),
+                cluster.FeatureAgglomeration(),
+                skrebate.ReliefF(n_jobs=N_JOBS),
+                skrebate.SURF(n_jobs=N_JOBS),
+                skrebate.SURFstar(n_jobs=N_JOBS),
+                skrebate.MultiSURF(n_jobs=N_JOBS),
+                skrebate.MultiSURFstar(n_jobs=N_JOBS),
+                imblearn.under_sampling.ClusterCentroids(
+                    random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.CondensedNearestNeighbour(
+                    random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.EditedNearestNeighbours(
+                    random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.RepeatedEditedNearestNeighbours(
+                    random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.InstanceHardnessThreshold(
+                    random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.NearMiss(
+                    random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.NeighbourhoodCleaningRule(
+                    random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.OneSidedSelection(
+                    random_state=0, n_jobs=N_JOBS),
+                imblearn.under_sampling.RandomUnderSampler(
+                    random_state=0),
+                imblearn.under_sampling.TomekLinks(
+                    random_state=0, n_jobs=N_JOBS),
+                imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
+                imblearn.over_sampling.RandomOverSampler(random_state=0),
+                imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
+                imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
+                imblearn.over_sampling.BorderlineSMOTE(
+                    random_state=0, n_jobs=N_JOBS),
+                imblearn.over_sampling.SMOTENC(
+                    categorical_features=[], random_state=0, n_jobs=N_JOBS),
+                imblearn.combine.SMOTEENN(random_state=0),
+                imblearn.combine.SMOTETomek(random_state=0))
             newlist = []
             for obj in ev:
                 if obj is None:
@@ -114,87 +146,102 @@
                     newlist.extend(preprocessors[31:36])
                 elif obj == 'imb_all':
                     newlist.extend(preprocessors[36:55])
-                elif  type(obj) is int and -1 < obj < len(preprocessors):
+                elif type(obj) is int and -1 < obj < len(preprocessors):
                     newlist.append(preprocessors[obj])
-                elif hasattr(obj, 'get_params'):       # user object
+                elif hasattr(obj, 'get_params'):       # user uploaded object
                     if 'n_jobs' in obj.get_params():
-                        newlist.append( obj.set_params(n_jobs=N_JOBS) )
+                        newlist.append(obj.set_params(n_jobs=N_JOBS))
                     else:
                         newlist.append(obj)
                 else:
-                    sys.exit("Unsupported preprocessor type: %r" %(obj))
-            search_params['preprocessing_' + param_type[5:6]] = newlist
-        else:
-            sys.exit("Parameter name of the final estimator can't be skipped!")
+                    sys.exit("Unsupported estimator type: %r" % (obj))
+
+            search_params[param_name] = newlist

     return search_params


-if __name__ == '__main__':
+def main(inputs, infile_estimator, infile1, infile2,
+         outfile_result, outfile_object=None, groups=None):
+    """
+    Parameter
+    ---------
+    inputs : str
+        File path to galaxy tool parameter
+
+    infile_estimator : str
+        File path to estimator
+
+    infile1 : str
+        File path to dataset containing features
+
+    infile2 : str
+        File path to dataset containing target values
+
+    outfile_result : str
+        File path to save the results, either cv_results or test result
+
+    outfile_object : str, optional
+        File path to save searchCV object
+
+    groups : str
+        File path to dataset containing groups labels
+    """

     warnings.simplefilter('ignore')

-    input_json_path = sys.argv[1]
-    with open(input_json_path, 'r') as param_handler:
+    with open(inputs, 'r') as param_handler:
         params = json.load(param_handler)
-
-    infile_pipeline = sys.argv[2]
-    infile1 = sys.argv[3]
-    infile2 = sys.argv[4]
-    outfile_result = sys.argv[5]
-    if len(sys.argv) > 6:
-        outfile_estimator = sys.argv[6]
-    else:
-        outfile_estimator = None
+    if groups:
+        (params['search_schemes']['options']['cv_selector']
+         ['groups_selector']['infile_g']) = groups

     params_builder = params['search_schemes']['search_params_builder']

     input_type = params['input_options']['selected_input']
     if input_type == 'tabular':
         header = 'infer' if params['input_options']['header1'] else None
-        column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option']
-        if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
+        column_option = (params['input_options']['column_selector_options_1']
+                         ['selected_column_selector_option'])
+        if column_option in ['by_index_number', 'all_but_by_index_number',
+                             'by_header_name', 'all_but_by_header_name']:
             c = params['input_options']['column_selector_options_1']['col1']
         else:
             c = None
         X = read_columns(
                 infile1,
-                c = c,
-                c_option = column_option,
+                c=c,
+                c_option=column_option,
                 sep='\t',
                 header=header,
-                parse_dates=True
-        )
+                parse_dates=True).astype(float)
     else:
         X = mmread(open(infile1, 'r'))

     header = 'infer' if params['input_options']['header2'] else None
-    column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2']
-    if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
+    column_option = (params['input_options']['column_selector_options_2']
+                     ['selected_column_selector_option2'])
+    if column_option in ['by_index_number', 'all_but_by_index_number',
+                         'by_header_name', 'all_but_by_header_name']:
         c = params['input_options']['column_selector_options_2']['col2']
     else:
         c = None
     y = read_columns(
             infile2,
-            c = c,
-            c_option = column_option,
+            c=c,
+            c_option=column_option,
             sep='\t',
             header=header,
-            parse_dates=True
-    )
+            parse_dates=True)
     y = y.ravel()

     optimizer = params['search_schemes']['selected_search_scheme']
     optimizer = getattr(model_selection, optimizer)

     options = params['search_schemes']['options']
+
     splitter, groups = get_cv(options.pop('cv_selector'))
-    if groups is None:
-        options['cv'] = splitter
-    elif groups == '':
-        options['cv'] = list( splitter.split(X, y, groups=None) )
-    else:
-        options['cv'] = list( splitter.split(X, y, groups=groups) )
+    options['cv'] = splitter
     options['n_jobs'] = N_JOBS
     primary_scoring = options['scoring']['primary_scoring']
     options['scoring'] = get_scoring(options['scoring'])
@@ -203,32 +250,117 @@
     else:
         options['error_score'] = np.NaN
     if options['refit'] and isinstance(options['scoring'], dict):
-        options['refit'] = 'primary'
+        options['refit'] = primary_scoring
     if 'pre_dispatch' in options and options['pre_dispatch'] == '':
         options['pre_dispatch'] = None

-    with open(infile_pipeline, 'rb') as pipeline_handler:
-        pipeline = load_model(pipeline_handler)
+    with open(infile_estimator, 'rb') as estimator_handler:
+        estimator = load_model(estimator_handler)
+
+    memory = joblib.Memory(location=CACHE_DIR, verbose=0)
+    # route IRAPSClassifier caching to `memory`; reset other caches/n_jobs
+    if estimator.__class__.__name__ == 'IRAPSClassifier':
+        estimator.set_params(memory=memory)
+    else:
+        for p, v in estimator.get_params().items():
+            if p.endswith('memory'):
+                if len(p) > 8 and p[:-8].endswith('irapsclassifier'):
+                    # caching iraps_core fits could increase search
+                    # speed significantly
+                    new_params = {p: memory}
+                    estimator.set_params(**new_params)
+                elif v:
+                    new_params = {p: None}  # `**` needs a dict, not a set
+                    estimator.set_params(**new_params)
+            elif p.endswith('n_jobs'):  # nested n_jobs are forced to 1
+                new_params = {p: 1}
+                estimator.set_params(**new_params)
+
+    param_grid = _eval_search_params(params_builder)
+    searcher = optimizer(estimator, param_grid, **options)

-    search_params = get_search_params(params_builder)
-    searcher = optimizer(pipeline, search_params, **options)
+    # do train_test_split
+    do_train_test_split = params['train_test_split'].pop('do_split')
+    if do_train_test_split == 'yes':
+        # make sure refit is chosen
+        if not options['refit']:
+            raise ValueError("Refit must be `True` for shuffle splitting!")
+        split_options = params['train_test_split']
+
+        # splits (X_test / y_test are scored after the search)
+        if split_options['shuffle'] == 'stratified':
+            split_options['labels'] = y
+            X, X_test, y, y_test = train_test_split(X, y, **split_options)
+        elif split_options['shuffle'] == 'group':
+            if groups is None or len(groups) == 0:
+                raise ValueError("No group based CV option was "
+                                 "choosen for group shuffle!")
+            split_options['labels'] = groups  # groups is split as well
+            X, X_test, y, y_test, groups, _ =\
+                train_test_split(X, y, groups, **split_options)
+        else:
+            if split_options['shuffle'] == 'None':
+                split_options['shuffle'] = None
+            X, X_test, y, y_test =\
+                train_test_split(X, y, **split_options)
+    # end train_test_split

     if options['error_score'] == 'raise':
-        searcher.fit(X, y)
+        searcher.fit(X, y, groups=groups)
     else:
         warnings.simplefilter('always', FitFailedWarning)
         with warnings.catch_warnings(record=True) as w:
             try:
-                searcher.fit(X, y)
+                searcher.fit(X, y, groups=groups)
             except ValueError:
                 pass
             for warning in w:
                 print(repr(warning.message))

-    cv_result = pandas.DataFrame(searcher.cv_results_)
-    cv_result.rename(inplace=True, columns={'mean_test_primary': 'mean_test_'+primary_scoring, 'rank_test_primary': 'rank_test_'+primary_scoring})
-    cv_result.to_csv(path_or_buf=outfile_result, sep='\t', header=True, index=False)
+    if do_train_test_split == 'no':
+        # save results
+        cv_results = pandas.DataFrame(searcher.cv_results_)
+        cv_results = cv_results[sorted(cv_results.columns)]
+        cv_results.to_csv(path_or_buf=outfile_result, sep='\t',
+                          header=True, index=False)
+
+    # output test result using best_estimator_
+    else:
+        best_estimator_ = searcher.best_estimator_
+        if isinstance(options['scoring'], collections.Mapping):
+            is_multimetric = True
+        else:
+            is_multimetric = False

-    if outfile_estimator:
-        with open(outfile_estimator, 'wb') as output_handler:
-            pickle.dump(searcher.best_estimator_, output_handler, pickle.HIGHEST_PROTOCOL)
+        test_score = _score(best_estimator_, X_test,
+                            y_test, options['scoring'],
+                            is_multimetric=is_multimetric)
+        if not is_multimetric:
+            test_score = {primary_scoring: test_score}
+        for key, value in test_score.items():
+            test_score[key] = [value]
+        result_df = pandas.DataFrame(test_score)
+        result_df.to_csv(path_or_buf=outfile_result, sep='\t',
+                         header=True, index=False)
+
+    memory.clear(warn=False)
+
+    if outfile_object:
+        with open(outfile_object, 'wb') as output_handler:
+            pickle.dump(searcher, output_handler, pickle.HIGHEST_PROTOCOL)
+
+
+if __name__ == '__main__':
+    aparser = argparse.ArgumentParser()  # flags match the XML <command>
+    aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
+    aparser.add_argument("-e", "--estimator", dest="infile_estimator")
+    aparser.add_argument("-X", "--infile1", dest="infile1")
+    aparser.add_argument("-y", "--infile2", dest="infile2")
+    aparser.add_argument("-r", "--outfile_result", dest="outfile_result")
+    aparser.add_argument("-o", "--outfile_object", dest="outfile_object")
+    aparser.add_argument("-g", "--groups", dest="groups")  # group CV only
+    args = aparser.parse_args()  # flags left out on the CLI parse as None
+
+    main(args.inputs, args.infile_estimator, args.infile1, args.infile2,
+         args.outfile_result, outfile_object=args.outfile_object,
+         groups=args.groups)
--- a/search_model_validation.xml	Sun Dec 30 01:51:27 2018 -0500
+++ b/search_model_validation.xml	Tue May 14 18:05:43 2019 -0400
@@ -3,23 +3,24 @@
     <macros>
         <import>main_macros.xml</import>
     </macros>
-    <expand macro="python_requirements">
-        <requirement type="package" version="0.6">skrebate</requirement>
-        <requirement type="package" version="0.4.2">imbalanced-learn</requirement>
-    </expand>
+    <expand macro="python_requirements"/>
     <expand macro="macro_stdio"/>
     <version_command>echo "@VERSION@"</version_command>
     <command>
         <![CDATA[
         python '$__tool_directory__/search_model_validation.py'
-            '$inputs'
-            '$search_schemes.infile_pipeline'
-            '$input_options.infile1'
-            '$input_options.infile2'
-            '$outfile_result'
-            #if $save:
-            '$outfile_estimator'
+            --inputs '$inputs'
+            --estimator '$search_schemes.infile_estimator'
+            --infile1 '$input_options.infile1'
+            --infile2 '$input_options.infile2'
+            --outfile_result '$outfile_result'
+            #if $save
+            --outfile_object '$outfile_object'
             #end if
+            #if $search_schemes.options.cv_selector.selected_cv in ['GroupKFold', 'GroupShuffleSplit', 'LeaveOneGroupOut', 'LeavePGroupsOut']
+            --groups '$inputs,$search_schemes.options.cv_selector.groups_selector.infile_g'
+            #end if
+
         ]]>
     </command>
     <configfiles>
@@ -27,7 +28,7 @@
     </configfiles>
     <inputs>
         <conditional name="search_schemes">
-            <param name="selected_search_scheme" type="select" label="Select a model selection search scheme:">
+            <param name="selected_search_scheme" type="select" label="Select a model selection search scheme">
                 <option value="GridSearchCV" selected="true">GridSearchCV - Exhaustive search over specified parameter values for an estimator </option>
                 <option value="RandomizedSearchCV">RandomizedSearchCV - Randomized search on hyper parameters for an estimator</option>
             </param>
@@ -46,27 +47,46 @@
                 </section>
             </when>
         </conditional>
-        <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Save the best estimator/pipeline?"/>
+        <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Save the searchCV object"/>
         <expand macro="sl_mixed_input"/>
+        <conditional name="train_test_split">
+            <param name="do_split" type="select" label="Whether to hold a portion of samples for test exclusively?" help="train_test_split">
+                <option value="no">Nope</option>
+                <option value="yes">Yes - I do</option>
+            </param>
+            <when value='no'/>
+            <when value='yes'>
+                <param argument="test_size" type="float" optional="True" value="0.25" label="Test size:"/>
+                <param argument="train_size" type="float" optional="True" value="" label="Train size:"/>
+                <param argument="random_state" type="integer" optional="True" value="" label="Random seed number:"/>
+                <param argument="shuffle" type="select">
+                    <option value="None">None - No shuffle</option>
+                    <option value="simple">Shuffle -- for regression problems</option>
+                    <option value="stratified">StratifiedShuffle -- will use the target values as class labels</option>
+                    <option value="group">GroupShuffle -- make sure group CV option is chosen</option>
+                </param>
+            </when>
+        </conditional>
     </inputs>
     <outputs>
         <data format="tabular" name="outfile_result"/>
-        <data format="zip" name="outfile_estimator" label="${tool.name}: best estimator on ${on_string}">
+        <data format="zip" name="outfile_object" label="${search_schemes.selected_search_scheme} on ${on_string}">
             <filter>save</filter>
         </data>
     </outputs>
     <tests>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline01" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="C: [1, 10, 100, 1000]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="k: [-1, 3, 5, 7, 9]"/>
-                <param name="selected_param_type" value="prep_2_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline01" ftype="zip"/>
+            <param name="infile_params" value="get_params01.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="[1, 10, 100, 1000]"/>
+                <param name="sp_name" value="svr__C"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[-1, 3, 5, 7, 9]"/>
+                <param name="sp_name" value="selectkbest__k"/>
+            </repeat>
             <param name="error_score" value="false"/>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
@@ -78,21 +98,22 @@
                 <assert_contents>
                     <has_n_columns n="13"/>
                     <has_text text="0.7938837807353147"/>
-                    <has_text text="{'estimator__C': 1, 'preprocessing_2__k': 9}"/>
+                    <has_text text="{'selectkbest__k': 9, 'svr__C': 1}"/>
                 </assert_contents>
             </output>
         </test>
         <test expect_failure="true">
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline01" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="C: [1, 10, 100, 1000]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="k: [-1, 3, 5, 7, 9]"/>
-                <param name="selected_param_type" value="prep_2_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline01" ftype="zip"/>
+            <param name="infile_params" value="get_params01.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="[1, 10, 100, 1000]"/>
+                <param name="sp_name" value="svr__C"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[-1, 3, 5, 7, 9]"/>
+                <param name="sp_name" value="selectkbest__k"/>
+            </repeat>
             <param name="error_score" value="true"/>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
@@ -103,23 +124,24 @@
         </test>
         <test>
             <param name="selected_search_scheme" value="RandomizedSearchCV"/>
-            <param name="infile_pipeline" value="pipeline01" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="C: [1, 10, 100, 1000]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="kernel: ['linear', 'poly', 'rbf', 'sigmoid']"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="k: [3, 5, 7, 9]"/>
-                <param name="selected_param_type" value="prep_2_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="with_centering: [True, False]"/>
-                <param name="selected_param_type" value="prep_1_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline01" ftype="zip"/>
+            <param name="infile_params" value="get_params01.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="[1, 10, 100, 1000]"/>
+                <param name="sp_name" value="svr__C"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="['linear', 'poly', 'rbf', 'sigmoid']"/>
+                <param name="sp_name" value="svr__kernel"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[3, 5, 7, 9]"/>
+                <param name="sp_name" value="selectkbest__k"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[True, False]"/>
+                <param name="sp_name" value="robustscaler__with_centering"/>
+            </repeat>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
             <param name="selected_column_selector_option" value="all_columns"/>
@@ -129,29 +151,30 @@
             <output name="outfile_result" >
                 <assert_contents>
                     <has_n_columns n="15" />
-                    <has_text text="param_preprocessing_1__with_centering"/>
+                    <has_text text="param_robustscaler__with_centering"/>
                 </assert_contents>
             </output>
         </test>
         <test>
             <param name="selected_search_scheme" value="RandomizedSearchCV"/>
-            <param name="infile_pipeline" value="pipeline03" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="n_estimators: np_arange(50, 1001, 50)"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="max_depth: scipy_stats_randint(1, 51)"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="gamma: scipy_stats_uniform(0., 1.)"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="random_state: [324089]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline03" ftype="zip"/>
+            <param name="infile_params" value="get_params03.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="np_arange(50, 1001, 50)"/>
+                <param name="sp_name" value="xgbclassifier__n_estimators"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="scipy_stats_randint(1, 51)"/>
+                <param name="sp_name" value="xgbclassifier__max_depth"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="scipy_stats_uniform(0., 1.)"/>
+                <param name="sp_name" value="xgbclassifier__gamma"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[324089]"/>
+                <param name="sp_name" value="xgbclassifier__random_state"/>
+            </repeat>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
             <param name="selected_column_selector_option" value="all_columns"/>
@@ -161,21 +184,22 @@
             <output name="outfile_result" >
                 <assert_contents>
                     <has_n_columns n="15" />
-                    <has_text text="param_estimator__max_depth"/>
+                    <has_text text="param_xgbclassifier__max_depth"/>
                 </assert_contents>
             </output>
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline04" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="random_state: list(range(100, 1001, 100))"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="estimator-: [sklearn_ensemble.ExtraTreesClassifier(n_estimators=100, random_state=324089)]"/>
-                <param name="selected_param_type" value="prep_1_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline04" ftype="zip"/>
+            <param name="infile_params" value="get_params04.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="list(range(100, 1001, 100))"/>
+                <param name="sp_name" value="linearsvc__random_state"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value=": [sklearn_ensemble.ExtraTreesClassifier(n_estimators=100, random_state=324089)]"/>
+                <param name="sp_name" value="selectfrommodel__estimator"/>
+            </repeat>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
             <param name="selected_column_selector_option" value="all_columns"/>
@@ -191,30 +215,32 @@
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline01" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="C: [1, 10, 100, 1000]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline01" ftype="zip"/>
+            <param name="infile_params" value="get_params01.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="[1, 10, 100, 1000]"/>
+                <param name="sp_name" value="svr__C"/>
+            </repeat>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
             <param name="selected_column_selector_option" value="all_columns"/>
             <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
             <param name="header2" value="true" />
             <param name="selected_column_selector_option2" value="all_columns"/>
-            <output name="outfile_estimator" file="searchCV01" compare="sim_size" delta="1"/>
+            <output name="outfile_object" file="searchCV01" compare="sim_size" delta="10"/>
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline06" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="n_estimators: [10, 50, 200, 1000]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="random_state: [324089]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline06" ftype="zip"/>
+            <param name="infile_params" value="get_params06.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="[10, 50, 200, 1000]"/>
+                <param name="sp_name" value="adaboostregressor__n_estimators"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[324089]"/>
+                <param name="sp_name" value="adaboostregressor__random_state"/>
+            </repeat>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
             <param name="selected_column_selector_option" value="all_columns"/>
@@ -230,19 +256,20 @@
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline07" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="n_estimators: [10, 50, 100, 200]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="random_state: [324089]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="gamma: [1.0, 2.0]"/>
-                <param name="selected_param_type" value="prep_1_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline07" ftype="zip"/>
+            <param name="infile_params" value="get_params07.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="[10, 50, 100, 200]"/>
+                <param name="sp_name" value="adaboostclassifier__n_estimators"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[324089]"/>
+                <param name="sp_name" value="adaboostclassifier__random_state"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[1.0, 2.0]"/>
+                <param name="sp_name" value="rbfsampler__gamma"/>
+            </repeat>
             <param name='selected_cv' value="default"/>
             <param name="n_splits" value="3"/>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
@@ -260,19 +287,20 @@
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline08" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="n_estimators: [10, 50, 100, 200]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="random_state: [324089]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="linkage: ['ward', 'complete', 'average']"/>
-                <param name="selected_param_type" value="prep_1_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline08" ftype="zip"/>
+            <param name="infile_params" value="get_params08.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="[10, 50, 100, 200]"/>
+                <param name="sp_name" value="adaboostclassifier__n_estimators"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[324089]"/>
+                <param name="sp_name" value="adaboostclassifier__random_state"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="['ward', 'complete', 'average']"/>
+                <param name="sp_name" value="featureagglomeration__linkage"/>
+            </repeat>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
             <param name="selected_column_selector_option" value="all_columns"/>
@@ -287,11 +315,12 @@
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline01" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="C: [1, 10, 100, 1000]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline01" ftype="zip"/>
+            <param name="infile_params" value="get_params01.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="[1, 10, 100, 1000]"/>
+                <param name="sp_name" value="svr__C"/>
+            </repeat>
             <param name='selected_cv' value="StratifiedKFold"/>
             <param name="n_splits" value="3"/>
             <param name="shuffle" value="true" />
@@ -302,19 +331,20 @@
             <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
             <param name="header2" value="true" />
             <param name="selected_column_selector_option2" value="all_columns"/>
-            <output name="outfile_estimator" file="searchCV02" compare="sim_size" delta="1"/>
+            <output name="outfile_object" file="searchCV02" compare="sim_size" delta="10"/>
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline03" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="n_estimators: [10, 50, 200, 1000]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="random_state: [324089]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline03" ftype="zip"/>
+            <param name="infile_params" value="get_params03.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="[10, 50, 200, 1000]"/>
+                <param name="sp_name" value="xgbclassifier__n_estimators"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[324089]"/>
+                <param name="sp_name" value="xgbclassifier__random_state"/>
+            </repeat>
             <param name="primary_scoring" value="balanced_accuracy"/>
             <param name='selected_cv' value="StratifiedKFold"/>
             <param name="n_splits" value="3"/>
@@ -335,15 +365,16 @@
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline09" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="n_neighbors: [50, 100, 150, 200]"/>
-                <param name="selected_param_type" value="prep_1_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="random_state: [324089]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline09" ftype="zip"/>
+            <param name="infile_params" value="get_params09.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="[50, 100, 150, 200]"/>
+                <param name="sp_name" value="relieff__n_neighbors"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[324089]"/>
+                <param name="sp_name" value="randomforestregressor__random_state"/>
+            </repeat>
             <param name="primary_scoring" value="explained_variance"/>
             <param name="secondary_scoring" value="neg_mean_squared_error,r2"/>
             <param name='selected_cv' value="StratifiedKFold"/>
@@ -367,11 +398,12 @@
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline02" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="eps: [0.01, 0.001]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline02" ftype="zip"/>
+            <param name="infile_params" value="get_params02.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="[0.01, 0.001]"/>
+                <param name="sp_name" value="lassocv__eps"/>
+            </repeat>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
             <param name="selected_column_selector_option" value="all_columns"/>
@@ -381,17 +413,18 @@
             <output name="outfile_result">
                 <assert_contents>
                     <has_n_columns n="12"/>
-                    <has_text text="0.7762968161366681" />
+                    <has_text text="0.776296816136668" />
                 </assert_contents>
             </output>
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline05" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="n_estimators: [10, 50, 100, 300]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline05" ftype="zip"/>
+            <param name="infile_params" value="get_params05.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="[10, 50, 100, 300]"/>
+                <param name="sp_name" value="randomforestregressor__n_estimators"/>
+            </repeat>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
             <param name="selected_column_selector_option" value="all_columns"/>
@@ -407,11 +440,12 @@
         </test>
         <test expect_failure="true">
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline01" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="C: open('~/.ssh/authorized_keys', 'r').read()"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline01" ftype="zip"/>
+            <param name="infile_params" value="get_params01.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="open('~/.ssh/authorized_keys', 'r').read()"/>
+                <param name="sp_name" value="svr__C"/>
+            </repeat>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
             <param name="selected_column_selector_option" value="all_columns"/>
@@ -421,15 +455,16 @@
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline10" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="base_estimator-: [sklearn_tree.DecisionTreeRegressor(random_state=0), sklearn_tree.ExtraTreeRegressor(random_state=0)]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="random_state: [10]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline10" ftype="zip"/>
+            <param name="infile_params" value="get_params10.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value=": [sklearn_tree.DecisionTreeRegressor(random_state=0), sklearn_tree.ExtraTreeRegressor(random_state=0)]"/>
+                <param name="sp_name" value="adaboostregressor__base_estimator"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[10]"/>
+                <param name="sp_name" value="adaboostregressor__random_state"/>
+            </repeat>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
             <param name="selected_column_selector_option" value="all_columns"/>
@@ -445,16 +480,17 @@
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline09" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value=": [sklearn_feature_selection.SelectKBest(),
+            <param name="infile_estimator" value="pipeline09" ftype="zip"/>
+            <param name="infile_params" value="get_params09.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value=": [sklearn_feature_selection.SelectKBest(),
                         sklearn_feature_selection.VarianceThreshold(), skrebate_ReliefF(), sklearn_preprocessing.RobustScaler()]"/>
-                <param name="selected_param_type" value="prep_1_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="random_state: [10]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
+                <param name="sp_name" value="relieff"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[10]"/>
+                <param name="sp_name" value="randomforestregressor__random_state"/>
+            </repeat>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
             <param name="selected_column_selector_option" value="all_columns"/>
@@ -470,15 +506,16 @@
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline09" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value=": [None,'sk_prep_all', 8, 14, skrebate_ReliefF(n_features_to_select=12)]"/>
-                <param name="selected_param_type" value="prep_1_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="random_state: [10]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline09" ftype="zip"/>
+            <param name="infile_params" value="get_params09.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value=": [None,'sk_prep_all', 8, 14, skrebate_ReliefF(n_features_to_select=12)]"/>
+                <param name="sp_name" value="relieff"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[10]"/>
+                <param name="sp_name" value="randomforestregressor__random_state"/>
+            </repeat>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
             <param name="selected_column_selector_option" value="all_columns"/>
@@ -494,23 +531,24 @@
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline11" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="n_neighbors: [3,4,5]"/>
-                <param name="selected_param_type" value="prep_1_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="random_state: [10]"/>
-                <param name="selected_param_type" value="prep_1_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="n_estimators:[10, 50, 100, 500]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="random_state: [10]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline11" ftype="zip"/>
+            <param name="infile_params" value="get_params11.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="[3,4,5]"/>
+                <param name="sp_name" value="editednearestneighbours__n_neighbors"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[10]"/>
+                <param name="sp_name" value="editednearestneighbours__random_state"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[10, 50, 100, 500]"/>
+                <param name="sp_name" value="randomforestclassifier__n_estimators"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[10]"/>
+                <param name="sp_name" value="randomforestclassifier__random_state"/>
+            </repeat>
             <param name="primary_scoring" value="f1_macro"/>
             <param name="secondary_scoring" value="balanced_accuracy,accuracy"/>
             <param name="n_splits" value="5"/>
@@ -531,15 +569,16 @@
         </test>
         <test>
             <param name="selected_search_scheme" value="GridSearchCV"/>
-            <param name="infile_pipeline" value="pipeline12" ftype="zip"/>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="estimator__n_estimators: [10, 100, 200]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
-            <conditional name="search_param_selector">
-                <param name="search_p" value="n_features_to_select: [10, None]"/>
-                <param name="selected_param_type" value="final_estimator_p"/>
-            </conditional>
+            <param name="infile_estimator" value="pipeline12" ftype="zip"/>
+            <param name="infile_params" value="get_params12.tabular" ftype="tabular"/>
+            <repeat name="param_set">
+                <param name="sp_list" value="[10, 100, 200]"/>
+                <param name="sp_name" value="rfe__estimator__n_estimators"/>
+            </repeat>
+            <repeat name="param_set">
+                <param name="sp_list" value="[10, None]"/>
+                <param name="sp_name" value="rfe__n_features_to_select"/>
+            </repeat>
             <param name="primary_scoring" value="r2"/>
             <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
             <param name="header1" value="true" />
@@ -554,38 +593,66 @@
                 </assert_contents>
             </output>
         </test>
+        <!--test>
+            <conditional name="search_schemes">
+                <param name="selected_search_scheme" value="GridSearchCV"/>
+                <param name="infile_estimator" value="pipeline05" ftype="zip"/>
+                <section name="search_params_builder">
+                    <param name="infile_params" value="get_params05.tabular" ftype="tabular"/>
+                    <repeat name="param_set">
+                        <param name="sp_list" value="[10, 50, 100, 300]"/>
+                        <param name="sp_name" value="randomforestregressor__n_estimators"/>
+                    </repeat>
+                </section>
+            </conditional>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+            <param name="header1" value="true" />
+            <param name="selected_column_selector_option" value="all_columns"/>
+            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
+            <param name="header2" value="true" />
+            <param name="selected_column_selector_option2" value="all_columns"/>
+            <output name="outfile_result">
+                <assert_contents>
+                    <has_n_columns n="1"/>
+                    <has_text text="0.7986842219788204" />
+                </assert_contents>
+            </output>
+        </test-->
     </tests>
     <help>
         <![CDATA[
 **What it does**
-Searches optimized parameter values for an estimator or pipeline through either exhaustive grid cross validation search or Randomized cross validation search.
+Searches for optimal parameter settings for an estimator or pipeline through either an exhaustive grid search or a randomized search, both with cross-validation.
 please refer to `Scikit-learn model_selection GridSearchCV`_, `Scikit-learn model_selection RandomizedSearchCV`_ and `Tuning hyper-parameters`_.

-**How to choose search patameters?**
+**Return**
+
+Outputs `cv_results_` from SearchCV as a tabular dataset if no train_test_split is performed; otherwise outputs the test score(s). In addition, the fitted SearchCV object can optionally be output.
+
+**How to choose the search parameter grid?**

 Please refer to `svm`_, `linear_model`_, `ensemble`_, `naive_bayes`_, `tree`_, `neighbors`_ and `xgboost`_ for estimator parameters.
-Refer to `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_, `cluster.FeatureAgglomeration`_ and `skrebate`_ for parameter in the pre-processing steps.
-
-**Search parameter input** accepts parameter and setting in key:value pair. One pair per input box. Setting can be list, numpy array, or distribution.
-The evaluation of settings supports operations in Math, list comprehension, numpy.arange(np_arange), most numpy.random(e.g., np_random_uniform) and some scipy.stats(e.g., scipy_stats_zipf) classes or functions, and others.
+Refer to `sklearn.preprocessing`_, `feature_selection`_, `decomposition`_, `kernel_approximation`_, `cluster.FeatureAgglomeration`_
+and `skrebate`_ for parameters in the pre-processing steps.

-**Examples:**
+**Search parameter list** can be a list, a numpy array, or a distribution. The evaluation of settings supports math operations,
+list comprehensions, numpy.arange (np_arange), most numpy.random functions (e.g., np_random_uniform), some scipy.stats classes or functions (e.g., scipy_stats_zipf), and others.

-- K: [3, 5, 7, 9]
+Examples:

-- n_estimators: list(range(50, 1001, 50))
+- [3, 5, 7, 9]

-- gamma: np_arange(0.01, 1, 0.1)
+- list(range(50, 1001, 50))

-- alpha: np_random_choice(list(range(1, 51)) + [None], size=20)
+- np_arange(0.01, 1, 0.1)

-- max_depth: scipy_stats_randin(1, 11)
+- np_random_choice(list(range(1, 51)) + [None], size=20)

-**Estimator search/eval (additional '-')**::
+- scipy_stats_randint(1, 11)

-     base_estimator-: [sklearn_tree.DecisionTreeRegressor(), sklearn_tree.ExtraTreeRegressor()]
+**Estimator / Preprocessor search (additional `:` in the front)**::

-**Preprocessors search/swap**::
+     : [sklearn_tree.DecisionTreeRegressor(), sklearn_tree.ExtraTreeRegressor()]

      : [sklearn_feature_selection.SelectKBest(), sklearn_feature_selection.VarianceThreshold(),
         skrebate_ReliefF(), sklearn_preprocessing.RobustScaler()]
@@ -656,6 +723,17 @@
      : [None, 'sk_prep_all', 22, 'k_appr_all', sklearn_feature_selection.SelectKBest(k=50)]


+
+**Whether to do train_test_split?**
+
+Please refer to `https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation`_
+
+
+.. image:: https://scikit-learn.org/stable/_images/grid_search_cross_validation.png
+    :height: 300
+    :width: 400
+
+
 .. _`Scikit-learn model_selection GridSearchCV`: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
 .. _`Scikit-learn model_selection RandomizedSearchCV`: http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html
 .. _`Tuning hyper-parameters`: http://scikit-learn.org/stable/modules/grid_search.html
@@ -674,6 +752,7 @@
 .. _`kernel_approximation`: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation
 .. _`cluster.FeatureAgglomeration`: http://scikit-learn.org/stable/modules/generated/sklearn.cluster.FeatureAgglomeration.html
 .. _`skrebate`: https://epistasislab.github.io/scikit-rebate/using/
+.. _`https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation`: https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation

         ]]>
     </help>
--- a/sk_whitelist.json	Sun Dec 30 01:51:27 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,761 +0,0 @@
-{ "SK_NAMES": [
-    "sklearn._ASSUME_FINITE", "sklearn._isotonic._inplace_contiguous_isotonic_regression",
-    "sklearn._isotonic._make_unique", "sklearn.base.BaseEstimator",
-    "sklearn.base.BiclusterMixin", "sklearn.base.ClassifierMixin",
-    "sklearn.base.ClusterMixin", "sklearn.base.DensityMixin",
-    "sklearn.base.MetaEstimatorMixin", "sklearn.base.RegressorMixin",
-    "sklearn.base.TransformerMixin", "sklearn.base._first_and_last_element",
-    "sklearn.base._pprint", "sklearn.base.clone",
-    "sklearn.base.is_classifier", "sklearn.base.is_regressor",
-    "sklearn.clone", "sklearn.cluster.AffinityPropagation",
-    "sklearn.cluster.AgglomerativeClustering", "sklearn.cluster.Birch",
-    "sklearn.cluster.DBSCAN", "sklearn.cluster.FeatureAgglomeration",
-    "sklearn.cluster.KMeans", "sklearn.cluster.MeanShift",
-    "sklearn.cluster.MiniBatchKMeans", "sklearn.cluster.SpectralBiclustering",
-    "sklearn.cluster.SpectralClustering", "sklearn.cluster.SpectralCoclustering",
-    "sklearn.cluster._dbscan_inner.dbscan_inner", "sklearn.cluster._feature_agglomeration.AgglomerationTransform",
-    "sklearn.cluster._hierarchical.WeightedEdge", "sklearn.cluster._hierarchical._get_parents",
-    "sklearn.cluster._hierarchical._hc_get_descendent", "sklearn.cluster._hierarchical.average_merge",
-    "sklearn.cluster._hierarchical.compute_ward_dist", "sklearn.cluster._hierarchical.hc_get_heads",
-    "sklearn.cluster._hierarchical.max_merge", "sklearn.cluster._k_means._assign_labels_array",
-    "sklearn.cluster._k_means._assign_labels_csr", "sklearn.cluster._k_means._centers_dense",
-    "sklearn.cluster._k_means._centers_sparse", "sklearn.cluster._k_means._mini_batch_update_csr",
-    "sklearn.cluster._k_means_elkan.k_means_elkan", "sklearn.cluster.affinity_propagation",
-    "sklearn.cluster.affinity_propagation_.AffinityPropagation", "sklearn.cluster.affinity_propagation_.affinity_propagation",
-    "sklearn.cluster.bicluster.BaseSpectral", "sklearn.cluster.bicluster.SpectralBiclustering",
-    "sklearn.cluster.bicluster.SpectralCoclustering", "sklearn.cluster.bicluster._bistochastic_normalize",
-    "sklearn.cluster.bicluster._log_normalize", "sklearn.cluster.bicluster._scale_normalize",
-    "sklearn.cluster.birch.Birch", "sklearn.cluster.birch._CFNode",
-    "sklearn.cluster.birch._CFSubcluster", "sklearn.cluster.birch._iterate_sparse_X",
-    "sklearn.cluster.birch._split_node", "sklearn.cluster.dbscan",
-    "sklearn.cluster.dbscan_.DBSCAN", "sklearn.cluster.dbscan_.dbscan",
-    "sklearn.cluster.estimate_bandwidth", "sklearn.cluster.get_bin_seeds",
-    "sklearn.cluster.hierarchical.AgglomerativeClustering", "sklearn.cluster.hierarchical.FeatureAgglomeration",
-    "sklearn.cluster.hierarchical._TREE_BUILDERS", "sklearn.cluster.hierarchical._average_linkage",
-    "sklearn.cluster.hierarchical._complete_linkage", "sklearn.cluster.hierarchical._fix_connectivity",
-    "sklearn.cluster.hierarchical._hc_cut", "sklearn.cluster.hierarchical.linkage_tree",
-    "sklearn.cluster.hierarchical.ward_tree", "sklearn.cluster.k_means",
-    "sklearn.cluster.k_means_.FLOAT_DTYPES", "sklearn.cluster.k_means_.KMeans",
-    "sklearn.cluster.k_means_.MiniBatchKMeans", "sklearn.cluster.k_means_._init_centroids",
-    "sklearn.cluster.k_means_._k_init", "sklearn.cluster.k_means_._kmeans_single_elkan",
-    "sklearn.cluster.k_means_._kmeans_single_lloyd", "sklearn.cluster.k_means_._labels_inertia",
-    "sklearn.cluster.k_means_._labels_inertia_precompute_dense", "sklearn.cluster.k_means_._mini_batch_convergence",
-    "sklearn.cluster.k_means_._mini_batch_step", "sklearn.cluster.k_means_._tolerance",
-    "sklearn.cluster.k_means_._validate_center_shape", "sklearn.cluster.k_means_.k_means",
-    "sklearn.cluster.k_means_.string_types", "sklearn.cluster.linkage_tree",
-    "sklearn.cluster.mean_shift", "sklearn.cluster.mean_shift_.MeanShift",
-    "sklearn.cluster.mean_shift_._mean_shift_single_seed", "sklearn.cluster.mean_shift_.estimate_bandwidth",
-    "sklearn.cluster.mean_shift_.get_bin_seeds", "sklearn.cluster.mean_shift_.mean_shift",
-    "sklearn.cluster.spectral.SpectralClustering", "sklearn.cluster.spectral.discretize",
-    "sklearn.cluster.spectral.spectral_clustering", "sklearn.cluster.spectral_clustering",
-    "sklearn.cluster.ward_tree", "sklearn.config_context", "sklearn.compose.TransformedTargetRegressor",
-    "sklearn.compose._target.TransformedTargetRegressor", "sklearn.compose.ColumnTransformer",
-    "sklearn.compose._column_transformer.ColumnTransformer", "sklearn.compose.make_column_transformer",
-    "sklearn.compose._column_transformer.make_column_transformer",
-    "sklearn.covariance.EllipticEnvelope", "sklearn.covariance.EmpiricalCovariance",
-    "sklearn.covariance.GraphLasso", "sklearn.covariance.GraphLassoCV",
-    "sklearn.covariance.LedoitWolf", "sklearn.covariance.MinCovDet",
-    "sklearn.covariance.OAS", "sklearn.covariance.ShrunkCovariance",
-    "sklearn.covariance.empirical_covariance", "sklearn.covariance.empirical_covariance_.EmpiricalCovariance",
-    "sklearn.covariance.empirical_covariance_.empirical_covariance", "sklearn.covariance.empirical_covariance_.log_likelihood",
-    "sklearn.covariance.fast_mcd", "sklearn.covariance.graph_lasso",
-    "sklearn.covariance.graph_lasso_.GraphLasso", "sklearn.covariance.graph_lasso_.GraphLassoCV",
-    "sklearn.covariance.graph_lasso_._dual_gap", "sklearn.covariance.graph_lasso_._objective",
-    "sklearn.covariance.graph_lasso_.alpha_max", "sklearn.covariance.graph_lasso_.graph_lasso",
-    "sklearn.covariance.graph_lasso_.graph_lasso_path", "sklearn.covariance.ledoit_wolf",
-    "sklearn.covariance.ledoit_wolf_shrinkage", "sklearn.covariance.log_likelihood",
-    "sklearn.covariance.oas", "sklearn.covariance.outlier_detection.EllipticEnvelope",
-    "sklearn.covariance.robust_covariance.MinCovDet", "sklearn.covariance.robust_covariance._c_step",
-    "sklearn.covariance.robust_covariance.c_step", "sklearn.covariance.robust_covariance.fast_mcd",
-    "sklearn.covariance.robust_covariance.select_candidates", "sklearn.covariance.shrunk_covariance",
-    "sklearn.covariance.shrunk_covariance_.LedoitWolf", "sklearn.covariance.shrunk_covariance_.OAS",
-    "sklearn.covariance.shrunk_covariance_.ShrunkCovariance", "sklearn.covariance.shrunk_covariance_.ledoit_wolf",
-    "sklearn.covariance.shrunk_covariance_.ledoit_wolf_shrinkage", "sklearn.covariance.shrunk_covariance_.oas",
-    "sklearn.covariance.shrunk_covariance_.shrunk_covariance", "sklearn.decomposition.DictionaryLearning",
-    "sklearn.decomposition.FactorAnalysis", "sklearn.decomposition.FastICA",
-    "sklearn.decomposition.IncrementalPCA", "sklearn.decomposition.KernelPCA",
-    "sklearn.decomposition.LatentDirichletAllocation", "sklearn.decomposition.MiniBatchDictionaryLearning",
-    "sklearn.decomposition.MiniBatchSparsePCA", "sklearn.decomposition.NMF",
-    "sklearn.decomposition.PCA", "sklearn.decomposition.RandomizedPCA",
-    "sklearn.decomposition.SparseCoder", "sklearn.decomposition.SparsePCA",
-    "sklearn.decomposition.TruncatedSVD", "sklearn.decomposition._online_lda._dirichlet_expectation_1d",
-    "sklearn.decomposition._online_lda._dirichlet_expectation_2d", "sklearn.decomposition._online_lda.mean_change",
-    "sklearn.decomposition.base._BasePCA", "sklearn.decomposition.cdnmf_fast._update_cdnmf_fast",
-    "sklearn.decomposition.dict_learning", "sklearn.decomposition.dict_learning_online",
-    "sklearn.decomposition.factor_analysis.FactorAnalysis", "sklearn.decomposition.fastica",
-    "sklearn.decomposition.fastica_.FLOAT_DTYPES", "sklearn.decomposition.fastica_.FastICA",
-    "sklearn.decomposition.fastica_._cube", "sklearn.decomposition.fastica_._exp",
-    "sklearn.decomposition.fastica_._gs_decorrelation", "sklearn.decomposition.fastica_._ica_def",
-    "sklearn.decomposition.fastica_._ica_par", "sklearn.decomposition.fastica_._logcosh",
-    "sklearn.decomposition.fastica_._sym_decorrelation", "sklearn.decomposition.fastica_.fastica",
-    "sklearn.decomposition.fastica_.string_types", "sklearn.decomposition.incremental_pca.IncrementalPCA",
-    "sklearn.decomposition.kernel_pca.KernelPCA", "sklearn.decomposition.nmf.EPSILON",
-    "sklearn.decomposition.nmf.INTEGER_TYPES", "sklearn.decomposition.nmf.NMF",
-    "sklearn.decomposition.nmf._beta_divergence", "sklearn.decomposition.nmf._beta_loss_to_float",
-    "sklearn.decomposition.nmf._check_init", "sklearn.decomposition.nmf._check_string_param",
-    "sklearn.decomposition.nmf._compute_regularization", "sklearn.decomposition.nmf._fit_coordinate_descent",
-    "sklearn.decomposition.nmf._fit_multiplicative_update", "sklearn.decomposition.nmf._initialize_nmf",
-    "sklearn.decomposition.nmf._multiplicative_update_h", "sklearn.decomposition.nmf._multiplicative_update_w",
-    "sklearn.decomposition.nmf._special_sparse_dot", "sklearn.decomposition.nmf._update_coordinate_descent",
-    "sklearn.decomposition.nmf.non_negative_factorization", "sklearn.decomposition.nmf.norm",
-    "sklearn.decomposition.nmf.trace_dot", "sklearn.decomposition.non_negative_factorization",
-    "sklearn.decomposition.online_lda.EPS", "sklearn.decomposition.online_lda.LatentDirichletAllocation",
-    "sklearn.decomposition.online_lda._update_doc_distribution", "sklearn.decomposition.online_lda.gammaln",
-    "sklearn.decomposition.pca.PCA", "sklearn.decomposition.pca.RandomizedPCA",
-    "sklearn.decomposition.pca._assess_dimension_", "sklearn.decomposition.pca._infer_dimension_",
-    "sklearn.decomposition.pca.gammaln", "sklearn.decomposition.sparse_encode",
-    "sklearn.decomposition.sparse_pca.MiniBatchSparsePCA", "sklearn.decomposition.sparse_pca.SparsePCA",
-    "sklearn.decomposition.truncated_svd.TruncatedSVD", "sklearn.discriminant_analysis.LinearDiscriminantAnalysis",
-    "sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis", "sklearn.discriminant_analysis._class_cov",
-    "sklearn.discriminant_analysis._class_means", "sklearn.discriminant_analysis._cov",
-    "sklearn.discriminant_analysis.string_types", "sklearn.ensemble.AdaBoostClassifier",
-    "sklearn.ensemble.AdaBoostRegressor", "sklearn.ensemble.BaggingClassifier",
-    "sklearn.ensemble.BaggingRegressor", "sklearn.ensemble.BaseEnsemble",
-    "sklearn.ensemble.ExtraTreesClassifier", "sklearn.ensemble.ExtraTreesRegressor",
-    "sklearn.ensemble.GradientBoostingClassifier", "sklearn.ensemble.GradientBoostingRegressor",
-    "sklearn.ensemble.IsolationForest", "sklearn.ensemble.RandomForestClassifier",
-    "sklearn.ensemble.RandomForestRegressor", "sklearn.ensemble.RandomTreesEmbedding",
-    "sklearn.ensemble.VotingClassifier", "sklearn.ensemble._gradient_boosting._partial_dependence_tree",
-    "sklearn.ensemble._gradient_boosting._predict_regression_tree_stages_sparse", "sklearn.ensemble._gradient_boosting._random_sample_mask",
-    "sklearn.ensemble._gradient_boosting.predict_stage", "sklearn.ensemble._gradient_boosting.predict_stages",
-    "sklearn.ensemble.bagging.BaggingClassifier", "sklearn.ensemble.bagging.BaggingRegressor",
-    "sklearn.ensemble.bagging.BaseBagging", "sklearn.ensemble.bagging.MAX_INT",
-    "sklearn.ensemble.bagging._generate_bagging_indices", "sklearn.ensemble.bagging._generate_indices",
-    "sklearn.ensemble.bagging._parallel_build_estimators", "sklearn.ensemble.bagging._parallel_decision_function",
-    "sklearn.ensemble.bagging._parallel_predict_log_proba", "sklearn.ensemble.bagging._parallel_predict_proba",
-    "sklearn.ensemble.bagging._parallel_predict_regression", "sklearn.ensemble.base.BaseEnsemble",
-    "sklearn.ensemble.base.MAX_RAND_SEED", "sklearn.ensemble.base._partition_estimators",
-    "sklearn.ensemble.base._set_random_states", "sklearn.ensemble.forest.BaseForest",
-    "sklearn.ensemble.forest.ExtraTreesClassifier", "sklearn.ensemble.forest.ExtraTreesRegressor",
-    "sklearn.ensemble.forest.ForestClassifier", "sklearn.ensemble.forest.ForestRegressor",
-    "sklearn.ensemble.forest.MAX_INT", "sklearn.ensemble.forest.RandomForestClassifier",
-    "sklearn.ensemble.forest.RandomForestRegressor", "sklearn.ensemble.forest.RandomTreesEmbedding",
-    "sklearn.ensemble.forest._generate_sample_indices", "sklearn.ensemble.forest._generate_unsampled_indices",
-    "sklearn.ensemble.forest._parallel_build_trees", "sklearn.ensemble.forest.accumulate_prediction",
-    "sklearn.ensemble.gradient_boosting.BaseGradientBoosting", "sklearn.ensemble.gradient_boosting.BinomialDeviance",
-    "sklearn.ensemble.gradient_boosting.ClassificationLossFunction", "sklearn.ensemble.gradient_boosting.ExponentialLoss",
-    "sklearn.ensemble.gradient_boosting.GradientBoostingClassifier", "sklearn.ensemble.gradient_boosting.GradientBoostingRegressor",
-    "sklearn.ensemble.gradient_boosting.HuberLossFunction", "sklearn.ensemble.gradient_boosting.INIT_ESTIMATORS",
-    "sklearn.ensemble.gradient_boosting.LOSS_FUNCTIONS", "sklearn.ensemble.gradient_boosting.LeastAbsoluteError",
-    "sklearn.ensemble.gradient_boosting.LeastSquaresError", "sklearn.ensemble.gradient_boosting.LogOddsEstimator",
-    "sklearn.ensemble.gradient_boosting.LossFunction", "sklearn.ensemble.gradient_boosting.MeanEstimator",
-    "sklearn.ensemble.gradient_boosting.MultinomialDeviance", "sklearn.ensemble.gradient_boosting.PriorProbabilityEstimator",
-    "sklearn.ensemble.gradient_boosting.QuantileEstimator", "sklearn.ensemble.gradient_boosting.QuantileLossFunction",
-    "sklearn.ensemble.gradient_boosting.RegressionLossFunction", "sklearn.ensemble.gradient_boosting.ScaledLogOddsEstimator",
-    "sklearn.ensemble.gradient_boosting.TREE_LEAF", "sklearn.ensemble.gradient_boosting.VerboseReporter",
-    "sklearn.ensemble.gradient_boosting.ZeroEstimator", "sklearn.ensemble.gradient_boosting.expit",
-    "sklearn.ensemble.iforest.INTEGER_TYPES", "sklearn.ensemble.iforest.IsolationForest",
-    "sklearn.ensemble.iforest._average_path_length", "sklearn.ensemble.iforest.euler_gamma",
-    "sklearn.ensemble.partial_dependence._grid_from_X", "sklearn.ensemble.partial_dependence.partial_dependence",
-    "sklearn.ensemble.partial_dependence.plot_partial_dependence", "sklearn.ensemble.voting_classifier.VotingClassifier",
-    "sklearn.ensemble.voting_classifier._parallel_fit_estimator", "sklearn.ensemble.weight_boosting.AdaBoostClassifier",
-    "sklearn.ensemble.weight_boosting.AdaBoostRegressor", "sklearn.ensemble.weight_boosting.BaseWeightBoosting",
-    "sklearn.ensemble.weight_boosting._samme_proba", "sklearn.ensemble.weight_boosting.inner1d",
-    "sklearn.feature_extraction.DictVectorizer", "sklearn.feature_extraction.FeatureHasher",
-    "sklearn.feature_extraction._hashing.transform", "sklearn.feature_extraction.dict_vectorizer.DictVectorizer",
-    "sklearn.feature_extraction.dict_vectorizer._tosequence", "sklearn.feature_extraction.grid_to_graph",
-    "sklearn.feature_extraction.hashing.FeatureHasher", "sklearn.feature_extraction.hashing._iteritems",
-    "sklearn.feature_extraction.image.PatchExtractor", "sklearn.feature_extraction.image._compute_gradient_3d",
-    "sklearn.feature_extraction.image._compute_n_patches", "sklearn.feature_extraction.image._make_edges_3d",
-    "sklearn.feature_extraction.image._mask_edges_weights", "sklearn.feature_extraction.image._to_graph",
-    "sklearn.feature_extraction.image.extract_patches", "sklearn.feature_extraction.image.extract_patches_2d",
-    "sklearn.feature_extraction.image.grid_to_graph", "sklearn.feature_extraction.image.img_to_graph",
-    "sklearn.feature_extraction.image.reconstruct_from_patches_2d", "sklearn.feature_extraction.img_to_graph",
-    "sklearn.feature_extraction.stop_words.ENGLISH_STOP_WORDS", "sklearn.feature_extraction.text.CountVectorizer",
-    "sklearn.feature_extraction.text.ENGLISH_STOP_WORDS", "sklearn.feature_extraction.text.HashingVectorizer",
-    "sklearn.feature_extraction.text.TfidfTransformer", "sklearn.feature_extraction.text.TfidfVectorizer",
-    "sklearn.feature_extraction.text.VectorizerMixin", "sklearn.feature_extraction.text._check_stop_list",
-    "sklearn.feature_extraction.text._document_frequency", "sklearn.feature_extraction.text._make_int_array",
-    "sklearn.feature_extraction.text.strip_accents_ascii", "sklearn.feature_extraction.text.strip_accents_unicode",
-    "sklearn.feature_extraction.text.strip_tags", "sklearn.feature_selection.GenericUnivariateSelect",
-    "sklearn.feature_selection.RFE", "sklearn.feature_selection.RFECV",
-    "sklearn.feature_selection.SelectFdr", "sklearn.feature_selection.SelectFpr",
-    "sklearn.feature_selection.SelectFromModel", "sklearn.feature_selection.SelectFwe",
-    "sklearn.feature_selection.SelectKBest", "sklearn.feature_selection.SelectPercentile",
-    "sklearn.feature_selection.VarianceThreshold", "sklearn.feature_selection.base.SelectorMixin",
-    "sklearn.feature_selection.chi2", "sklearn.feature_selection.f_classif",
-    "sklearn.feature_selection.f_oneway", "sklearn.feature_selection.f_regression",
-    "sklearn.feature_selection.from_model.SelectFromModel", "sklearn.feature_selection.from_model._calculate_threshold",
-    "sklearn.feature_selection.from_model._get_feature_importances", "sklearn.feature_selection.mutual_info_._compute_mi",
-    "sklearn.feature_selection.mutual_info_._compute_mi_cc", "sklearn.feature_selection.mutual_info_._compute_mi_cd",
-    "sklearn.feature_selection.mutual_info_._estimate_mi", "sklearn.feature_selection.mutual_info_._iterate_columns",
-    "sklearn.feature_selection.mutual_info_.digamma", "sklearn.feature_selection.mutual_info_.mutual_info_classif",
-    "sklearn.feature_selection.mutual_info_.mutual_info_regression", "sklearn.feature_selection.mutual_info_classif",
-    "sklearn.feature_selection.mutual_info_regression", "sklearn.feature_selection.rfe.RFE",
-    "sklearn.feature_selection.rfe.RFECV", "sklearn.feature_selection.rfe._rfe_single_fit",
-    "sklearn.feature_selection.univariate_selection.GenericUnivariateSelect", "sklearn.feature_selection.univariate_selection.SelectFdr",
-    "sklearn.feature_selection.univariate_selection.SelectFpr", "sklearn.feature_selection.univariate_selection.SelectFwe",
-    "sklearn.feature_selection.univariate_selection.SelectKBest", "sklearn.feature_selection.univariate_selection.SelectPercentile",
-    "sklearn.feature_selection.univariate_selection._BaseFilter", "sklearn.feature_selection.univariate_selection._chisquare",
-    "sklearn.feature_selection.univariate_selection._clean_nans", "sklearn.feature_selection.univariate_selection.chi2",
-    "sklearn.feature_selection.univariate_selection.f_classif", "sklearn.feature_selection.univariate_selection.f_oneway",
-    "sklearn.feature_selection.univariate_selection.f_regression", "sklearn.feature_selection.variance_threshold.VarianceThreshold",
-    "sklearn.gaussian_process.GaussianProcess", "sklearn.gaussian_process.GaussianProcessClassifier",
-    "sklearn.gaussian_process.GaussianProcessRegressor", "sklearn.gaussian_process.correlation_models.absolute_exponential",
-    "sklearn.gaussian_process.correlation_models.cubic", "sklearn.gaussian_process.correlation_models.generalized_exponential",
-    "sklearn.gaussian_process.correlation_models.linear", "sklearn.gaussian_process.correlation_models.pure_nugget",
-    "sklearn.gaussian_process.correlation_models.squared_exponential", "sklearn.gaussian_process.gaussian_process.GaussianProcess",
-    "sklearn.gaussian_process.gaussian_process.MACHINE_EPSILON", "sklearn.gaussian_process.gaussian_process.l1_cross_distances",
-    "sklearn.gaussian_process.gpc.COEFS", "sklearn.gaussian_process.gpc.GaussianProcessClassifier",
-    "sklearn.gaussian_process.gpc.LAMBDAS", "sklearn.gaussian_process.gpc._BinaryGaussianProcessClassifierLaplace",
-    "sklearn.gaussian_process.gpc.erf", "sklearn.gaussian_process.gpc.expit",
-    "sklearn.gaussian_process.gpr.GaussianProcessRegressor", "sklearn.gaussian_process.kernels.CompoundKernel",
-    "sklearn.gaussian_process.kernels.ConstantKernel", "sklearn.gaussian_process.kernels.DotProduct",
-    "sklearn.gaussian_process.kernels.ExpSineSquared", "sklearn.gaussian_process.kernels.Exponentiation",
-    "sklearn.gaussian_process.kernels.Hyperparameter", "sklearn.gaussian_process.kernels.Kernel",
-    "sklearn.gaussian_process.kernels.KernelOperator", "sklearn.gaussian_process.kernels.Matern",
-    "sklearn.gaussian_process.kernels.NormalizedKernelMixin", "sklearn.gaussian_process.kernels.PairwiseKernel",
-    "sklearn.gaussian_process.kernels.Product", "sklearn.gaussian_process.kernels.RBF",
-    "sklearn.gaussian_process.kernels.RationalQuadratic", "sklearn.gaussian_process.kernels.StationaryKernelMixin",
-    "sklearn.gaussian_process.kernels.Sum", "sklearn.gaussian_process.kernels.WhiteKernel",
-    "sklearn.gaussian_process.kernels._approx_fprime", "sklearn.gaussian_process.kernels._check_length_scale",
-    "sklearn.gaussian_process.kernels.gamma", "sklearn.gaussian_process.kernels.kv",
-    "sklearn.gaussian_process.regression_models.constant", "sklearn.gaussian_process.regression_models.linear",
-    "sklearn.gaussian_process.regression_models.quadratic", "sklearn.get_config",
-    "sklearn.isotonic.IsotonicRegression", "sklearn.isotonic.check_increasing",
-    "sklearn.isotonic.isotonic_regression", "sklearn.kernel_approximation.AdditiveChi2Sampler",
-    "sklearn.kernel_approximation.KERNEL_PARAMS", "sklearn.kernel_approximation.Nystroem",
-    "sklearn.kernel_approximation.RBFSampler", "sklearn.kernel_approximation.SkewedChi2Sampler",
-    "sklearn.kernel_ridge.KernelRidge", "sklearn.linear_model.ARDRegression",
-    "sklearn.linear_model.BayesianRidge", "sklearn.linear_model.ElasticNet",
-    "sklearn.linear_model.ElasticNetCV", "sklearn.linear_model.Hinge",
-    "sklearn.linear_model.Huber", "sklearn.linear_model.HuberRegressor",
-    "sklearn.linear_model.Lars", "sklearn.linear_model.LarsCV",
-    "sklearn.linear_model.Lasso", "sklearn.linear_model.LassoCV",
-    "sklearn.linear_model.LassoLars", "sklearn.linear_model.LassoLarsCV",
-    "sklearn.linear_model.LassoLarsIC", "sklearn.linear_model.LinearRegression",
-    "sklearn.linear_model.Log", "sklearn.linear_model.LogisticRegression",
-    "sklearn.linear_model.LogisticRegressionCV", "sklearn.linear_model.ModifiedHuber",
-    "sklearn.linear_model.MultiTaskElasticNet", "sklearn.linear_model.MultiTaskElasticNetCV",
-    "sklearn.linear_model.MultiTaskLasso", "sklearn.linear_model.MultiTaskLassoCV",
-    "sklearn.linear_model.OrthogonalMatchingPursuit", "sklearn.linear_model.OrthogonalMatchingPursuitCV",
-    "sklearn.linear_model.PassiveAggressiveClassifier", "sklearn.linear_model.PassiveAggressiveRegressor",
-    "sklearn.linear_model.Perceptron", "sklearn.linear_model.RANSACRegressor",
-    "sklearn.linear_model.RandomizedLasso", "sklearn.linear_model.RandomizedLogisticRegression",
-    "sklearn.linear_model.Ridge", "sklearn.linear_model.RidgeCV",
-    "sklearn.linear_model.RidgeClassifier", "sklearn.linear_model.RidgeClassifierCV",
-    "sklearn.linear_model.SGDClassifier", "sklearn.linear_model.SGDRegressor",
-    "sklearn.linear_model.SquaredLoss", "sklearn.linear_model.TheilSenRegressor",
-    "sklearn.linear_model.base.FLOAT_DTYPES", "sklearn.linear_model.base.LinearClassifierMixin",
-    "sklearn.linear_model.base.LinearModel", "sklearn.linear_model.base.LinearRegression",
-    "sklearn.linear_model.base.SPARSE_INTERCEPT_DECAY", "sklearn.linear_model.base.SparseCoefMixin",
-    "sklearn.linear_model.base._pre_fit", "sklearn.linear_model.base._preprocess_data",
-    "sklearn.linear_model.base._rescale_data", "sklearn.linear_model.base.center_data",
-    "sklearn.linear_model.base.make_dataset", "sklearn.linear_model.base.sparse_center_data",
-    "sklearn.linear_model.bayes.ARDRegression", "sklearn.linear_model.bayes.BayesianRidge",
-    "sklearn.linear_model.cd_fast.enet_coordinate_descent", "sklearn.linear_model.cd_fast.enet_coordinate_descent_gram",
-    "sklearn.linear_model.cd_fast.enet_coordinate_descent_multi_task", "sklearn.linear_model.cd_fast.sparse_enet_coordinate_descent",
-    "sklearn.linear_model.coordinate_descent.ElasticNet", "sklearn.linear_model.coordinate_descent.ElasticNetCV",
-    "sklearn.linear_model.coordinate_descent.Lasso", "sklearn.linear_model.coordinate_descent.LassoCV",
-    "sklearn.linear_model.coordinate_descent.LinearModelCV", "sklearn.linear_model.coordinate_descent.MultiTaskElasticNet",
-    "sklearn.linear_model.coordinate_descent.MultiTaskElasticNetCV", "sklearn.linear_model.coordinate_descent.MultiTaskLasso",
-    "sklearn.linear_model.coordinate_descent.MultiTaskLassoCV", "sklearn.linear_model.coordinate_descent._alpha_grid",
-    "sklearn.linear_model.coordinate_descent._path_residuals", "sklearn.linear_model.coordinate_descent.enet_path",
-    "sklearn.linear_model.coordinate_descent.lasso_path", "sklearn.linear_model.enet_path",
-    "sklearn.linear_model.huber.HuberRegressor", "sklearn.linear_model.huber._huber_loss_and_gradient",
-    "sklearn.linear_model.lars_path", "sklearn.linear_model.lasso_path",
-    "sklearn.linear_model.lasso_stability_path", "sklearn.linear_model.least_angle.Lars",
-    "sklearn.linear_model.least_angle.LarsCV", "sklearn.linear_model.least_angle.LassoLars",
-    "sklearn.linear_model.least_angle.LassoLarsCV", "sklearn.linear_model.least_angle.LassoLarsIC",
-    "sklearn.linear_model.least_angle._check_copy_and_writeable", "sklearn.linear_model.least_angle._lars_path_residues",
-    "sklearn.linear_model.least_angle.lars_path", "sklearn.linear_model.least_angle.solve_triangular_args",
-    "sklearn.linear_model.least_angle.string_types", "sklearn.linear_model.logistic.LogisticRegression",
-    "sklearn.linear_model.logistic.LogisticRegressionCV", "sklearn.linear_model.logistic.SCORERS",
-    "sklearn.linear_model.logistic._check_solver_option", "sklearn.linear_model.logistic._intercept_dot",
-    "sklearn.linear_model.logistic._log_reg_scoring_path", "sklearn.linear_model.logistic._logistic_grad_hess",
-    "sklearn.linear_model.logistic._logistic_loss", "sklearn.linear_model.logistic._logistic_loss_and_grad",
-    "sklearn.linear_model.logistic._multinomial_grad_hess", "sklearn.linear_model.logistic._multinomial_loss",
-    "sklearn.linear_model.logistic._multinomial_loss_grad", "sklearn.linear_model.logistic.expit",
-    "sklearn.linear_model.logistic.logistic_regression_path", "sklearn.linear_model.logistic_regression_path",
-    "sklearn.linear_model.omp.OrthogonalMatchingPursuit", "sklearn.linear_model.omp.OrthogonalMatchingPursuitCV",
-    "sklearn.linear_model.omp._cholesky_omp", "sklearn.linear_model.omp._gram_omp",
-    "sklearn.linear_model.omp._omp_path_residues", "sklearn.linear_model.omp.orthogonal_mp",
-    "sklearn.linear_model.omp.orthogonal_mp_gram", "sklearn.linear_model.omp.premature",
-    "sklearn.linear_model.omp.solve_triangular_args", "sklearn.linear_model.orthogonal_mp",
-    "sklearn.linear_model.orthogonal_mp_gram", "sklearn.linear_model.passive_aggressive.DEFAULT_EPSILON",
-    "sklearn.linear_model.passive_aggressive.PassiveAggressiveClassifier", "sklearn.linear_model.passive_aggressive.PassiveAggressiveRegressor",
-    "sklearn.linear_model.perceptron.Perceptron", "sklearn.linear_model.randomized_l1.BaseRandomizedLinearModel",
-    "sklearn.linear_model.randomized_l1.RandomizedLasso", "sklearn.linear_model.randomized_l1.RandomizedLogisticRegression",
-    "sklearn.linear_model.randomized_l1._lasso_stability_path", "sklearn.linear_model.randomized_l1._randomized_lasso",
-    "sklearn.linear_model.randomized_l1._randomized_logistic", "sklearn.linear_model.randomized_l1._resample_model",
-    "sklearn.linear_model.randomized_l1.lasso_stability_path", "sklearn.linear_model.ransac.RANSACRegressor",
-    "sklearn.linear_model.ransac._EPSILON", "sklearn.linear_model.ransac._dynamic_max_trials",
-    "sklearn.linear_model.ridge.Ridge", "sklearn.linear_model.ridge.RidgeCV",
-    "sklearn.linear_model.ridge.RidgeClassifier", "sklearn.linear_model.ridge.RidgeClassifierCV",
-    "sklearn.linear_model.ridge._BaseRidge", "sklearn.linear_model.ridge._BaseRidgeCV",
-    "sklearn.linear_model.ridge._RidgeGCV", "sklearn.linear_model.ridge._solve_cholesky",
-    "sklearn.linear_model.ridge._solve_cholesky_kernel", "sklearn.linear_model.ridge._solve_lsqr",
-    "sklearn.linear_model.ridge._solve_sparse_cg", "sklearn.linear_model.ridge._solve_svd",
-    "sklearn.linear_model.ridge.ridge_regression", "sklearn.linear_model.ridge_regression",
-    "sklearn.linear_model.sag.get_auto_step_size", "sklearn.linear_model.sag.sag",
-    "sklearn.linear_model.sag.sag_solver", "sklearn.linear_model.sag_fast.MultinomialLogLoss",
-    "sklearn.linear_model.sag_fast._multinomial_grad_loss_all_samples", "sklearn.linear_model.sag_fast.sag",
-    "sklearn.linear_model.sgd_fast.Classification", "sklearn.linear_model.sgd_fast.EpsilonInsensitive",
-    "sklearn.linear_model.sgd_fast.Hinge", "sklearn.linear_model.sgd_fast.Huber",
-    "sklearn.linear_model.sgd_fast.Log", "sklearn.linear_model.sgd_fast.LossFunction",
-    "sklearn.linear_model.sgd_fast.ModifiedHuber", "sklearn.linear_model.sgd_fast.Regression",
-    "sklearn.linear_model.sgd_fast.SquaredEpsilonInsensitive", "sklearn.linear_model.sgd_fast.SquaredHinge",
-    "sklearn.linear_model.sgd_fast.SquaredLoss", "sklearn.linear_model.sgd_fast._plain_sgd",
-    "sklearn.linear_model.sgd_fast.average_sgd", "sklearn.linear_model.sgd_fast.plain_sgd",
-    "sklearn.linear_model.stochastic_gradient.BaseSGD", "sklearn.linear_model.stochastic_gradient.BaseSGDClassifier",
-    "sklearn.linear_model.stochastic_gradient.BaseSGDRegressor", "sklearn.linear_model.stochastic_gradient.DEFAULT_EPSILON",
-    "sklearn.linear_model.stochastic_gradient.LEARNING_RATE_TYPES", "sklearn.linear_model.stochastic_gradient.PENALTY_TYPES",
-    "sklearn.linear_model.stochastic_gradient.SGDClassifier", "sklearn.linear_model.stochastic_gradient.SGDRegressor",
-    "sklearn.linear_model.stochastic_gradient._prepare_fit_binary", "sklearn.linear_model.stochastic_gradient.fit_binary",
-    "sklearn.linear_model.theil_sen.TheilSenRegressor", "sklearn.linear_model.theil_sen._EPSILON",
-    "sklearn.linear_model.theil_sen._breakdown_point", "sklearn.linear_model.theil_sen._lstsq",
-    "sklearn.linear_model.theil_sen._modified_weiszfeld_step", "sklearn.linear_model.theil_sen._spatial_median",
-    "sklearn.linear_model.theil_sen.binom", "sklearn.manifold.Isomap",
-    "sklearn.manifold.LocallyLinearEmbedding", "sklearn.manifold.MDS",
-    "sklearn.manifold.SpectralEmbedding", "sklearn.manifold.TSNE",
-    "sklearn.manifold._barnes_hut_tsne.gradient", "sklearn.manifold._utils._binary_search_perplexity",
-    "sklearn.manifold.isomap.Isomap", "sklearn.manifold.locally_linear.FLOAT_DTYPES",
-    "sklearn.manifold.locally_linear.LocallyLinearEmbedding", "sklearn.manifold.locally_linear.barycenter_kneighbors_graph",
-    "sklearn.manifold.locally_linear.barycenter_weights", "sklearn.manifold.locally_linear.locally_linear_embedding",
-    "sklearn.manifold.locally_linear.null_space", "sklearn.manifold.locally_linear_embedding",
-    "sklearn.manifold.mds.MDS", "sklearn.manifold.mds._smacof_single",
-    "sklearn.manifold.mds.smacof", "sklearn.manifold.smacof",
-    "sklearn.manifold.spectral_embedding", "sklearn.manifold.spectral_embedding_.SpectralEmbedding",
-    "sklearn.manifold.spectral_embedding_._graph_connected_component", "sklearn.manifold.spectral_embedding_._graph_is_connected",
-    "sklearn.manifold.spectral_embedding_._set_diag", "sklearn.manifold.spectral_embedding_.spectral_embedding",
-    "sklearn.manifold.t_sne.MACHINE_EPSILON", "sklearn.manifold.t_sne.TSNE",
-    "sklearn.manifold.t_sne._gradient_descent", "sklearn.manifold.t_sne._joint_probabilities",
-    "sklearn.manifold.t_sne._joint_probabilities_nn", "sklearn.manifold.t_sne._kl_divergence",
-    "sklearn.manifold.t_sne._kl_divergence_bh", "sklearn.manifold.t_sne.string_types",
-    "sklearn.manifold.t_sne.trustworthiness", "sklearn.metrics.SCORERS",
-    "sklearn.metrics.accuracy_score", "sklearn.metrics.adjusted_mutual_info_score",
-    "sklearn.metrics.adjusted_rand_score", "sklearn.metrics.auc",
-    "sklearn.metrics.average_precision_score", "sklearn.metrics.base._average_binary_score",
-    "sklearn.metrics.brier_score_loss", "sklearn.metrics.calinski_harabaz_score",
-    "sklearn.metrics.classification._check_binary_probabilistic_predictions", "sklearn.metrics.classification._check_targets",
-    "sklearn.metrics.classification._prf_divide", "sklearn.metrics.classification._weighted_sum",
-    "sklearn.metrics.classification.accuracy_score", "sklearn.metrics.classification.brier_score_loss",
-    "sklearn.metrics.classification.classification_report", "sklearn.metrics.classification.cohen_kappa_score",
-    "sklearn.metrics.classification.confusion_matrix", "sklearn.metrics.classification.f1_score",
-    "sklearn.metrics.classification.fbeta_score", "sklearn.metrics.classification.hamming_loss",
-    "sklearn.metrics.classification.hinge_loss", "sklearn.metrics.classification.jaccard_similarity_score",
-    "sklearn.metrics.classification.log_loss", "sklearn.metrics.classification.matthews_corrcoef",
-    "sklearn.metrics.classification.precision_recall_fscore_support", "sklearn.metrics.classification.precision_score",
-    "sklearn.metrics.classification.recall_score", "sklearn.metrics.classification.zero_one_loss",
-    "sklearn.metrics.classification_report", "sklearn.metrics.cluster.adjusted_mutual_info_score",
-    "sklearn.metrics.cluster.adjusted_rand_score", "sklearn.metrics.cluster.bicluster._check_rows_and_columns",
-    "sklearn.metrics.cluster.bicluster._jaccard", "sklearn.metrics.cluster.bicluster._pairwise_similarity",
-    "sklearn.metrics.cluster.bicluster.consensus_score", "sklearn.metrics.cluster.calinski_harabaz_score",
-    "sklearn.metrics.cluster.completeness_score", "sklearn.metrics.cluster.consensus_score",
-    "sklearn.metrics.cluster.contingency_matrix", "sklearn.metrics.cluster.entropy",
-    "sklearn.metrics.cluster.expected_mutual_info_fast.expected_mutual_information", "sklearn.metrics.cluster.expected_mutual_info_fast.gammaln",
-    "sklearn.metrics.cluster.expected_mutual_information", "sklearn.metrics.cluster.fowlkes_mallows_score",
-    "sklearn.metrics.cluster.homogeneity_completeness_v_measure", "sklearn.metrics.cluster.homogeneity_score",
-    "sklearn.metrics.cluster.mutual_info_score", "sklearn.metrics.cluster.normalized_mutual_info_score",
-    "sklearn.metrics.cluster.silhouette_samples", "sklearn.metrics.cluster.silhouette_score",
-    "sklearn.metrics.cluster.supervised.adjusted_mutual_info_score", "sklearn.metrics.cluster.supervised.adjusted_rand_score",
-    "sklearn.metrics.cluster.supervised.check_clusterings", "sklearn.metrics.cluster.supervised.comb2",
-    "sklearn.metrics.cluster.supervised.completeness_score", "sklearn.metrics.cluster.supervised.contingency_matrix",
-    "sklearn.metrics.cluster.supervised.entropy", "sklearn.metrics.cluster.supervised.fowlkes_mallows_score",
-    "sklearn.metrics.cluster.supervised.homogeneity_completeness_v_measure", "sklearn.metrics.cluster.supervised.homogeneity_score",
-    "sklearn.metrics.cluster.supervised.mutual_info_score", "sklearn.metrics.cluster.supervised.normalized_mutual_info_score",
-    "sklearn.metrics.cluster.supervised.v_measure_score", "sklearn.metrics.cluster.unsupervised.calinski_harabaz_score",
-    "sklearn.metrics.cluster.unsupervised.check_number_of_labels", "sklearn.metrics.cluster.unsupervised.silhouette_samples",
-    "sklearn.metrics.cluster.unsupervised.silhouette_score", "sklearn.metrics.cluster.v_measure_score",
-    "sklearn.metrics.cohen_kappa_score", "sklearn.metrics.completeness_score",
-    "sklearn.metrics.confusion_matrix", "sklearn.metrics.consensus_score",
-    "sklearn.metrics.coverage_error", "sklearn.metrics.euclidean_distances",
-    "sklearn.metrics.explained_variance_score", "sklearn.metrics.f1_score",
-    "sklearn.metrics.fbeta_score", "sklearn.metrics.fowlkes_mallows_score",
-    "sklearn.metrics.get_scorer", "sklearn.metrics.hamming_loss",
-    "sklearn.metrics.hinge_loss", "sklearn.metrics.homogeneity_completeness_v_measure",
-    "sklearn.metrics.homogeneity_score", "sklearn.metrics.jaccard_similarity_score",
-    "sklearn.metrics.label_ranking_average_precision_score", "sklearn.metrics.label_ranking_loss",
-    "sklearn.metrics.log_loss", "sklearn.metrics.make_scorer",
-    "sklearn.metrics.matthews_corrcoef", "sklearn.metrics.mean_absolute_error",
-    "sklearn.metrics.mean_squared_error", "sklearn.metrics.mean_squared_log_error",
-    "sklearn.metrics.median_absolute_error", "sklearn.metrics.mutual_info_score",
-    "sklearn.metrics.normalized_mutual_info_score", "sklearn.metrics.pairwise.KERNEL_PARAMS",
-    "sklearn.metrics.pairwise.PAIRED_DISTANCES", "sklearn.metrics.pairwise.PAIRWISE_BOOLEAN_FUNCTIONS",
-    "sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS", "sklearn.metrics.pairwise.PAIRWISE_KERNEL_FUNCTIONS",
-    "sklearn.metrics.pairwise._VALID_METRICS", "sklearn.metrics.pairwise._chi2_kernel_fast",
-    "sklearn.metrics.pairwise._pairwise_callable", "sklearn.metrics.pairwise._parallel_pairwise",
-    "sklearn.metrics.pairwise._return_float_dtype", "sklearn.metrics.pairwise._sparse_manhattan",
-    "sklearn.metrics.pairwise.additive_chi2_kernel", "sklearn.metrics.pairwise.check_paired_arrays",
-    "sklearn.metrics.pairwise.check_pairwise_arrays", "sklearn.metrics.pairwise.chi2_kernel",
-    "sklearn.metrics.pairwise.cosine_distances", "sklearn.metrics.pairwise.cosine_similarity",
-    "sklearn.metrics.pairwise.distance_metrics", "sklearn.metrics.pairwise.euclidean_distances",
-    "sklearn.metrics.pairwise.kernel_metrics", "sklearn.metrics.pairwise.laplacian_kernel",
-    "sklearn.metrics.pairwise.linear_kernel", "sklearn.metrics.pairwise.manhattan_distances",
-    "sklearn.metrics.pairwise.paired_cosine_distances", "sklearn.metrics.pairwise.paired_distances",
-    "sklearn.metrics.pairwise.paired_euclidean_distances", "sklearn.metrics.pairwise.paired_manhattan_distances",
-    "sklearn.metrics.pairwise.pairwise_distances", "sklearn.metrics.pairwise.pairwise_distances_argmin",
-    "sklearn.metrics.pairwise.pairwise_distances_argmin_min", "sklearn.metrics.pairwise.pairwise_kernels",
-    "sklearn.metrics.pairwise.polynomial_kernel", "sklearn.metrics.pairwise.rbf_kernel",
-    "sklearn.metrics.pairwise.sigmoid_kernel", "sklearn.metrics.pairwise_distances",
-    "sklearn.metrics.pairwise_distances_argmin", "sklearn.metrics.pairwise_distances_argmin_min",
-    "sklearn.metrics.pairwise_fast._chi2_kernel_fast", "sklearn.metrics.pairwise_fast._sparse_manhattan",
-    "sklearn.metrics.pairwise_kernels", "sklearn.metrics.precision_recall_curve",
-    "sklearn.metrics.precision_recall_fscore_support", "sklearn.metrics.precision_score",
-    "sklearn.metrics.r2_score", "sklearn.metrics.ranking._binary_clf_curve",
-    "sklearn.metrics.ranking.auc", "sklearn.metrics.ranking.average_precision_score",
-    "sklearn.metrics.ranking.coverage_error", "sklearn.metrics.ranking.label_ranking_average_precision_score",
-    "sklearn.metrics.ranking.label_ranking_loss", "sklearn.metrics.ranking.precision_recall_curve",
-    "sklearn.metrics.ranking.roc_auc_score", "sklearn.metrics.ranking.roc_curve",
-    "sklearn.metrics.recall_score", "sklearn.metrics.regression._check_reg_targets",
-    "sklearn.metrics.regression.explained_variance_score", "sklearn.metrics.regression.mean_absolute_error",
-    "sklearn.metrics.regression.mean_squared_error", "sklearn.metrics.regression.mean_squared_log_error",
-    "sklearn.metrics.regression.median_absolute_error", "sklearn.metrics.regression.r2_score",
-    "sklearn.metrics.regression.string_types", "sklearn.metrics.roc_auc_score",
-    "sklearn.metrics.roc_curve", "sklearn.metrics.scorer.SCORERS",
-    "sklearn.metrics.scorer._BaseScorer", "sklearn.metrics.scorer._PredictScorer",
-    "sklearn.metrics.scorer._ProbaScorer", "sklearn.metrics.scorer._ThresholdScorer",
-    "sklearn.metrics.scorer._check_multimetric_scoring", "sklearn.metrics.scorer._passthrough_scorer",
-    "sklearn.metrics.scorer.accuracy_scorer", "sklearn.metrics.scorer.adjusted_mutual_info_scorer",
-    "sklearn.metrics.scorer.adjusted_rand_scorer", "sklearn.metrics.scorer.average",
-    "sklearn.metrics.scorer.average_precision_scorer", "sklearn.metrics.scorer.check_scoring",
-    "sklearn.metrics.scorer.completeness_scorer", "sklearn.metrics.scorer.deprecation_msg",
-    "sklearn.metrics.scorer.explained_variance_scorer", "sklearn.metrics.scorer.f1_scorer",
-    "sklearn.metrics.scorer.fowlkes_mallows_scorer", "sklearn.metrics.scorer.get_scorer",
-    "sklearn.metrics.scorer.homogeneity_scorer", "sklearn.metrics.scorer.log_loss_scorer",
-    "sklearn.metrics.scorer.make_scorer", "sklearn.metrics.scorer.mean_absolute_error_scorer",
-    "sklearn.metrics.scorer.mean_squared_error_scorer", "sklearn.metrics.scorer.median_absolute_error_scorer",
-    "sklearn.metrics.scorer.mutual_info_scorer", "sklearn.metrics.scorer.name",
-    "sklearn.metrics.scorer.neg_log_loss_scorer", "sklearn.metrics.scorer.neg_mean_absolute_error_scorer",
-    "sklearn.metrics.scorer.neg_mean_squared_error_scorer", "sklearn.metrics.scorer.neg_mean_squared_log_error_scorer",
-    "sklearn.metrics.scorer.neg_median_absolute_error_scorer", "sklearn.metrics.scorer.normalized_mutual_info_scorer",
-    "sklearn.metrics.scorer.precision_scorer", "sklearn.metrics.scorer.qualified_name",
-    "sklearn.metrics.scorer.r2_scorer", "sklearn.metrics.scorer.recall_scorer",
-    "sklearn.metrics.scorer.roc_auc_scorer", "sklearn.metrics.scorer.v_measure_scorer",
-    "sklearn.metrics.silhouette_samples", "sklearn.metrics.silhouette_score",
-    "sklearn.metrics.v_measure_score", "sklearn.metrics.zero_one_loss",
-    "sklearn.model_selection.BaseCrossValidator", "sklearn.model_selection.GridSearchCV",
-    "sklearn.model_selection.GroupKFold", "sklearn.model_selection.GroupShuffleSplit",
-    "sklearn.model_selection.KFold", "sklearn.model_selection.LeaveOneGroupOut",
-    "sklearn.model_selection.LeaveOneOut", "sklearn.model_selection.LeavePGroupsOut",
-    "sklearn.model_selection.LeavePOut", "sklearn.model_selection.ParameterGrid",
-    "sklearn.model_selection.ParameterSampler", "sklearn.model_selection.PredefinedSplit",
-    "sklearn.model_selection.RandomizedSearchCV", "sklearn.model_selection.RepeatedKFold",
-    "sklearn.model_selection.RepeatedStratifiedKFold", "sklearn.model_selection.ShuffleSplit",
-    "sklearn.model_selection.StratifiedKFold", "sklearn.model_selection.StratifiedShuffleSplit",
-    "sklearn.model_selection.TimeSeriesSplit", "sklearn.model_selection._search.BaseSearchCV",
-    "sklearn.model_selection._search.GridSearchCV", "sklearn.model_selection._search.ParameterGrid",
-    "sklearn.model_selection._search.ParameterSampler", "sklearn.model_selection._search.RandomizedSearchCV",
-    "sklearn.model_selection._search._CVScoreTuple", "sklearn.model_selection._search._check_param_grid",
-    "sklearn.model_selection._search.fit_grid_point", "sklearn.model_selection._search.sp_version",
-    "sklearn.model_selection._split.BaseCrossValidator", "sklearn.model_selection._split.BaseShuffleSplit",
-    "sklearn.model_selection._split.GroupKFold", "sklearn.model_selection._split.GroupShuffleSplit",
-    "sklearn.model_selection._split.KFold", "sklearn.model_selection._split.LeaveOneGroupOut",
-    "sklearn.model_selection._split.LeaveOneOut", "sklearn.model_selection._split.LeavePGroupsOut",
-    "sklearn.model_selection._split.LeavePOut", "sklearn.model_selection._split.PredefinedSplit",
-    "sklearn.model_selection._split.RepeatedKFold", "sklearn.model_selection._split.RepeatedStratifiedKFold",
-    "sklearn.model_selection._split.ShuffleSplit", "sklearn.model_selection._split.StratifiedKFold",
-    "sklearn.model_selection._split.StratifiedShuffleSplit", "sklearn.model_selection._split.TimeSeriesSplit",
-    "sklearn.model_selection._split._BaseKFold", "sklearn.model_selection._split._CVIterableWrapper",
-    "sklearn.model_selection._split._RepeatedSplits", "sklearn.model_selection._split._approximate_mode",
-    "sklearn.model_selection._split._build_repr", "sklearn.model_selection._split._validate_shuffle_split",
-    "sklearn.model_selection._split._validate_shuffle_split_init", "sklearn.model_selection._split.check_cv",
-    "sklearn.model_selection._split.train_test_split", "sklearn.model_selection._validation._aggregate_score_dicts",
-    "sklearn.model_selection._validation._check_is_permutation", "sklearn.model_selection._validation._fit_and_predict",
-    "sklearn.model_selection._validation._fit_and_score", "sklearn.model_selection._validation._incremental_fit_estimator",
-    "sklearn.model_selection._validation._index_param_value", "sklearn.model_selection._validation._multimetric_score",
-    "sklearn.model_selection._validation._permutation_test_score", "sklearn.model_selection._validation._score",
-    "sklearn.model_selection._validation._shuffle", "sklearn.model_selection._validation._translate_train_sizes",
-    "sklearn.model_selection._validation.cross_val_predict", "sklearn.model_selection._validation.cross_val_score",
-    "sklearn.model_selection._validation.cross_validate", "sklearn.model_selection._validation.learning_curve",
-    "sklearn.model_selection._validation.permutation_test_score", "sklearn.model_selection._validation.validation_curve",
-    "sklearn.model_selection.check_cv", "sklearn.model_selection.cross_val_predict",
-    "sklearn.model_selection.cross_val_score", "sklearn.model_selection.cross_validate",
-    "sklearn.model_selection.fit_grid_point", "sklearn.model_selection.learning_curve",
-    "sklearn.model_selection.permutation_test_score", "sklearn.model_selection.train_test_split",
-    "sklearn.model_selection.validation_curve", "sklearn.multiclass.OneVsOneClassifier",
-    "sklearn.multiclass.OneVsRestClassifier", "sklearn.multiclass.OutputCodeClassifier",
-    "sklearn.multiclass._ConstantPredictor", "sklearn.multiclass._check_estimator",
-    "sklearn.multiclass._fit_binary", "sklearn.multiclass._fit_ovo_binary",
-    "sklearn.multiclass._partial_fit_binary", "sklearn.multiclass._partial_fit_ovo_binary",
-    "sklearn.multiclass._predict_binary", "sklearn.naive_bayes.BaseDiscreteNB",
-    "sklearn.naive_bayes.BaseNB", "sklearn.naive_bayes.BernoulliNB",
-    "sklearn.naive_bayes.GaussianNB", "sklearn.naive_bayes.MultinomialNB",
-    "sklearn.naive_bayes._ALPHA_MIN", "sklearn.neighbors.BallTree",
-    "sklearn.neighbors.DistanceMetric", "sklearn.neighbors.KDTree",
-    "sklearn.neighbors.KNeighborsClassifier", "sklearn.neighbors.KNeighborsRegressor",
-    "sklearn.neighbors.KernelDensity", "sklearn.neighbors.LSHForest",
-    "sklearn.neighbors.LocalOutlierFactor", "sklearn.neighbors.NearestCentroid",
-    "sklearn.neighbors.NearestNeighbors", "sklearn.neighbors.RadiusNeighborsClassifier",
-    "sklearn.neighbors.RadiusNeighborsRegressor", "sklearn.neighbors.approximate.GaussianRandomProjectionHash",
-    "sklearn.neighbors.approximate.HASH_DTYPE", "sklearn.neighbors.approximate.LSHForest",
-    "sklearn.neighbors.approximate.MAX_HASH_SIZE", "sklearn.neighbors.approximate.ProjectionToHashMixin",
-    "sklearn.neighbors.approximate._array_of_arrays", "sklearn.neighbors.approximate._find_longest_prefix_match",
-    "sklearn.neighbors.approximate._find_matching_indices", "sklearn.neighbors.ball_tree.BallTree",
-    "sklearn.neighbors.ball_tree.BinaryTree", "sklearn.neighbors.ball_tree.CLASS_DOC",
-    "sklearn.neighbors.ball_tree.DOC_DICT", "sklearn.neighbors.ball_tree.NeighborsHeap",
-    "sklearn.neighbors.ball_tree.NodeData", "sklearn.neighbors.ball_tree.NodeHeap",
-    "sklearn.neighbors.ball_tree.NodeHeapData", "sklearn.neighbors.ball_tree.VALID_METRICS",
-    "sklearn.neighbors.ball_tree.VALID_METRIC_IDS", "sklearn.neighbors.ball_tree.kernel_norm",
-    "sklearn.neighbors.ball_tree.load_heap", "sklearn.neighbors.ball_tree.newObj",
-    "sklearn.neighbors.ball_tree.nodeheap_sort", "sklearn.neighbors.ball_tree.offsets",
-    "sklearn.neighbors.ball_tree.simultaneous_sort", "sklearn.neighbors.base.KNeighborsMixin",
-    "sklearn.neighbors.base.NeighborsBase", "sklearn.neighbors.base.PAIRWISE_DISTANCE_FUNCTIONS",
-    "sklearn.neighbors.base.RadiusNeighborsMixin", "sklearn.neighbors.base.SupervisedFloatMixin",
-    "sklearn.neighbors.base.SupervisedIntegerMixin", "sklearn.neighbors.base.UnsupervisedMixin",
-    "sklearn.neighbors.base.VALID_METRICS", "sklearn.neighbors.base.VALID_METRICS_SPARSE",
-    "sklearn.neighbors.base._check_weights", "sklearn.neighbors.base._get_weights",
-    "sklearn.neighbors.classification.KNeighborsClassifier", "sklearn.neighbors.classification.RadiusNeighborsClassifier",
-    "sklearn.neighbors.dist_metrics.BrayCurtisDistance", "sklearn.neighbors.dist_metrics.CanberraDistance",
-    "sklearn.neighbors.dist_metrics.ChebyshevDistance", "sklearn.neighbors.dist_metrics.DiceDistance",
-    "sklearn.neighbors.dist_metrics.DistanceMetric", "sklearn.neighbors.dist_metrics.EuclideanDistance",
-    "sklearn.neighbors.dist_metrics.HammingDistance", "sklearn.neighbors.dist_metrics.HaversineDistance",
-    "sklearn.neighbors.dist_metrics.JaccardDistance", "sklearn.neighbors.dist_metrics.KulsinskiDistance",
-    "sklearn.neighbors.dist_metrics.METRIC_MAPPING", "sklearn.neighbors.dist_metrics.MahalanobisDistance",
-    "sklearn.neighbors.dist_metrics.ManhattanDistance", "sklearn.neighbors.dist_metrics.MatchingDistance",
-    "sklearn.neighbors.dist_metrics.MinkowskiDistance", "sklearn.neighbors.dist_metrics.PyFuncDistance",
-    "sklearn.neighbors.dist_metrics.RogersTanimotoDistance", "sklearn.neighbors.dist_metrics.RussellRaoDistance",
-    "sklearn.neighbors.dist_metrics.SEuclideanDistance", "sklearn.neighbors.dist_metrics.SokalMichenerDistance",
-    "sklearn.neighbors.dist_metrics.SokalSneathDistance", "sklearn.neighbors.dist_metrics.WMinkowskiDistance",
-    "sklearn.neighbors.dist_metrics.get_valid_metric_ids", "sklearn.neighbors.dist_metrics.newObj",
-    "sklearn.neighbors.graph._check_params", "sklearn.neighbors.graph._query_include_self",
-    "sklearn.neighbors.graph.kneighbors_graph", "sklearn.neighbors.graph.radius_neighbors_graph",
-    "sklearn.neighbors.kd_tree.BinaryTree", "sklearn.neighbors.kd_tree.CLASS_DOC",
-    "sklearn.neighbors.kd_tree.DOC_DICT", "sklearn.neighbors.kd_tree.KDTree",
-    "sklearn.neighbors.kd_tree.NeighborsHeap", "sklearn.neighbors.kd_tree.NodeData",
-    "sklearn.neighbors.kd_tree.NodeHeap", "sklearn.neighbors.kd_tree.NodeHeapData",
-    "sklearn.neighbors.kd_tree.VALID_METRICS", "sklearn.neighbors.kd_tree.VALID_METRIC_IDS",
-    "sklearn.neighbors.kd_tree.kernel_norm", "sklearn.neighbors.kd_tree.load_heap",
-    "sklearn.neighbors.kd_tree.newObj", "sklearn.neighbors.kd_tree.nodeheap_sort",
-    "sklearn.neighbors.kd_tree.offsets", "sklearn.neighbors.kd_tree.simultaneous_sort",
-    "sklearn.neighbors.kde.KernelDensity", "sklearn.neighbors.kde.TREE_DICT",
-    "sklearn.neighbors.kde.VALID_KERNELS", "sklearn.neighbors.kde.gammainc",
-    "sklearn.neighbors.kneighbors_graph", "sklearn.neighbors.lof.LocalOutlierFactor",
-    "sklearn.neighbors.nearest_centroid.NearestCentroid", "sklearn.neighbors.quad_tree.CELL_DTYPE",
-    "sklearn.neighbors.quad_tree._QuadTree", "sklearn.neighbors.radius_neighbors_graph",
-    "sklearn.neighbors.regression.KNeighborsRegressor", "sklearn.neighbors.regression.RadiusNeighborsRegressor",
-    "sklearn.neighbors.unsupervised.NearestNeighbors", "sklearn.pipeline.FeatureUnion",
-    "sklearn.pipeline.Pipeline", "sklearn.pipeline._fit_one_transformer",
-    "sklearn.pipeline._fit_transform_one", "sklearn.pipeline._name_estimators",
-    "sklearn.pipeline._transform_one", "sklearn.pipeline.make_pipeline",
-    "sklearn.pipeline.make_union", "sklearn.preprocessing.Binarizer",
-    "sklearn.preprocessing.FunctionTransformer", "sklearn.preprocessing.Imputer",
-    "sklearn.preprocessing.KernelCenterer", "sklearn.preprocessing.LabelBinarizer",
-    "sklearn.preprocessing.LabelEncoder", "sklearn.preprocessing.MaxAbsScaler",
-    "sklearn.preprocessing.MinMaxScaler", "sklearn.preprocessing.MultiLabelBinarizer",
-    "sklearn.preprocessing.Normalizer", "sklearn.preprocessing.OneHotEncoder",
-    "sklearn.preprocessing.PolynomialFeatures", "sklearn.preprocessing.QuantileTransformer",
-    "sklearn.preprocessing.RobustScaler", "sklearn.preprocessing.StandardScaler",
-    "sklearn.preprocessing._function_transformer.FunctionTransformer", "sklearn.preprocessing._function_transformer._identity",
-    "sklearn.preprocessing._function_transformer.string_types", "sklearn.preprocessing.add_dummy_feature",
-    "sklearn.preprocessing.binarize", "sklearn.preprocessing.data.BOUNDS_THRESHOLD",
-    "sklearn.preprocessing.data.Binarizer", "sklearn.preprocessing.data.FLOAT_DTYPES",
-    "sklearn.preprocessing.data.KernelCenterer", "sklearn.preprocessing.data.MaxAbsScaler",
-    "sklearn.preprocessing.data.MinMaxScaler", "sklearn.preprocessing.data.Normalizer",
-    "sklearn.preprocessing.data.OneHotEncoder", "sklearn.preprocessing.data.PolynomialFeatures",
-    "sklearn.preprocessing.data.QuantileTransformer", "sklearn.preprocessing.data.RobustScaler",
-    "sklearn.preprocessing.data.StandardScaler", "sklearn.preprocessing.data._handle_zeros_in_scale",
-    "sklearn.preprocessing.data._transform_selected", "sklearn.preprocessing.data.add_dummy_feature",
-    "sklearn.preprocessing.data.binarize", "sklearn.preprocessing.data.maxabs_scale",
-    "sklearn.preprocessing.data.minmax_scale", "sklearn.preprocessing.data.normalize",
-    "sklearn.preprocessing.data.quantile_transform", "sklearn.preprocessing.data.robust_scale",
-    "sklearn.preprocessing.data.scale", "sklearn.preprocessing.data.string_types",
-    "sklearn.preprocessing.imputation.FLOAT_DTYPES", "sklearn.preprocessing.imputation.Imputer",
-    "sklearn.preprocessing.imputation._get_mask", "sklearn.preprocessing.imputation._most_frequent",
-    "sklearn.preprocessing.label.LabelBinarizer", "sklearn.preprocessing.label.LabelEncoder",
-    "sklearn.preprocessing.label.MultiLabelBinarizer", "sklearn.preprocessing.label._inverse_binarize_multiclass",
-    "sklearn.preprocessing.label._inverse_binarize_thresholding", "sklearn.preprocessing.label.label_binarize",
-    "sklearn.preprocessing.label_binarize", "sklearn.preprocessing.maxabs_scale",
-    "sklearn.preprocessing.minmax_scale", "sklearn.preprocessing.normalize",
-    "sklearn.preprocessing.quantile_transform", "sklearn.preprocessing.robust_scale",
-    "sklearn.preprocessing.scale", "sklearn.random_projection.BaseRandomProjection",
-    "sklearn.random_projection.GaussianRandomProjection", "sklearn.random_projection.SparseRandomProjection",
-    "sklearn.random_projection._check_density", "sklearn.random_projection._check_input_size",
-    "sklearn.random_projection.gaussian_random_matrix", "sklearn.random_projection.johnson_lindenstrauss_min_dim",
-    "sklearn.random_projection.sparse_random_matrix", "sklearn.set_config",
-    "sklearn.setup_module", "sklearn.svm.LinearSVC",
-    "sklearn.svm.LinearSVR", "sklearn.svm.NuSVC",
-    "sklearn.svm.NuSVR", "sklearn.svm.OneClassSVM",
-    "sklearn.svm.SVC", "sklearn.svm.SVR",
-    "sklearn.svm.base.BaseLibSVM", "sklearn.svm.base.BaseSVC",
-    "sklearn.svm.base.LIBSVM_IMPL", "sklearn.svm.base._fit_liblinear",
-    "sklearn.svm.base._get_liblinear_solver_type", "sklearn.svm.base._one_vs_one_coef",
-    "sklearn.svm.bounds.l1_min_c", "sklearn.svm.classes.LinearSVC",
-    "sklearn.svm.classes.LinearSVR", "sklearn.svm.classes.NuSVC",
-    "sklearn.svm.classes.NuSVR", "sklearn.svm.classes.OneClassSVM",
-    "sklearn.svm.classes.SVC", "sklearn.svm.classes.SVR",
-    "sklearn.svm.l1_min_c", "sklearn.svm.liblinear.set_verbosity_wrap",
-    "sklearn.svm.liblinear.train_wrap", "sklearn.svm.libsvm.LIBSVM_KERNEL_TYPES",
-    "sklearn.svm.libsvm.cross_validation", "sklearn.svm.libsvm.decision_function",
-    "sklearn.svm.libsvm.fit", "sklearn.svm.libsvm.predict",
-    "sklearn.svm.libsvm.predict_proba", "sklearn.svm.libsvm.set_verbosity_wrap",
-    "sklearn.svm.libsvm_sparse.libsvm_sparse_decision_function", "sklearn.svm.libsvm_sparse.libsvm_sparse_predict",
-    "sklearn.svm.libsvm_sparse.libsvm_sparse_predict_proba", "sklearn.svm.libsvm_sparse.libsvm_sparse_train",
-    "sklearn.svm.libsvm_sparse.set_verbosity_wrap", "sklearn.tree.DecisionTreeClassifier",
-    "sklearn.tree.DecisionTreeRegressor", "sklearn.tree.ExtraTreeClassifier",
-    "sklearn.tree.ExtraTreeRegressor", "sklearn.tree._criterion.ClassificationCriterion",
-    "sklearn.tree._criterion.Criterion", "sklearn.tree._criterion.Entropy",
-    "sklearn.tree._criterion.FriedmanMSE", "sklearn.tree._criterion.Gini",
-    "sklearn.tree._criterion.MAE", "sklearn.tree._criterion.MSE",
-    "sklearn.tree._criterion.RegressionCriterion", "sklearn.tree._splitter.BaseDenseSplitter",
-    "sklearn.tree._splitter.BaseSparseSplitter", "sklearn.tree._splitter.BestSparseSplitter",
-    "sklearn.tree._splitter.BestSplitter", "sklearn.tree._splitter.RandomSparseSplitter",
-    "sklearn.tree._splitter.RandomSplitter", "sklearn.tree._splitter.Splitter",
-    "sklearn.tree._tree.BestFirstTreeBuilder", "sklearn.tree._tree.DepthFirstTreeBuilder",
-    "sklearn.tree._tree.NODE_DTYPE", "sklearn.tree._tree.TREE_LEAF",
-    "sklearn.tree._tree.TREE_UNDEFINED", "sklearn.tree._tree.Tree",
-    "sklearn.tree._tree.TreeBuilder", "sklearn.tree._utils.PriorityHeap",
-    "sklearn.tree._utils.Stack", "sklearn.tree._utils.WeightedMedianCalculator",
-    "sklearn.tree._utils.WeightedPQueue", "sklearn.tree._utils._realloc_test",
-    "sklearn.tree.export.SENTINEL", "sklearn.tree.export.Sentinel",
-    "sklearn.tree.export._color_brew", "sklearn.tree.export.export_graphviz",
-    "sklearn.tree.export_graphviz", "sklearn.tree.tree.BaseDecisionTree",
-    "sklearn.tree.tree.CRITERIA_CLF", "sklearn.tree.tree.CRITERIA_REG",
-    "sklearn.tree.tree.DENSE_SPLITTERS", "sklearn.tree.tree.DecisionTreeClassifier",
-    "sklearn.tree.tree.DecisionTreeRegressor", "sklearn.tree.tree.ExtraTreeClassifier",
-    "sklearn.tree.tree.ExtraTreeRegressor", "sklearn.tree.tree.SPARSE_SPLITTERS",
-    "sklearn.utils.Bunch", "sklearn.utils._get_n_jobs",
-    "sklearn.utils._logistic_sigmoid._log_logistic_sigmoid", "sklearn.utils._random._sample_without_replacement_check_input",
-    "sklearn.utils._random._sample_without_replacement_with_pool", "sklearn.utils._random._sample_without_replacement_with_reservoir_sampling",
-    "sklearn.utils._random._sample_without_replacement_with_tracking_selection", "sklearn.utils._random.sample_without_replacement",
-    "sklearn.utils.arrayfuncs.cholesky_delete", "sklearn.utils.arrayfuncs.min_pos",
-    "sklearn.utils.as_float_array", "sklearn.utils.assert_all_finite",
-    "sklearn.utils.axis0_safe_slice", "sklearn.utils.check_X_y",
-    "sklearn.utils.check_array", "sklearn.utils.check_consistent_length",
-    "sklearn.utils.check_random_state", "sklearn.utils.check_symmetric",
-    "sklearn.utils.class_weight.compute_class_weight", "sklearn.utils.class_weight.compute_sample_weight",
-    "sklearn.utils.column_or_1d", "sklearn.utils.compute_class_weight",
-    "sklearn.utils.compute_sample_weight", "sklearn.utils.deprecated",
-    "sklearn.utils.deprecation.DeprecationDict", "sklearn.utils.deprecation._is_deprecated",
-    "sklearn.utils.deprecation.deprecated", "sklearn.utils.extmath._deterministic_vector_sign_flip",
-    "sklearn.utils.extmath._impose_f_order", "sklearn.utils.extmath._incremental_mean_and_var",
-    "sklearn.utils.extmath.cartesian", "sklearn.utils.extmath.density",
-    "sklearn.utils.extmath.fast_dot", "sklearn.utils.extmath.fast_logdet",
-    "sklearn.utils.extmath.log_logistic", "sklearn.utils.extmath.logsumexp",
-    "sklearn.utils.extmath.make_nonnegative", "sklearn.utils.extmath.norm",
-    "sklearn.utils.extmath.np_version", "sklearn.utils.extmath.pinvh",
-    "sklearn.utils.extmath.randomized_range_finder", "sklearn.utils.extmath.randomized_svd",
-    "sklearn.utils.extmath.row_norms", "sklearn.utils.extmath.safe_min",
-    "sklearn.utils.extmath.safe_sparse_dot", "sklearn.utils.extmath.softmax",
-    "sklearn.utils.extmath.squared_norm", "sklearn.utils.extmath.stable_cumsum",
-    "sklearn.utils.extmath.svd_flip", "sklearn.utils.extmath.weighted_mode",
-    "sklearn.utils.fast_dict.IntFloatDict", "sklearn.utils.fast_dict.argmin",
-    "sklearn.utils.fixes._parse_version", "sklearn.utils.fixes.divide",
-    "sklearn.utils.fixes.euler_gamma", "sklearn.utils.fixes.makedirs",
-    "sklearn.utils.fixes.np_version", "sklearn.utils.fixes.parallel_helper",
-    "sklearn.utils.fixes.sp_version", "sklearn.utils.fixes.sparse_min_max",
-    "sklearn.utils.gen_batches", "sklearn.utils.gen_even_slices",
-    "sklearn.utils.graph.connected_components", "sklearn.utils.graph.graph_laplacian",
-    "sklearn.utils.graph.graph_shortest_path", "sklearn.utils.graph.single_source_shortest_path_length",
-    "sklearn.utils.graph_shortest_path.graph_shortest_path", "sklearn.utils.indexable",
-    "sklearn.utils.indices_to_mask", "sklearn.utils.linear_assignment_._HungarianState",
-    "sklearn.utils.linear_assignment_._hungarian", "sklearn.utils.linear_assignment_._step1",
-    "sklearn.utils.linear_assignment_._step3", "sklearn.utils.linear_assignment_._step4",
-    "sklearn.utils.linear_assignment_._step5", "sklearn.utils.linear_assignment_._step6",
-    "sklearn.utils.linear_assignment_.linear_assignment", "sklearn.utils.metaestimators._BaseComposition",
-    "sklearn.utils.metaestimators._IffHasAttrDescriptor", "sklearn.utils.metaestimators._safe_split",
-    "sklearn.utils.metaestimators.if_delegate_has_method", "sklearn.utils.multiclass._FN_UNIQUE_LABELS",
-    "sklearn.utils.multiclass._check_partial_fit_first_call", "sklearn.utils.multiclass._is_integral_float",
-    "sklearn.utils.multiclass._ovr_decision_function", "sklearn.utils.multiclass._unique_indicator",
-    "sklearn.utils.multiclass._unique_multiclass", "sklearn.utils.multiclass.check_classification_targets",
-    "sklearn.utils.multiclass.class_distribution", "sklearn.utils.multiclass.is_multilabel",
-    "sklearn.utils.multiclass.string_types", "sklearn.utils.multiclass.type_of_target",
-    "sklearn.utils.multiclass.unique_labels", "sklearn.utils.murmurhash.murmurhash3_32",
-    "sklearn.utils.murmurhash.murmurhash3_bytes_array_s32", "sklearn.utils.murmurhash.murmurhash3_bytes_array_u32",
-    "sklearn.utils.murmurhash.murmurhash3_bytes_s32", "sklearn.utils.murmurhash.murmurhash3_bytes_u32",
-    "sklearn.utils.murmurhash.murmurhash3_int_s32", "sklearn.utils.murmurhash.murmurhash3_int_u32",
-    "sklearn.utils.murmurhash3_32", "sklearn.utils.optimize._LineSearchError",
-    "sklearn.utils.optimize._cg", "sklearn.utils.optimize._line_search_wolfe12",
-    "sklearn.utils.optimize.newton_cg", "sklearn.utils.random.choice",
-    "sklearn.utils.random.random_choice_csc", "sklearn.utils.resample",
-    "sklearn.utils.safe_indexing", "sklearn.utils.safe_mask",
-    "sklearn.utils.safe_sqr", "sklearn.utils.seq_dataset.ArrayDataset",
-    "sklearn.utils.seq_dataset.CSRDataset", "sklearn.utils.seq_dataset.SequentialDataset",
-    "sklearn.utils.shuffle", "sklearn.utils.sparsefuncs._csc_mean_var_axis0",
-    "sklearn.utils.sparsefuncs._csr_mean_var_axis0", "sklearn.utils.sparsefuncs._get_elem_at_rank",
-    "sklearn.utils.sparsefuncs._get_median", "sklearn.utils.sparsefuncs._incr_mean_var_axis0",
-    "sklearn.utils.sparsefuncs._raise_error_wrong_axis", "sklearn.utils.sparsefuncs._raise_typeerror",
-    "sklearn.utils.sparsefuncs.count_nonzero", "sklearn.utils.sparsefuncs.csc_median_axis_0",
-    "sklearn.utils.sparsefuncs.incr_mean_variance_axis", "sklearn.utils.sparsefuncs.inplace_column_scale",
-    "sklearn.utils.sparsefuncs.inplace_csr_column_scale", "sklearn.utils.sparsefuncs.inplace_csr_row_scale",
-    "sklearn.utils.sparsefuncs.inplace_row_scale", "sklearn.utils.sparsefuncs.inplace_swap_column",
-    "sklearn.utils.sparsefuncs.inplace_swap_row", "sklearn.utils.sparsefuncs.inplace_swap_row_csc",
-    "sklearn.utils.sparsefuncs.inplace_swap_row_csr", "sklearn.utils.sparsefuncs.mean_variance_axis",
-    "sklearn.utils.sparsefuncs.min_max_axis", "sklearn.utils.sparsefuncs_fast._csc_mean_variance_axis0",
-    "sklearn.utils.sparsefuncs_fast._csr_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast._csr_row_norms",
-    "sklearn.utils.sparsefuncs_fast._incr_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast._inplace_csr_row_normalize_l1",
-    "sklearn.utils.sparsefuncs_fast._inplace_csr_row_normalize_l2", "sklearn.utils.sparsefuncs_fast.assign_rows_csr",
-    "sklearn.utils.sparsefuncs_fast.csc_mean_variance_axis0", "sklearn.utils.sparsefuncs_fast.csr_mean_variance_axis0",
-    "sklearn.utils.sparsefuncs_fast.csr_row_norms", "sklearn.utils.sparsefuncs_fast.incr_mean_variance_axis0",
-    "sklearn.utils.sparsefuncs_fast.inplace_csr_row_normalize_l1", "sklearn.utils.sparsefuncs_fast.inplace_csr_row_normalize_l2",
-    "sklearn.utils.stats._weighted_percentile", "sklearn.utils.stats.rankdata",
-    "sklearn.utils.tosequence", "sklearn.utils.validation.FLOAT_DTYPES",
-    "sklearn.utils.validation._assert_all_finite", "sklearn.utils.validation._ensure_sparse_format",
-    "sklearn.utils.validation._is_arraylike", "sklearn.utils.validation._num_samples",
-    "sklearn.utils.validation._shape_repr", "sklearn.utils.validation.as_float_array",
-    "sklearn.utils.validation.assert_all_finite", "sklearn.utils.validation.check_X_y",
-    "sklearn.utils.validation.check_array", "sklearn.utils.validation.check_consistent_length",
-    "sklearn.utils.validation.check_is_fitted", "sklearn.utils.validation.check_memory",
-    "sklearn.utils.validation.check_non_negative", "sklearn.utils.validation.check_random_state",
-    "sklearn.utils.validation.check_symmetric", "sklearn.utils.validation.column_or_1d",
-    "sklearn.utils.validation.has_fit_parameter", "sklearn.utils.validation.indexable",
-    "sklearn.utils.weight_vector.WeightVector"
-],
-
-  "SKR_NAMES": [
-    "skrebate.MultiSURF", "skrebate.MultiSURFstar",
-    "skrebate.ReliefF", "skrebate.SURF",
-    "skrebate.SURFstar", "skrebate.TuRF",
-    "skrebate.multisurf.MultiSURF", "skrebate.multisurfstar.MultiSURFstar",
-    "skrebate.relieff.ReliefF", "skrebate.scoring_utils.MultiSURF_compute_scores",
-    "skrebate.scoring_utils.MultiSURFstar_compute_scores", "skrebate.scoring_utils.ReliefF_compute_scores",
-    "skrebate.scoring_utils.SURF_compute_scores", "skrebate.scoring_utils.SURFstar_compute_scores",
-    "skrebate.scoring_utils.compute_score", "skrebate.scoring_utils.get_row_missing",
-    "skrebate.scoring_utils.ramp_function", "skrebate.surf.SURF",
-    "skrebate.surfstar.SURFstar", "skrebate.turf.TuRF"
-  ],
-
-  "XGB_NAMES": [
-    "xgboost.Booster", "xgboost.DMatrix",
-    "xgboost.VERSION_FILE", "xgboost.XGBClassifier",
-    "xgboost.XGBModel", "xgboost.XGBRegressor",
-    "xgboost.callback._fmt_metric", "xgboost.callback._get_callback_context",
-    "xgboost.callback.early_stop", "xgboost.callback.print_evaluation",
-    "xgboost.callback.record_evaluation", "xgboost.callback.reset_learning_rate",
-    "xgboost.compat.PANDAS_INSTALLED", "xgboost.compat.PY3",
-    "xgboost.compat.SKLEARN_INSTALLED", "xgboost.compat.STRING_TYPES",
-    "xgboost.compat.py_str", "xgboost.core.Booster",
-    "xgboost.core.CallbackEnv", "xgboost.core.DMatrix",
-    "xgboost.core.EarlyStopException", "xgboost.core.PANDAS_DTYPE_MAPPER",
-    "xgboost.core.PANDAS_INSTALLED", "xgboost.core.PY3",
-    "xgboost.core.STRING_TYPES", "xgboost.core.XGBoostError",
-    "xgboost.core._check_call", "xgboost.core._load_lib",
-    "xgboost.core._maybe_pandas_data", "xgboost.core._maybe_pandas_label",
-    "xgboost.core.c_array", "xgboost.core.c_str",
-    "xgboost.core.ctypes2buffer", "xgboost.core.ctypes2numpy",
-    "xgboost.core.from_cstr_to_pystr", "xgboost.core.from_pystr_to_cstr",
-    "xgboost.cv", "xgboost.f",
-    "xgboost.libpath.XGBoostLibraryNotFound", "xgboost.libpath.find_lib_path",
-    "xgboost.plot_importance", "xgboost.plot_tree",
-    "xgboost.plotting._EDGEPAT", "xgboost.plotting._EDGEPAT2",
-    "xgboost.plotting._LEAFPAT", "xgboost.plotting._NODEPAT",
-    "xgboost.plotting._parse_edge", "xgboost.plotting._parse_node",
-    "xgboost.plotting.plot_importance", "xgboost.plotting.plot_tree",
-    "xgboost.plotting.to_graphviz", "xgboost.rabit.DTYPE_ENUM__",
-    "xgboost.rabit.STRING_TYPES", "xgboost.rabit._init_rabit",
-    "xgboost.rabit.allreduce", "xgboost.rabit.broadcast",
-    "xgboost.rabit.finalize", "xgboost.rabit.get_processor_name",
-    "xgboost.rabit.get_rank", "xgboost.rabit.get_world_size",
-    "xgboost.rabit.init", "xgboost.rabit.tracker_print",
-    "xgboost.rabit.version_number", "xgboost.sklearn.SKLEARN_INSTALLED",
-    "xgboost.sklearn.XGBClassifier", "xgboost.sklearn.XGBModel",
-    "xgboost.sklearn.XGBRegressor", "xgboost.sklearn._objective_decorator",
-    "xgboost.to_graphviz", "xgboost.train",
-    "xgboost.training.CVPack", "xgboost.training.SKLEARN_INSTALLED",
-    "xgboost.training.STRING_TYPES", "xgboost.training._train_internal",
-    "xgboost.training.aggcv", "xgboost.training.cv",
-    "xgboost.training.mknfold", "xgboost.training.train"
-  ],
-
-
-  "NUMPY_NAMES": [
-    "numpy.core.multiarray._reconstruct", "numpy.ndarray",
-    "numpy.dtype", "numpy.core.multiarray.scalar",
-    "numpy.random.__RandomState_ctor"
-  ],
-
-  "IMBLEARN_NAMES":[
-    "imblearn.pipeline.Pipeline", "imblearn.over_sampling._random_over_sampler.RandomOverSampler",
-    "imblearn.under_sampling._prototype_selection._edited_nearest_neighbours.EditedNearestNeighbours"
-  ]
-}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/stacking_ensembles.py	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,128 @@
+import argparse
+import json
+import pandas as pd
+import pickle
+import xgboost
+import warnings
+from sklearn import (cluster, compose, decomposition, ensemble,
+                     feature_extraction, feature_selection,
+                     gaussian_process, kernel_approximation, metrics,
+                     model_selection, naive_bayes, neighbors,
+                     pipeline, preprocessing, svm, linear_model,
+                     tree, discriminant_analysis)
+from sklearn.model_selection._split import check_cv
+from feature_selectors import (DyRFE, DyRFECV,
+                               MyPipeline, MyimbPipeline)
+from iraps_classifier import (IRAPSCore, IRAPSClassifier,
+                              BinarizeTargetClassifier,
+                              BinarizeTargetRegressor)
+from preprocessors import Z_RandomOverSampler
+from utils import load_model, get_cv, get_estimator, get_search_params
+
+from mlxtend.regressor import StackingCVRegressor, StackingRegressor
+from mlxtend.classifier import StackingCVClassifier, StackingClassifier
+
+
+warnings.filterwarnings('ignore')  # suppress every warning process-wide
+# Later passed as n_jobs; read from the GALAXY_SLOTS env var, default 1.
+N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
+
+
+def main(inputs_path, output_obj, base_paths=None, meta_path=None,
+         outfile_params=None):
+    """
+    Parameter
+    ---------
+    inputs_path : str
+        File path for Galaxy parameters
+
+    output_obj : str
+        File path for ensemble estimator ouput
+
+    base_paths : str
+        File path or paths concatenated by comma.
+
+    meta_path : str
+        File path
+
+    outfile_params : str
+        File path for params output
+    """
+    with open(inputs_path, 'r') as param_handler:
+        params = json.load(param_handler)
+
+    base_estimators = []
+    for idx, base_file in enumerate(base_paths.split(',')):
+        if base_file and base_file != 'None':
+            with open(base_file, 'rb') as handler:
+                model = load_model(handler)
+        else:
+            estimator_json = (params['base_est_builder'][idx]
+                              ['estimator_selector'])
+            model = get_estimator(estimator_json)
+        base_estimators.append(model)
+
+    if meta_path:
+        with open(meta_path, 'rb') as f:
+            meta_estimator = load_model(f)
+    else:
+        estimator_json = params['meta_estimator']['estimator_selector']
+        meta_estimator = get_estimator(estimator_json)
+
+    options = params['algo_selection']['options']
+
+    cv_selector = options.pop('cv_selector', None)
+    if cv_selector:
+        splitter, groups = get_cv(cv_selector)
+        options['cv'] = splitter
+        # set n_jobs
+        options['n_jobs'] = N_JOBS
+
+    if params['algo_selection']['estimator_type'] == 'StackingCVClassifier':
+        ensemble_estimator = StackingCVClassifier(
+            classifiers=base_estimators,
+            meta_classifier=meta_estimator,
+            **options)
+
+    elif params['algo_selection']['estimator_type'] == 'StackingClassifier':
+        ensemble_estimator = StackingClassifier(
+            classifiers=base_estimators,
+            meta_classifier=meta_estimator,
+            **options)
+
+    elif params['algo_selection']['estimator_type'] == 'StackingCVRegressor':
+        ensemble_estimator = StackingCVRegressor(
+            regressors=base_estimators,
+            meta_regressor=meta_estimator,
+            **options)
+
+    else:
+        ensemble_estimator = StackingRegressor(
+            regressors=base_estimators,
+            meta_regressor=meta_estimator,
+            **options)
+
+    print(ensemble_estimator)
+    for base_est in base_estimators:
+        print(base_est)
+
+    with open(output_obj, 'wb') as out_handler:
+        pickle.dump(ensemble_estimator, out_handler, pickle.HIGHEST_PROTOCOL)
+
+    if params['get_params'] and outfile_params:
+        results = get_search_params(ensemble_estimator)
+        df = pd.DataFrame(results, columns=['', 'Parameter', 'Value'])
+        df.to_csv(outfile_params, sep='\t', index=False)
+
+
+if __name__ == '__main__':
+    aparser = argparse.ArgumentParser()
+    aparser.add_argument("-b", "--bases", dest="bases")
+    aparser.add_argument("-m", "--meta", dest="meta")
+    aparser.add_argument("-i", "--inputs", dest="inputs")
+    aparser.add_argument("-o", "--outfile", dest="outfile")
+    aparser.add_argument("-p", "--outfile_params", dest="outfile_params")
+    args = aparser.parse_args()
+
+    main(args.inputs, args.outfile, base_paths=args.bases,
+         meta_path=args.meta, outfile_params=args.outfile_params)
Binary file test-data/GridSearchCV.zip has changed
Binary file test-data/LinearRegression01.zip has changed
Binary file test-data/LinearRegression02.zip has changed
Binary file test-data/RFE.zip has changed
Binary file test-data/RandomForestClassifier.zip has changed
Binary file test-data/RandomForestRegressor01.zip has changed
Binary file test-data/StackingCVRegressor01.zip has changed
Binary file test-data/StackingCVRegressor02.zip has changed
Binary file test-data/XGBRegressor01.zip has changed
Binary file test-data/best_estimator_.zip has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/best_params_.txt	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,1 @@
+{'estimator__n_estimators': 100}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/best_score_.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,2 @@
+best_score_
+0.7976348550293088
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/feature_importances_.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,11 @@
+feature_importances_
+0.15959252
+0.20373514
+0.22071308
+0.06281833
+0.098471984
+0.06960951
+0.13073005
+0.027164686
+0.022071308
+0.0050933785
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/feature_selection_result13	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,262 @@
+temp_1	average	forecast_noaa	friend
+69.0	69.7	65.0	88.0
+59.0	58.1	57.0	66.0
+88.0	77.3	75.0	70.0
+65.0	64.7	63.0	58.0
+50.0	47.5	44.0	58.0
+51.0	48.2	45.0	63.0
+52.0	48.6	45.0	41.0
+78.0	76.7	75.0	66.0
+35.0	45.2	43.0	38.0
+40.0	46.1	45.0	36.0
+47.0	45.3	41.0	58.0
+72.0	76.3	76.0	88.0
+76.0	74.4	73.0	72.0
+39.0	45.3	45.0	46.0
+78.0	72.2	70.0	84.0
+71.0	67.3	63.0	85.0
+48.0	47.7	44.0	61.0
+72.0	77.0	77.0	68.0
+57.0	54.7	50.0	70.0
+40.0	45.1	44.0	39.0
+54.0	47.6	47.0	53.0
+58.0	53.2	52.0	71.0
+68.0	58.6	58.0	54.0
+65.0	55.3	55.0	65.0
+47.0	48.8	46.0	51.0
+44.0	45.6	43.0	42.0
+64.0	67.1	64.0	69.0
+62.0	57.1	57.0	67.0
+66.0	65.7	64.0	74.0
+70.0	71.8	67.0	90.0
+57.0	54.2	54.0	70.0
+50.0	50.5	46.0	57.0
+55.0	51.8	49.0	71.0
+55.0	49.5	46.0	67.0
+42.0	45.2	41.0	47.0
+65.0	60.1	57.0	41.0
+63.0	65.6	63.0	73.0
+48.0	47.3	45.0	28.0
+42.0	46.3	44.0	62.0
+51.0	46.2	45.0	38.0
+64.0	68.0	65.0	64.0
+75.0	74.6	74.0	63.0
+52.0	46.7	42.0	39.0
+67.0	68.6	66.0	80.0
+68.0	68.7	65.0	56.0
+54.0	55.0	53.0	42.0
+62.0	56.8	52.0	70.0
+76.0	76.1	76.0	61.0
+73.0	73.1	71.0	93.0
+52.0	50.3	50.0	35.0
+70.0	73.9	71.0	68.0
+77.0	77.4	75.0	62.0
+60.0	56.6	52.0	72.0
+52.0	53.3	50.0	54.0
+79.0	75.0	71.0	85.0
+76.0	57.2	53.0	74.0
+66.0	66.5	64.0	85.0
+57.0	61.8	58.0	62.0
+66.0	57.4	57.0	60.0
+61.0	58.4	58.0	41.0
+55.0	53.1	52.0	65.0
+48.0	48.1	46.0	54.0
+49.0	49.2	46.0	63.0
+65.0	66.7	64.0	73.0
+60.0	62.5	58.0	56.0
+56.0	53.0	53.0	36.0
+59.0	57.4	56.0	44.0
+44.0	45.7	41.0	35.0
+82.0	63.2	62.0	83.0
+64.0	67.0	65.0	76.0
+43.0	45.5	41.0	46.0
+64.0	55.7	51.0	57.0
+63.0	52.7	49.0	49.0
+70.0	70.6	67.0	79.0
+71.0	52.4	48.0	42.0
+76.0	73.5	69.0	85.0
+68.0	62.1	58.0	55.0
+39.0	45.3	44.0	39.0
+71.0	70.7	70.0	52.0
+69.0	71.7	68.0	89.0
+74.0	71.5	71.0	82.0
+81.0	64.1	62.0	81.0
+51.0	49.3	49.0	34.0
+45.0	46.8	44.0	61.0
+87.0	76.8	73.0	73.0
+71.0	73.8	71.0	86.0
+55.0	60.3	56.0	77.0
+80.0	76.9	72.0	81.0
+67.0	69.0	65.0	76.0
+61.0	61.4	60.0	78.0
+46.0	46.6	43.0	65.0
+39.0	45.1	42.0	51.0
+67.0	68.3	67.0	61.0
+52.0	47.8	43.0	50.0
+67.0	69.8	68.0	87.0
+75.0	71.2	67.0	77.0
+68.0	73.3	73.0	79.0
+92.0	68.2	65.0	71.0
+67.0	72.8	69.0	56.0
+44.0	45.8	43.0	56.0
+61.0	61.0	56.0	73.0
+65.0	53.4	49.0	41.0
+68.0	73.0	72.0	70.0
+87.0	62.1	62.0	69.0
+117.0	54.8	51.0	62.0
+80.0	76.4	75.0	66.0
+57.0	51.0	47.0	46.0
+67.0	63.6	61.0	68.0
+58.0	54.0	51.0	56.0
+65.0	56.2	53.0	41.0
+52.0	48.6	45.0	47.0
+59.0	55.3	52.0	39.0
+57.0	53.9	53.0	35.0
+81.0	59.2	56.0	66.0
+75.0	77.1	76.0	75.0
+76.0	77.4	76.0	95.0
+57.0	64.8	61.0	53.0
+69.0	74.2	72.0	86.0
+77.0	66.8	66.0	64.0
+55.0	49.9	47.0	55.0
+49.0	46.8	45.0	53.0
+54.0	52.7	48.0	57.0
+55.0	51.2	49.0	42.0
+56.0	55.6	53.0	45.0
+68.0	74.6	72.0	77.0
+54.0	53.4	49.0	44.0
+67.0	69.0	69.0	87.0
+49.0	46.9	45.0	33.0
+49.0	49.1	47.0	45.0
+56.0	48.5	48.0	49.0
+73.0	71.0	66.0	78.0
+66.0	66.4	65.0	60.0
+69.0	66.5	66.0	62.0
+82.0	64.5	64.0	65.0
+90.0	76.7	75.0	65.0
+51.0	50.7	49.0	43.0
+77.0	57.1	57.0	41.0
+60.0	61.4	58.0	58.0
+74.0	72.8	71.0	87.0
+85.0	77.2	73.0	74.0
+68.0	62.8	61.0	64.0
+56.0	49.5	46.0	37.0
+71.0	56.2	55.0	45.0
+62.0	59.5	57.0	40.0
+83.0	77.3	76.0	76.0
+64.0	65.4	62.0	56.0
+56.0	48.4	45.0	54.0
+41.0	45.1	42.0	31.0
+65.0	66.2	66.0	67.0
+65.0	53.7	49.0	38.0
+40.0	46.0	46.0	41.0
+45.0	45.6	43.0	29.0
+52.0	48.4	48.0	58.0
+63.0	51.7	50.0	63.0
+52.0	47.6	47.0	44.0
+60.0	57.9	55.0	77.0
+81.0	75.7	73.0	89.0
+75.0	75.8	74.0	77.0
+59.0	51.4	48.0	64.0
+73.0	77.1	77.0	94.0
+75.0	77.3	73.0	66.0
+60.0	58.5	56.0	59.0
+75.0	71.3	68.0	56.0
+59.0	57.6	56.0	40.0
+53.0	49.1	47.0	56.0
+79.0	77.2	76.0	60.0
+57.0	52.1	49.0	46.0
+75.0	67.6	64.0	77.0
+71.0	69.4	67.0	81.0
+53.0	50.2	50.0	42.0
+46.0	48.8	48.0	56.0
+81.0	76.9	72.0	70.0
+49.0	48.9	47.0	29.0
+57.0	48.4	44.0	34.0
+60.0	58.8	54.0	53.0
+67.0	73.7	72.0	64.0
+61.0	64.1	62.0	60.0
+66.0	69.5	66.0	85.0
+64.0	51.9	50.0	55.0
+66.0	65.7	62.0	49.0
+64.0	52.2	52.0	49.0
+71.0	65.2	61.0	56.0
+75.0	63.8	62.0	60.0
+48.0	46.4	46.0	47.0
+53.0	52.5	48.0	70.0
+49.0	47.1	46.0	65.0
+85.0	68.5	67.0	81.0
+62.0	49.4	48.0	30.0
+50.0	47.0	42.0	58.0
+58.0	55.9	51.0	39.0
+72.0	77.2	74.0	95.0
+55.0	50.7	50.0	34.0
+74.0	72.3	70.0	91.0
+85.0	77.3	77.0	77.0
+73.0	77.3	77.0	93.0
+52.0	47.4	44.0	39.0
+67.0	67.6	64.0	62.0
+45.0	45.1	45.0	35.0
+46.0	47.2	46.0	41.0
+66.0	60.6	60.0	57.0
+71.0	77.0	75.0	86.0
+70.0	69.3	66.0	79.0
+58.0	49.9	46.0	53.0
+72.0	77.1	76.0	65.0
+74.0	75.4	74.0	71.0
+65.0	64.5	63.0	49.0
+77.0	58.8	55.0	39.0
+59.0	50.9	49.0	35.0
+45.0	45.7	41.0	61.0
+53.0	50.5	49.0	46.0
+53.0	54.9	54.0	72.0
+79.0	77.3	73.0	79.0
+49.0	49.0	44.0	44.0
+63.0	62.9	62.0	78.0
+69.0	56.5	54.0	45.0
+60.0	50.8	47.0	46.0
+64.0	62.5	60.0	73.0
+79.0	71.0	66.0	64.0
+55.0	47.0	43.0	58.0
+73.0	56.0	54.0	41.0
+60.0	59.1	57.0	62.0
+67.0	70.2	67.0	77.0
+42.0	45.2	45.0	58.0
+60.0	65.0	62.0	55.0
+57.0	49.8	47.0	30.0
+35.0	45.2	44.0	36.0
+75.0	70.3	66.0	84.0
+61.0	51.1	48.0	65.0
+51.0	50.6	46.0	59.0
+71.0	71.9	67.0	70.0
+74.0	75.3	74.0	71.0
+48.0	45.4	44.0	42.0
+74.0	74.9	70.0	60.0
+76.0	70.8	68.0	57.0
+58.0	51.6	47.0	37.0
+51.0	50.4	48.0	43.0
+72.0	72.6	68.0	78.0
+76.0	67.2	64.0	74.0
+52.0	47.9	47.0	60.0
+53.0	48.2	48.0	53.0
+65.0	69.1	65.0	83.0
+58.0	58.1	58.0	43.0
+77.0	75.6	74.0	56.0
+61.0	52.9	51.0	35.0
+67.0	65.3	64.0	54.0
+54.0	49.3	46.0	58.0
+79.0	67.4	65.0	58.0
+77.0	64.3	63.0	67.0
+71.0	67.7	64.0	55.0
+58.0	57.7	54.0	61.0
+68.0	55.9	55.0	56.0
+40.0	45.4	45.0	49.0
+80.0	77.3	75.0	71.0
+74.0	62.3	59.0	61.0
+57.0	45.5	42.0	57.0
+52.0	47.8	43.0	57.0
+71.0	75.1	71.0	95.0
+49.0	53.6	49.0	70.0
+89.0	59.0	59.0	61.0
+60.0	60.2	56.0	78.0
+59.0	58.3	58.0	40.0
Binary file test-data/final_estimator.zip has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,6 @@
+	Parameter	Value
+@	copy_X	copy_X: True
+@	fit_intercept	fit_intercept: True
+*	n_jobs	n_jobs: 1
+@	normalize	normalize: False
+	Note:	@, params eligible for search in searchcv tool.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params01.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,30 @@
+	Parameter	Value
+*	memory	memory: None
+*	steps	"steps: [('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+       with_scaling=True)), ('selectkbest', SelectKBest(k=10, score_func=<function f_classif at 0x111ef0158>)), ('svr', SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
+  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
+  tol=0.001, verbose=False))]"
+@	robustscaler	"robustscaler: RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+       with_scaling=True)"
+@	selectkbest	selectkbest: SelectKBest(k=10, score_func=<function f_classif at 0x111ef0158>)
+@	svr	"svr: SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
+  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
+  tol=0.001, verbose=False)"
+@	robustscaler__copy	robustscaler__copy: True
+@	robustscaler__quantile_range	robustscaler__quantile_range: (25.0, 75.0)
+@	robustscaler__with_centering	robustscaler__with_centering: True
+@	robustscaler__with_scaling	robustscaler__with_scaling: True
+@	selectkbest__k	selectkbest__k: 10
+@	selectkbest__score_func	selectkbest__score_func: <function f_classif at 0x111ef0158>
+@	svr__C	svr__C: 1.0
+@	svr__cache_size	svr__cache_size: 200
+@	svr__coef0	svr__coef0: 0.0
+@	svr__degree	svr__degree: 3
+@	svr__epsilon	svr__epsilon: 0.1
+@	svr__gamma	svr__gamma: 'auto_deprecated'
+@	svr__kernel	svr__kernel: 'linear'
+@	svr__max_iter	svr__max_iter: -1
+@	svr__shrinking	svr__shrinking: True
+@	svr__tol	svr__tol: 0.001
+*	svr__verbose	svr__verbose: False
+	Note:	@, searchable params in searchcv too.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params02.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,33 @@
+	Parameter	Value
+*	memory	memory: None
+*	steps	"steps: [('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+       with_scaling=True)), ('lassocv', LassoCV(alphas=None, copy_X=True, cv='warn', eps=0.001, fit_intercept=True,
+    max_iter=1000, n_alphas=100, n_jobs=1, normalize=False, positive=False,
+    precompute='auto', random_state=None, selection='cyclic', tol=0.0001,
+    verbose=False))]"
+@	robustscaler	"robustscaler: RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+       with_scaling=True)"
+@	lassocv	"lassocv: LassoCV(alphas=None, copy_X=True, cv='warn', eps=0.001, fit_intercept=True,
+    max_iter=1000, n_alphas=100, n_jobs=1, normalize=False, positive=False,
+    precompute='auto', random_state=None, selection='cyclic', tol=0.0001,
+    verbose=False)"
+@	robustscaler__copy	robustscaler__copy: True
+@	robustscaler__quantile_range	robustscaler__quantile_range: (25.0, 75.0)
+@	robustscaler__with_centering	robustscaler__with_centering: True
+@	robustscaler__with_scaling	robustscaler__with_scaling: True
+@	lassocv__alphas	lassocv__alphas: None
+@	lassocv__copy_X	lassocv__copy_X: True
+@	lassocv__cv	lassocv__cv: 'warn'
+@	lassocv__eps	lassocv__eps: 0.001
+@	lassocv__fit_intercept	lassocv__fit_intercept: True
+@	lassocv__max_iter	lassocv__max_iter: 1000
+@	lassocv__n_alphas	lassocv__n_alphas: 100
+*	lassocv__n_jobs	lassocv__n_jobs: 1
+@	lassocv__normalize	lassocv__normalize: False
+@	lassocv__positive	lassocv__positive: False
+@	lassocv__precompute	lassocv__precompute: 'auto'
+@	lassocv__random_state	lassocv__random_state: None
+@	lassocv__selection	lassocv__selection: 'cyclic'
+@	lassocv__tol	lassocv__tol: 0.0001
+*	lassocv__verbose	lassocv__verbose: False
+	Note:	@, searchable params in searchcv too.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params03.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,43 @@
+	Parameter	Value
+*	memory	memory: None
+*	steps	"steps: [('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+       with_scaling=True)), ('xgbclassifier', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+       max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
+       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+       silent=True, subsample=1))]"
+@	robustscaler	"robustscaler: RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
+       with_scaling=True)"
+@	xgbclassifier	"xgbclassifier: XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+       max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
+       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+       silent=True, subsample=1)"
+@	robustscaler__copy	robustscaler__copy: True
+@	robustscaler__quantile_range	robustscaler__quantile_range: (25.0, 75.0)
+@	robustscaler__with_centering	robustscaler__with_centering: True
+@	robustscaler__with_scaling	robustscaler__with_scaling: True
+@	xgbclassifier__base_score	xgbclassifier__base_score: 0.5
+@	xgbclassifier__booster	xgbclassifier__booster: 'gbtree'
+@	xgbclassifier__colsample_bylevel	xgbclassifier__colsample_bylevel: 1
+@	xgbclassifier__colsample_bytree	xgbclassifier__colsample_bytree: 1
+@	xgbclassifier__gamma	xgbclassifier__gamma: 0
+@	xgbclassifier__learning_rate	xgbclassifier__learning_rate: 0.1
+@	xgbclassifier__max_delta_step	xgbclassifier__max_delta_step: 0
+@	xgbclassifier__max_depth	xgbclassifier__max_depth: 3
+@	xgbclassifier__min_child_weight	xgbclassifier__min_child_weight: 1
+@	xgbclassifier__missing	xgbclassifier__missing: nan
+@	xgbclassifier__n_estimators	xgbclassifier__n_estimators: 100
+*	xgbclassifier__n_jobs	xgbclassifier__n_jobs: 1
+*	xgbclassifier__nthread	xgbclassifier__nthread: None
+@	xgbclassifier__objective	xgbclassifier__objective: 'binary:logistic'
+@	xgbclassifier__random_state	xgbclassifier__random_state: 0
+@	xgbclassifier__reg_alpha	xgbclassifier__reg_alpha: 0
+@	xgbclassifier__reg_lambda	xgbclassifier__reg_lambda: 1
+@	xgbclassifier__scale_pos_weight	xgbclassifier__scale_pos_weight: 1
+@	xgbclassifier__seed	xgbclassifier__seed: None
+@	xgbclassifier__silent	xgbclassifier__silent: True
+@	xgbclassifier__subsample	xgbclassifier__subsample: 1
+	Note:	@, searchable params in searchcv too.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params04.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,39 @@
+	Parameter	Value
+*	memory	memory: None
+*	steps	"steps: [('selectfrommodel', SelectFromModel(estimator=AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+          learning_rate=1.0, n_estimators=50, random_state=None),
+        max_features=None, norm_order=1, prefit=False, threshold=None)), ('linearsvc', LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
+     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
+     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
+     verbose=0))]"
+@	selectfrommodel	"selectfrommodel: SelectFromModel(estimator=AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+          learning_rate=1.0, n_estimators=50, random_state=None),
+        max_features=None, norm_order=1, prefit=False, threshold=None)"
+@	linearsvc	"linearsvc: LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
+     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
+     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
+     verbose=0)"
+@	selectfrommodel__estimator__algorithm	selectfrommodel__estimator__algorithm: 'SAMME.R'
+@	selectfrommodel__estimator__base_estimator	selectfrommodel__estimator__base_estimator: None
+@	selectfrommodel__estimator__learning_rate	selectfrommodel__estimator__learning_rate: 1.0
+@	selectfrommodel__estimator__n_estimators	selectfrommodel__estimator__n_estimators: 50
+@	selectfrommodel__estimator__random_state	selectfrommodel__estimator__random_state: None
+@	selectfrommodel__estimator	"selectfrommodel__estimator: AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+          learning_rate=1.0, n_estimators=50, random_state=None)"
+@	selectfrommodel__max_features	selectfrommodel__max_features: None
+@	selectfrommodel__norm_order	selectfrommodel__norm_order: 1
+@	selectfrommodel__prefit	selectfrommodel__prefit: False
+@	selectfrommodel__threshold	selectfrommodel__threshold: None
+@	linearsvc__C	linearsvc__C: 1.0
+@	linearsvc__class_weight	linearsvc__class_weight: None
+@	linearsvc__dual	linearsvc__dual: True
+@	linearsvc__fit_intercept	linearsvc__fit_intercept: True
+@	linearsvc__intercept_scaling	linearsvc__intercept_scaling: 1
+@	linearsvc__loss	linearsvc__loss: 'squared_hinge'
+@	linearsvc__max_iter	linearsvc__max_iter: 1000
+@	linearsvc__multi_class	linearsvc__multi_class: 'ovr'
+@	linearsvc__penalty	linearsvc__penalty: 'l2'
+@	linearsvc__random_state	linearsvc__random_state: None
+@	linearsvc__tol	linearsvc__tol: 0.0001
+*	linearsvc__verbose	linearsvc__verbose: 0
+	Note:	@, searchable params in searchcv too.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params05.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,31 @@
+	Parameter	Value
+*	memory	memory: None
+*	steps	"steps: [('randomforestregressor', RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
+           max_features='auto', max_leaf_nodes=None,
+           min_impurity_decrease=0.0, min_impurity_split=None,
+           min_samples_leaf=1, min_samples_split=2,
+           min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
+           oob_score=False, random_state=42, verbose=0, warm_start=False))]"
+@	randomforestregressor	"randomforestregressor: RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
+           max_features='auto', max_leaf_nodes=None,
+           min_impurity_decrease=0.0, min_impurity_split=None,
+           min_samples_leaf=1, min_samples_split=2,
+           min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
+           oob_score=False, random_state=42, verbose=0, warm_start=False)"
+@	randomforestregressor__bootstrap	randomforestregressor__bootstrap: True
+@	randomforestregressor__criterion	randomforestregressor__criterion: 'mse'
+@	randomforestregressor__max_depth	randomforestregressor__max_depth: None
+@	randomforestregressor__max_features	randomforestregressor__max_features: 'auto'
+@	randomforestregressor__max_leaf_nodes	randomforestregressor__max_leaf_nodes: None
+@	randomforestregressor__min_impurity_decrease	randomforestregressor__min_impurity_decrease: 0.0
+@	randomforestregressor__min_impurity_split	randomforestregressor__min_impurity_split: None
+@	randomforestregressor__min_samples_leaf	randomforestregressor__min_samples_leaf: 1
+@	randomforestregressor__min_samples_split	randomforestregressor__min_samples_split: 2
+@	randomforestregressor__min_weight_fraction_leaf	randomforestregressor__min_weight_fraction_leaf: 0.0
+@	randomforestregressor__n_estimators	randomforestregressor__n_estimators: 100
+*	randomforestregressor__n_jobs	randomforestregressor__n_jobs: 1
+@	randomforestregressor__oob_score	randomforestregressor__oob_score: False
+@	randomforestregressor__random_state	randomforestregressor__random_state: 42
+*	randomforestregressor__verbose	randomforestregressor__verbose: 0
+@	randomforestregressor__warm_start	randomforestregressor__warm_start: False
+	Note:	@, searchable params in searchcv too.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params06.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,22 @@
+	Parameter	Value
+*	memory	memory: None
+*	steps	"steps: [('pca', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
+  svd_solver='auto', tol=0.0, whiten=False)), ('adaboostregressor', AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
+         n_estimators=50, random_state=None))]"
+@	pca	"pca: PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
+  svd_solver='auto', tol=0.0, whiten=False)"
+@	adaboostregressor	"adaboostregressor: AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
+         n_estimators=50, random_state=None)"
+@	pca__copy	pca__copy: True
+@	pca__iterated_power	pca__iterated_power: 'auto'
+@	pca__n_components	pca__n_components: None
+@	pca__random_state	pca__random_state: None
+@	pca__svd_solver	pca__svd_solver: 'auto'
+@	pca__tol	pca__tol: 0.0
+@	pca__whiten	pca__whiten: False
+@	adaboostregressor__base_estimator	adaboostregressor__base_estimator: None
+@	adaboostregressor__learning_rate	adaboostregressor__learning_rate: 1.0
+@	adaboostregressor__loss	adaboostregressor__loss: 'linear'
+@	adaboostregressor__n_estimators	adaboostregressor__n_estimators: 50
+@	adaboostregressor__random_state	adaboostregressor__random_state: None
+	Note:	@, searchable params in searchcv too.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params07.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,16 @@
+	Parameter	Value
+*	memory	memory: None
+*	steps	"steps: [('rbfsampler', RBFSampler(gamma=2.0, n_components=10, random_state=None)), ('adaboostclassifier', AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+          learning_rate=1.0, n_estimators=50, random_state=None))]"
+@	rbfsampler	rbfsampler: RBFSampler(gamma=2.0, n_components=10, random_state=None)
+@	adaboostclassifier	"adaboostclassifier: AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+          learning_rate=1.0, n_estimators=50, random_state=None)"
+@	rbfsampler__gamma	rbfsampler__gamma: 2.0
+@	rbfsampler__n_components	rbfsampler__n_components: 10
+@	rbfsampler__random_state	rbfsampler__random_state: None
+@	adaboostclassifier__algorithm	adaboostclassifier__algorithm: 'SAMME.R'
+@	adaboostclassifier__base_estimator	adaboostclassifier__base_estimator: None
+@	adaboostclassifier__learning_rate	adaboostclassifier__learning_rate: 1.0
+@	adaboostclassifier__n_estimators	adaboostclassifier__n_estimators: 50
+@	adaboostclassifier__random_state	adaboostclassifier__random_state: None
+	Note:	@, searchable params in searchcv too.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params08.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,24 @@
+	Parameter	Value
+*	memory	memory: None
+*	steps	"steps: [('featureagglomeration', FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto',
+           connectivity=None, linkage='ward', memory=None, n_clusters=3,
+           pooling_func=<function mean at 0x1123f1620>)), ('adaboostclassifier', AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+          learning_rate=1.0, n_estimators=50, random_state=None))]"
+@	featureagglomeration	"featureagglomeration: FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto',
+           connectivity=None, linkage='ward', memory=None, n_clusters=3,
+           pooling_func=<function mean at 0x1123f1620>)"
+@	adaboostclassifier	"adaboostclassifier: AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
+          learning_rate=1.0, n_estimators=50, random_state=None)"
+@	featureagglomeration__affinity	featureagglomeration__affinity: 'euclidean'
+@	featureagglomeration__compute_full_tree	featureagglomeration__compute_full_tree: 'auto'
+@	featureagglomeration__connectivity	featureagglomeration__connectivity: None
+@	featureagglomeration__linkage	featureagglomeration__linkage: 'ward'
+*	featureagglomeration__memory	featureagglomeration__memory: None
+@	featureagglomeration__n_clusters	featureagglomeration__n_clusters: 3
+@	featureagglomeration__pooling_func	featureagglomeration__pooling_func: <function mean at 0x1123f1620>
+@	adaboostclassifier__algorithm	adaboostclassifier__algorithm: 'SAMME.R'
+@	adaboostclassifier__base_estimator	adaboostclassifier__base_estimator: None
+@	adaboostclassifier__learning_rate	adaboostclassifier__learning_rate: 1.0
+@	adaboostclassifier__n_estimators	adaboostclassifier__n_estimators: 50
+@	adaboostclassifier__random_state	adaboostclassifier__random_state: None
+	Note:	@, searchable params in searchcv too.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params09.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,39 @@
+	Parameter	Value
+*	memory	memory: None
+*	steps	"steps: [('relieff', ReliefF(discrete_threshold=10, n_features_to_select=3, n_jobs=1,
+    n_neighbors=100, verbose=False)), ('randomforestregressor', RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
+           max_features='auto', max_leaf_nodes=None,
+           min_impurity_decrease=0.0, min_impurity_split=None,
+           min_samples_leaf=1, min_samples_split=2,
+           min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=1,
+           oob_score=False, random_state=None, verbose=0, warm_start=False))]"
+@	relieff	"relieff: ReliefF(discrete_threshold=10, n_features_to_select=3, n_jobs=1,
+    n_neighbors=100, verbose=False)"
+@	randomforestregressor	"randomforestregressor: RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
+           max_features='auto', max_leaf_nodes=None,
+           min_impurity_decrease=0.0, min_impurity_split=None,
+           min_samples_leaf=1, min_samples_split=2,
+           min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=1,
+           oob_score=False, random_state=None, verbose=0, warm_start=False)"
+@	relieff__discrete_threshold	relieff__discrete_threshold: 10
+@	relieff__n_features_to_select	relieff__n_features_to_select: 3
+*	relieff__n_jobs	relieff__n_jobs: 1
+@	relieff__n_neighbors	relieff__n_neighbors: 100
+*	relieff__verbose	relieff__verbose: False
+@	randomforestregressor__bootstrap	randomforestregressor__bootstrap: True
+@	randomforestregressor__criterion	randomforestregressor__criterion: 'mse'
+@	randomforestregressor__max_depth	randomforestregressor__max_depth: None
+@	randomforestregressor__max_features	randomforestregressor__max_features: 'auto'
+@	randomforestregressor__max_leaf_nodes	randomforestregressor__max_leaf_nodes: None
+@	randomforestregressor__min_impurity_decrease	randomforestregressor__min_impurity_decrease: 0.0
+@	randomforestregressor__min_impurity_split	randomforestregressor__min_impurity_split: None
+@	randomforestregressor__min_samples_leaf	randomforestregressor__min_samples_leaf: 1
+@	randomforestregressor__min_samples_split	randomforestregressor__min_samples_split: 2
+@	randomforestregressor__min_weight_fraction_leaf	randomforestregressor__min_weight_fraction_leaf: 0.0
+@	randomforestregressor__n_estimators	randomforestregressor__n_estimators: 'warn'
+*	randomforestregressor__n_jobs	randomforestregressor__n_jobs: 1
+@	randomforestregressor__oob_score	randomforestregressor__oob_score: False
+@	randomforestregressor__random_state	randomforestregressor__random_state: None
+*	randomforestregressor__verbose	randomforestregressor__verbose: 0
+@	randomforestregressor__warm_start	randomforestregressor__warm_start: False
+	Note:	@, searchable params in searchcv too.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params10.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,12 @@
+	Parameter	Value
+*	memory	memory: None
+*	steps	"steps: [('adaboostregressor', AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
+         n_estimators=50, random_state=None))]"
+@	adaboostregressor	"adaboostregressor: AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
+         n_estimators=50, random_state=None)"
+@	adaboostregressor__base_estimator	adaboostregressor__base_estimator: None
+@	adaboostregressor__learning_rate	adaboostregressor__learning_rate: 1.0
+@	adaboostregressor__loss	adaboostregressor__loss: 'linear'
+@	adaboostregressor__n_estimators	adaboostregressor__n_estimators: 50
+@	adaboostregressor__random_state	adaboostregressor__random_state: None
+	Note:	@, params eligible for search in searchcv tool.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params11.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,46 @@
+	Parameter	Value
+*	memory	memory: None
+*	steps	"steps: [('editednearestneighbours', EditedNearestNeighbours(kind_sel='all', n_jobs=1, n_neighbors=3,
+            random_state=None, ratio=None, return_indices=False,
+            sampling_strategy='auto')), ('randomforestclassifier', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
+            max_depth=None, max_features='auto', max_leaf_nodes=None,
+            min_impurity_decrease=0.0, min_impurity_split=None,
+            min_samples_leaf=1, min_samples_split=2,
+            min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=1,
+            oob_score=False, random_state=None, verbose=0,
+            warm_start=False))]"
+@	editednearestneighbours	"editednearestneighbours: EditedNearestNeighbours(kind_sel='all', n_jobs=1, n_neighbors=3,
+            random_state=None, ratio=None, return_indices=False,
+            sampling_strategy='auto')"
+@	randomforestclassifier	"randomforestclassifier: RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
+            max_depth=None, max_features='auto', max_leaf_nodes=None,
+            min_impurity_decrease=0.0, min_impurity_split=None,
+            min_samples_leaf=1, min_samples_split=2,
+            min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=1,
+            oob_score=False, random_state=None, verbose=0,
+            warm_start=False)"
+@	editednearestneighbours__kind_sel	editednearestneighbours__kind_sel: 'all'
+*	editednearestneighbours__n_jobs	editednearestneighbours__n_jobs: 1
+@	editednearestneighbours__n_neighbors	editednearestneighbours__n_neighbors: 3
+@	editednearestneighbours__random_state	editednearestneighbours__random_state: None
+@	editednearestneighbours__ratio	editednearestneighbours__ratio: None
+@	editednearestneighbours__return_indices	editednearestneighbours__return_indices: False
+@	editednearestneighbours__sampling_strategy	editednearestneighbours__sampling_strategy: 'auto'
+@	randomforestclassifier__bootstrap	randomforestclassifier__bootstrap: True
+@	randomforestclassifier__class_weight	randomforestclassifier__class_weight: None
+@	randomforestclassifier__criterion	randomforestclassifier__criterion: 'gini'
+@	randomforestclassifier__max_depth	randomforestclassifier__max_depth: None
+@	randomforestclassifier__max_features	randomforestclassifier__max_features: 'auto'
+@	randomforestclassifier__max_leaf_nodes	randomforestclassifier__max_leaf_nodes: None
+@	randomforestclassifier__min_impurity_decrease	randomforestclassifier__min_impurity_decrease: 0.0
+@	randomforestclassifier__min_impurity_split	randomforestclassifier__min_impurity_split: None
+@	randomforestclassifier__min_samples_leaf	randomforestclassifier__min_samples_leaf: 1
+@	randomforestclassifier__min_samples_split	randomforestclassifier__min_samples_split: 2
+@	randomforestclassifier__min_weight_fraction_leaf	randomforestclassifier__min_weight_fraction_leaf: 0.0
+@	randomforestclassifier__n_estimators	randomforestclassifier__n_estimators: 'warn'
+*	randomforestclassifier__n_jobs	randomforestclassifier__n_jobs: 1
+@	randomforestclassifier__oob_score	randomforestclassifier__oob_score: False
+@	randomforestclassifier__random_state	randomforestclassifier__random_state: None
+*	randomforestclassifier__verbose	randomforestclassifier__verbose: 0
+@	randomforestclassifier__warm_start	randomforestclassifier__warm_start: False
+	Note:	@, searchable params in searchcv too.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/get_params12.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,47 @@
+	Parameter	Value
+*	memory	memory: None
+*	steps	"steps: [('rfe', RFE(estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+       max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
+       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+       silent=True, subsample=1),
+  n_features_to_select=None, step=1, verbose=0))]"
+@	rfe	"rfe: RFE(estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+       max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
+       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+       silent=True, subsample=1),
+  n_features_to_select=None, step=1, verbose=0)"
+@	rfe__estimator__base_score	rfe__estimator__base_score: 0.5
+@	rfe__estimator__booster	rfe__estimator__booster: 'gbtree'
+@	rfe__estimator__colsample_bylevel	rfe__estimator__colsample_bylevel: 1
+@	rfe__estimator__colsample_bytree	rfe__estimator__colsample_bytree: 1
+@	rfe__estimator__gamma	rfe__estimator__gamma: 0
+@	rfe__estimator__learning_rate	rfe__estimator__learning_rate: 0.1
+@	rfe__estimator__max_delta_step	rfe__estimator__max_delta_step: 0
+@	rfe__estimator__max_depth	rfe__estimator__max_depth: 3
+@	rfe__estimator__min_child_weight	rfe__estimator__min_child_weight: 1
+@	rfe__estimator__missing	rfe__estimator__missing: nan
+@	rfe__estimator__n_estimators	rfe__estimator__n_estimators: 100
+*	rfe__estimator__n_jobs	rfe__estimator__n_jobs: 1
+*	rfe__estimator__nthread	rfe__estimator__nthread: None
+@	rfe__estimator__objective	rfe__estimator__objective: 'reg:linear'
+@	rfe__estimator__random_state	rfe__estimator__random_state: 0
+@	rfe__estimator__reg_alpha	rfe__estimator__reg_alpha: 0
+@	rfe__estimator__reg_lambda	rfe__estimator__reg_lambda: 1
+@	rfe__estimator__scale_pos_weight	rfe__estimator__scale_pos_weight: 1
+@	rfe__estimator__seed	rfe__estimator__seed: None
+@	rfe__estimator__silent	rfe__estimator__silent: True
+@	rfe__estimator__subsample	rfe__estimator__subsample: 1
+@	rfe__estimator	"rfe__estimator: XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+       max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
+       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+       silent=True, subsample=1)"
+@	rfe__n_features_to_select	rfe__n_features_to_select: None
+@	rfe__step	rfe__step: 1
+*	rfe__verbose	rfe__verbose: 0
+	Note:	@, searchable params in searchcv too.
--- a/test-data/mv_result01.tabular	Sun Dec 30 01:51:27 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-0.9452947345848994
-0.9926363525448115
--0.4384003222944141
--- a/test-data/mv_result02.tabular	Sun Dec 30 01:51:27 2018 -0500
+++ b/test-data/mv_result02.tabular	Tue May 14 18:05:43 2019 -0400
@@ -1,10 +1,11 @@
-1.6957921248350636
--0.9248588846061156
--0.48640795813792376
-0.647707440306449
-0.32740690920811427
--0.8229559569886034
-1.2150108977866847
-0.14723254190255275
-0.6053186541119763
-0.3972102859168325
+Predicted
+1.578912095858962
+-1.199072894940544
+-0.7173258906076226
+0.3255908318822695
+0.21919344304093213
+-0.6841926371423699
+1.1144698671662865
+0.19379531649046616
+0.9405094785593062
+1.2581284896870837
--- a/test-data/mv_result03.tabular	Sun Dec 30 01:51:27 2018 -0500
+++ b/test-data/mv_result03.tabular	Tue May 14 18:05:43 2019 -0400
@@ -1,3 +1,6 @@
-0.9452947345848994
-0.9926363525448115
--0.4384003222944141
+train_sizes_abs	mean_train_scores	std_train_scores	mean_test_scores	std_test_scores
+17	0.9668700841937653	0.00277836829836518	0.7008862995946905	0.03857541198731935
+56	0.9730008602419361	0.006839342612121988	0.7963376762427242	0.004846330083938778
+95	0.9728783377589098	0.0037790183626530663	0.814592845745573	0.020457691766770824
+134	0.9739086338111185	0.001627343246847077	0.7985540571195479	0.03954641079310707
+174	0.9726218628287785	0.0032867750457225182	0.8152971572131146	0.04280261115004303
--- a/test-data/mv_result04.tabular	Sun Dec 30 01:51:27 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-17
-56
-95
-134
-174
--- a/test-data/mv_result05.tabular	Sun Dec 30 01:51:27 2018 -0500
+++ b/test-data/mv_result05.tabular	Tue May 14 18:05:43 2019 -0400
@@ -1,1 +1,262 @@
-0.4998435882784322
+Predicted
+70.16
+62.06
+83.04
+62.84
+48.63
+51.25
+54.98
+80.3
+42.84
+41.52
+43.83
+73.15
+74.22
+42.88
+74.93
+72.9
+53.74
+78.86
+59.0
+40.28
+54.52
+58.34
+62.74
+62.35
+49.15
+41.92
+65.59
+59.91
+66.49
+72.08
+60.44
+53.84
+54.82
+52.66
+42.37
+61.3
+63.14
+50.62
+42.75
+47.39
+67.8
+73.58
+49.97
+67.04
+67.45
+54.67
+64.87
+77.23
+73.52
+53.55
+70.53
+77.98
+61.99
+53.08
+78.12
+66.55
+63.95
+60.57
+61.6
+60.37
+55.29
+54.31
+52.54
+65.31
+61.51
+57.3
+60.02
+43.64
+74.78
+68.26
+42.72
+61.26
+61.25
+71.58
+61.03
+70.53
+70.25
+43.4
+71.39
+72.31
+72.7
+72.11
+53.55
+43.4
+80.6
+73.72
+58.86
+76.71
+68.36
+60.26
+48.56
+38.96
+69.67
+52.9
+67.63
+75.12
+70.92
+70.89
+67.05
+43.89
+59.94
+62.98
+71.1
+79.22
+77.31
+79.06
+61.11
+66.32
+54.7
+61.1
+54.59
+58.7
+59.6
+73.79
+72.69
+81.83
+61.08
+69.21
+74.8
+54.37
+50.85
+53.07
+58.53
+55.44
+72.62
+54.14
+68.12
+48.81
+50.11
+56.06
+73.63
+63.29
+71.0
+74.87
+81.24
+54.67
+66.96
+61.37
+74.84
+76.71
+69.27
+56.53
+71.91
+58.74
+77.83
+64.57
+51.93
+42.84
+64.11
+59.47
+42.46
+43.79
+51.75
+63.98
+54.71
+64.95
+79.72
+72.12
+60.66
+79.3
+71.26
+59.9
+74.25
+59.68
+52.37
+78.52
+58.52
+71.98
+71.77
+54.48
+48.96
+81.42
+54.08
+53.52
+64.38
+70.79
+63.95
+67.48
+61.76
+66.15
+62.1
+75.68
+69.72
+43.8
+56.27
+53.38
+81.31
+57.54
+48.15
+59.47
+78.01
+56.39
+72.33
+78.8
+78.66
+52.01
+66.68
+48.56
+47.75
+65.67
+77.93
+72.68
+58.0
+77.83
+73.37
+65.39
+69.79
+55.98
+46.35
+54.31
+55.58
+79.69
+52.76
+62.62
+66.54
+60.29
+62.57
+74.86
+48.05
+65.09
+65.02
+67.84
+41.86
+62.28
+57.05
+43.68
+72.0
+63.04
+54.41
+73.37
+75.11
+42.65
+73.16
+71.68
+58.61
+53.54
+73.33
+72.16
+49.96
+54.78
+64.24
+60.13
+76.46
+61.53
+68.36
+53.1
+71.33
+76.12
+70.86
+61.35
+67.12
+43.25
+80.2
+71.16
+58.63
+52.37
+74.93
+53.34
+76.41
+63.87
+59.97
--- a/test-data/mv_result06.tabular	Sun Dec 30 01:51:27 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-0.07547169811320754	0.10344827586206896	0.10294117647058823
-0.07547169811320754	0.10344827586206896	0.10294117647058823
-0.07547169811320754	0.10344827586206896	0.10294117647058823
-0.07547169811320754	0.10344827586206896	0.10294117647058823
-0.07547169811320754	0.10344827586206896	0.10294117647058823
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/named_steps.txt	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,6 @@
+{'preprocessing_1': SelectKBest(k=10, score_func=<function f_regression at 0x113310ea0>), 'estimator': XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
+       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
+       max_depth=3, min_child_weight=1, missing=nan, n_estimators=100,
+       n_jobs=1, nthread=None, objective='reg:linear', random_state=10,
+       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
+       silent=True, subsample=1)}
\ No newline at end of file
Binary file test-data/nn_model01 has changed
Binary file test-data/pipeline01 has changed
Binary file test-data/pipeline02 has changed
Binary file test-data/pipeline03 has changed
Binary file test-data/pipeline04 has changed
Binary file test-data/pipeline05 has changed
Binary file test-data/pipeline06 has changed
Binary file test-data/pipeline07 has changed
Binary file test-data/pipeline08 has changed
Binary file test-data/pipeline09 has changed
Binary file test-data/pipeline10 has changed
Binary file test-data/pipeline11 has changed
Binary file test-data/pipeline12 has changed
Binary file test-data/pipeline13 has changed
Binary file test-data/pipeline14 has changed
Binary file test-data/pipeline15 has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ranking_.tabular	Tue May 14 18:05:43 2019 -0400
@@ -0,0 +1,18 @@
+ranking_
+17
+7
+4
+5
+2
+1
+9
+6
+8
+3
+10
+15
+14
+11
+13
+12
+16
Binary file test-data/searchCV01 has changed
Binary file test-data/searchCV02 has changed
--- a/utils.py	Sun Dec 30 01:51:27 2018 -0500
+++ b/utils.py	Tue May 14 18:05:43 2019 -0400
@@ -1,80 +1,134 @@
+import ast
 import json
+import imblearn
 import numpy as np
-import os
 import pandas
 import pickle
 import re
 import scipy
 import sklearn
+import skrebate
 import sys
 import warnings
 import xgboost

+from collections import Counter
 from asteval import Interpreter, make_symbol_table
-from sklearn import (cluster, compose, decomposition, ensemble, feature_extraction,
-                    feature_selection, gaussian_process, kernel_approximation, metrics,
-                    model_selection, naive_bayes, neighbors, pipeline, preprocessing,
-                    svm, linear_model, tree, discriminant_analysis)
+from imblearn import under_sampling, over_sampling, combine
+from imblearn.pipeline import Pipeline as imbPipeline
+from mlxtend import regressor, classifier
+from scipy.io import mmread
+from sklearn import (
+    cluster, compose, decomposition, ensemble, feature_extraction,
+    feature_selection, gaussian_process, kernel_approximation, metrics,
+    model_selection, naive_bayes, neighbors, pipeline, preprocessing,
+    svm, linear_model, tree, discriminant_analysis)
+
+try:
+    import iraps_classifier
+except ImportError:
+    pass

 try:
-    import skrebate
-except ModuleNotFoundError:
+    import model_validations
+except ImportError:
+    pass
+
+try:
+    import feature_selectors
+except ImportError:
     pass

-
-N_JOBS = int(os.environ.get('GALAXY_SLOTS', 1))
+try:
+    import preprocessors
+except ImportError:
+    pass

-try:
-    sk_whitelist
-except NameError:
-    sk_whitelist = None
+# handle pickle white list file
+WL_FILE = __import__('os').path.join(
+    __import__('os').path.dirname(__file__), 'pk_whitelist.json')
+
+N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))


-class SafePickler(pickle.Unpickler):
+class _SafePickler(pickle.Unpickler, object):
     """
-    Used to safely deserialize scikit-learn model objects serialized by cPickle.dump
+    Used to safely deserialize scikit-learn model objects
     Usage:
-        eg.: SafePickler.load(pickled_file_object)
+        eg.: _SafePickler.load(pickled_file_object)
     """
-    def find_class(self, module, name):
+    def __init__(self, file):
+        super(_SafePickler, self).__init__(file)
+        # load global white list
+        with open(WL_FILE, 'r') as f:
+            self.pk_whitelist = json.load(f)

-        # sk_whitelist could be read from tool
-        global sk_whitelist
-        if not sk_whitelist:
-            whitelist_file = os.path.join(os.path.dirname(__file__), 'sk_whitelist.json')
-            with open(whitelist_file, 'r') as f:
-                sk_whitelist = json.load(f)
+        self.bad_names = (
+            'and', 'as', 'assert', 'break', 'class', 'continue',
+            'def', 'del', 'elif', 'else', 'except', 'exec',
+            'finally', 'for', 'from', 'global', 'if', 'import',
+            'in', 'is', 'lambda', 'not', 'or', 'pass', 'print',
+            'raise', 'return', 'try', 'system', 'while', 'with',
+            'True', 'False', 'None', 'eval', 'execfile', '__import__',
+            '__package__', '__subclasses__', '__bases__', '__globals__',
+            '__code__', '__closure__', '__func__', '__self__', '__module__',
+            '__dict__', '__class__', '__call__', '__get__',
+            '__getattribute__', '__subclasshook__', '__new__',
+            '__init__', 'func_globals', 'func_code', 'func_closure',
+            'im_class', 'im_func', 'im_self', 'gi_code', 'gi_frame',
+            '__asteval__', 'f_locals', '__mro__')

-        bad_names = ('and', 'as', 'assert', 'break', 'class', 'continue',
-                    'def', 'del', 'elif', 'else', 'except', 'exec',
-                    'finally', 'for', 'from', 'global', 'if', 'import',
-                    'in', 'is', 'lambda', 'not', 'or', 'pass', 'print',
-                    'raise', 'return', 'try', 'system', 'while', 'with',
-                    'True', 'False', 'None', 'eval', 'execfile', '__import__',
-                    '__package__', '__subclasses__', '__bases__', '__globals__',
-                    '__code__', '__closure__', '__func__', '__self__', '__module__',
-                    '__dict__', '__class__', '__call__', '__get__',
-                    '__getattribute__', '__subclasshook__', '__new__',
-                    '__init__', 'func_globals', 'func_code', 'func_closure',
-                    'im_class', 'im_func', 'im_self', 'gi_code', 'gi_frame',
-                    '__asteval__', 'f_locals', '__mro__')
-        good_names = ['copy_reg._reconstructor', '__builtin__.object']
+        # unclassified good globals
+        self.good_names = [
+            'copy_reg._reconstructor', '__builtin__.object',
+            '__builtin__.bytearray', 'builtins.object',
+            'builtins.bytearray', 'keras.engine.sequential.Sequential',
+            'keras.engine.sequential.Model']
+
+        # custom module in Galaxy-ML
+        self.custom_modules = [
+            '__main__', 'keras_galaxy_models', 'feature_selectors',
+            'preprocessors', 'iraps_classifier', 'model_validations']

+    # override
+    def find_class(self, module, name):
+        # check the blacklist first
+        if name in self.bad_names:
+            raise pickle.UnpicklingError("global '%s.%s' is forbidden"
+                                         % (module, name))
+
+        # custom module in Galaxy-ML
+        if module in self.custom_modules:
+            cutom_module = sys.modules.get(module, None)
+            if cutom_module:
+                return getattr(cutom_module, name)
+            else:
+                raise pickle.UnpicklingError("Module %s' is not imported"
+                                             % module)
+
+        # For objects from outside libraries, it's necessary to verify
+        # both module and name. Currently only a blacklist checker
+        # is working.
+        # TODO: replace with a whitelist checker.
+        good_names = self.good_names
+        pk_whitelist = self.pk_whitelist
         if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
             fullname = module + '.' + name
             if (fullname in good_names)\
-                or  (   (   module.startswith('sklearn.')
-                            or module.startswith('xgboost.')
-                            or module.startswith('skrebate.')
-                            or module.startswith('imblearn')
-                            or module.startswith('numpy.')
-                            or module == 'numpy'
-                        )
-                        and (name not in bad_names)
-                    ):
-                # TODO: replace with a whitelist checker
-                if fullname not in sk_whitelist['SK_NAMES'] + sk_whitelist['SKR_NAMES'] + sk_whitelist['XGB_NAMES'] + sk_whitelist['NUMPY_NAMES'] + sk_whitelist['IMBLEARN_NAMES'] + good_names:
-                    print("Warning: global %s is not in pickler whitelist yet and will loss support soon. Contact tool author or leave a message at github.com" % fullname)
+                or (module.startswith(('sklearn.', 'xgboost.', 'skrebate.',
+                                       'imblearn.', 'mlxtend.', 'numpy.'))
+                    or module == 'numpy'):
+                if fullname not in (pk_whitelist['SK_NAMES'] +
+                                    pk_whitelist['SKR_NAMES'] +
+                                    pk_whitelist['XGB_NAMES'] +
+                                    pk_whitelist['NUMPY_NAMES'] +
+                                    pk_whitelist['IMBLEARN_NAMES'] +
+                                    pk_whitelist['MLXTEND_NAMES'] +
+                                    good_names):
+                    # raise pickle.UnpicklingError
+                    print("Warning: global %s is not in pickler whitelist "
+                          "yet and will loss support soon. Contact tool "
+                          "author or leave a message at github.com" % fullname)
                 mod = sys.modules[module]
                 return getattr(mod, name)

@@ -82,10 +136,15 @@


 def load_model(file):
-    return SafePickler(file).load()
+    """Load pickled object with `_SafePickler`
+    """
+    return _SafePickler(file).load()


-def read_columns(f, c=None, c_option='by_index_number', return_df=False, **args):
+def read_columns(f, c=None, c_option='by_index_number',
+                 return_df=False, **args):
+    """Return an array of the columns selected from a tabular dataset
+    """
     data = pandas.read_csv(f, **args)
     if c_option == 'by_index_number':
         cols = list(map(lambda x: x - 1, c))
@@ -106,10 +165,21 @@
         return y


-## generate an instance for one of sklearn.feature_selection classes
-def feature_selector(inputs):
+def feature_selector(inputs, X=None, y=None):
+    """Generate a feature selector (sklearn.feature_selection or DyRFECV)
+
+    Parameters
+    ----------
+    inputs : dict
+        From galaxy tool parameters.
+    X : array
+        Containing training features.
+    y : array or list
+        Target values.
+    """
     selector = inputs['selected_algorithm']
-    selector = getattr(sklearn.feature_selection, selector)
+    if selector != 'DyRFECV':
+        selector = getattr(sklearn.feature_selection, selector)
     options = inputs['options']

     if inputs['selected_algorithm'] == 'SelectFromModel':
@@ -128,27 +198,60 @@
         else:
             estimator_json = inputs['model_inputter']['estimator_selector']
             estimator = get_estimator(estimator_json)
+            check_feature_importances = try_get_attr(
+                'feature_selectors', 'check_feature_importances')
+            estimator = check_feature_importances(estimator)
             new_selector = selector(estimator, **options)

     elif inputs['selected_algorithm'] == 'RFE':
-        estimator = get_estimator(inputs['estimator_selector'])
         step = options.get('step', None)
         if step and step >= 1.0:
             options['step'] = int(step)
+        estimator = get_estimator(inputs["estimator_selector"])
+        check_feature_importances = try_get_attr(
+            'feature_selectors', 'check_feature_importances')
+        estimator = check_feature_importances(estimator)
         new_selector = selector(estimator, **options)

     elif inputs['selected_algorithm'] == 'RFECV':
         options['scoring'] = get_scoring(options['scoring'])
         options['n_jobs'] = N_JOBS
         splitter, groups = get_cv(options.pop('cv_selector'))
-        # TODO support group cv splitters
-        options['cv'] = splitter
+        if groups is None:
+            options['cv'] = splitter
+        else:
+            options['cv'] = list(splitter.split(X, y, groups=groups))
         step = options.get('step', None)
         if step and step >= 1.0:
             options['step'] = int(step)
         estimator = get_estimator(inputs['estimator_selector'])
+        check_feature_importances = try_get_attr(
+            'feature_selectors', 'check_feature_importances')
+        estimator = check_feature_importances(estimator)
         new_selector = selector(estimator, **options)

+    elif inputs['selected_algorithm'] == 'DyRFECV':
+        options['scoring'] = get_scoring(options['scoring'])
+        options['n_jobs'] = N_JOBS
+        splitter, groups = get_cv(options.pop('cv_selector'))
+        if groups is None:
+            options['cv'] = splitter
+        else:
+            options['cv'] = list(splitter.split(X, y, groups=groups))
+        step = options.get('step')
+        if not step or step == 'None':
+            step = None
+        else:
+            step = ast.literal_eval(step)
+        options['step'] = step
+        estimator = get_estimator(inputs["estimator_selector"])
+        check_feature_importances = try_get_attr(
+            'feature_selectors', 'check_feature_importances')
+        estimator = check_feature_importances(estimator)
+        DyRFECV = try_get_attr('feature_selectors', 'DyRFECV')
+
+        new_selector = DyRFECV(estimator, **options)
+
     elif inputs['selected_algorithm'] == 'VarianceThreshold':
         new_selector = selector(**options)

@@ -161,12 +264,20 @@


 def get_X_y(params, file1, file2):
-    input_type = params['selected_tasks']['selected_algorithms']['input_options']['selected_input']
+    """Return machine learning inputs X, y from tabular inputs
+    """
+    input_type = (params['selected_tasks']['selected_algorithms']
+                  ['input_options']['selected_input'])
     if input_type == 'tabular':
-        header = 'infer' if params['selected_tasks']['selected_algorithms']['input_options']['header1'] else None
-        column_option = params['selected_tasks']['selected_algorithms']['input_options']['column_selector_options_1']['selected_column_selector_option']
-        if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
-            c = params['selected_tasks']['selected_algorithms']['input_options']['column_selector_options_1']['col1']
+        header = 'infer' if (params['selected_tasks']['selected_algorithms']
+                             ['input_options']['header1']) else None
+        column_option = (params['selected_tasks']['selected_algorithms']
+                         ['input_options']['column_selector_options_1']
+                         ['selected_column_selector_option'])
+        if column_option in ['by_index_number', 'all_but_by_index_number',
+                             'by_header_name', 'all_but_by_header_name']:
+            c = (params['selected_tasks']['selected_algorithms']
+                 ['input_options']['column_selector_options_1']['col1'])
         else:
             c = None
         X = read_columns(
@@ -175,15 +286,19 @@
             c_option=column_option,
             sep='\t',
             header=header,
-            parse_dates=True
-        )
+            parse_dates=True).astype(float)
     else:
         X = mmread(file1)

-    header = 'infer' if params['selected_tasks']['selected_algorithms']['input_options']['header2'] else None
-    column_option = params['selected_tasks']['selected_algorithms']['input_options']['column_selector_options_2']['selected_column_selector_option2']
-    if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
-        c = params['selected_tasks']['selected_algorithms']['input_options']['column_selector_options_2']['col2']
+    header = 'infer' if (params['selected_tasks']['selected_algorithms']
+                         ['input_options']['header2']) else None
+    column_option = (params['selected_tasks']['selected_algorithms']
+                     ['input_options']['column_selector_options_2']
+                     ['selected_column_selector_option2'])
+    if column_option in ['by_index_number', 'all_but_by_index_number',
+                         'by_header_name', 'all_but_by_header_name']:
+        c = (params['selected_tasks']['selected_algorithms']
+             ['input_options']['column_selector_options_2']['col2'])
     else:
         c = None
     y = read_columns(
@@ -192,15 +307,17 @@
         c_option=column_option,
         sep='\t',
         header=header,
-        parse_dates=True
-    )
+        parse_dates=True)
     y = y.ravel()
+
     return X, y


 class SafeEval(Interpreter):
-
-    def __init__(self, load_scipy=False, load_numpy=False, load_estimators=False):
+    """Interpreter with a customized symbol table for safe evaluation
+    """
+    def __init__(self, load_scipy=False, load_numpy=False,
+                 load_estimators=False):

         # File opening and other unneeded functions could be dropped
         unwanted = ['open', 'type', 'dir', 'id', 'str', 'repr']
@@ -208,7 +325,8 @@
         # Allowed symbol table. Add more if needed.
         new_syms = {
             'np_arange': getattr(np, 'arange'),
-            'ensemble_ExtraTreesClassifier': getattr(ensemble, 'ExtraTreesClassifier')
+            'ensemble_ExtraTreesClassifier':
+                getattr(ensemble, 'ExtraTreesClassifier')
         }

         syms = make_symbol_table(use_numpy=False, **new_syms)
@@ -216,80 +334,109 @@
         if load_scipy:
             scipy_distributions = scipy.stats.distributions.__dict__
             for k, v in scipy_distributions.items():
-                if isinstance(v, (scipy.stats.rv_continuous, scipy.stats.rv_discrete)):
+                if isinstance(v, (scipy.stats.rv_continuous,
+                                  scipy.stats.rv_discrete)):
                     syms['scipy_stats_' + k] = v

         if load_numpy:
-            from_numpy_random = ['beta', 'binomial', 'bytes', 'chisquare', 'choice', 'dirichlet', 'division',
-                                'exponential', 'f', 'gamma', 'geometric', 'gumbel', 'hypergeometric',
-                                'laplace', 'logistic', 'lognormal', 'logseries', 'mtrand', 'multinomial',
-                                'multivariate_normal', 'negative_binomial', 'noncentral_chisquare', 'noncentral_f',
-                                'normal', 'pareto', 'permutation', 'poisson', 'power', 'rand', 'randint',
-                                'randn', 'random', 'random_integers', 'random_sample', 'ranf', 'rayleigh',
-                                'sample', 'seed', 'set_state', 'shuffle', 'standard_cauchy', 'standard_exponential',
-                                'standard_gamma', 'standard_normal', 'standard_t', 'triangular', 'uniform',
-                                'vonmises', 'wald', 'weibull', 'zipf']
+            from_numpy_random = [
+                'beta', 'binomial', 'bytes', 'chisquare', 'choice',
+                'dirichlet', 'division', 'exponential', 'f', 'gamma',
+                'geometric', 'gumbel', 'hypergeometric', 'laplace',
+                'logistic', 'lognormal', 'logseries', 'mtrand',
+                'multinomial', 'multivariate_normal', 'negative_binomial',
+                'noncentral_chisquare', 'noncentral_f', 'normal', 'pareto',
+                'permutation', 'poisson', 'power', 'rand', 'randint',
+                'randn', 'random', 'random_integers', 'random_sample',
+                'ranf', 'rayleigh', 'sample', 'seed', 'set_state',
+                'shuffle', 'standard_cauchy', 'standard_exponential',
+                'standard_gamma', 'standard_normal', 'standard_t',
+                'triangular', 'uniform', 'vonmises', 'wald', 'weibull', 'zipf']
             for f in from_numpy_random:
                 syms['np_random_' + f] = getattr(np.random, f)

         if load_estimators:
             estimator_table = {
-                'sklearn_svm' : getattr(sklearn, 'svm'),
-                'sklearn_tree' : getattr(sklearn, 'tree'),
-                'sklearn_ensemble' : getattr(sklearn, 'ensemble'),
-                'sklearn_neighbors' : getattr(sklearn, 'neighbors'),
-                'sklearn_naive_bayes' : getattr(sklearn, 'naive_bayes'),
-                'sklearn_linear_model' : getattr(sklearn, 'linear_model'),
-                'sklearn_cluster' : getattr(sklearn, 'cluster'),
-                'sklearn_decomposition' : getattr(sklearn, 'decomposition'),
-                'sklearn_preprocessing' : getattr(sklearn, 'preprocessing'),
-                'sklearn_feature_selection' : getattr(sklearn, 'feature_selection'),
-                'sklearn_kernel_approximation' : getattr(sklearn, 'kernel_approximation'),
+                'sklearn_svm': getattr(sklearn, 'svm'),
+                'sklearn_tree': getattr(sklearn, 'tree'),
+                'sklearn_ensemble': getattr(sklearn, 'ensemble'),
+                'sklearn_neighbors': getattr(sklearn, 'neighbors'),
+                'sklearn_naive_bayes': getattr(sklearn, 'naive_bayes'),
+                'sklearn_linear_model': getattr(sklearn, 'linear_model'),
+                'sklearn_cluster': getattr(sklearn, 'cluster'),
+                'sklearn_decomposition': getattr(sklearn, 'decomposition'),
+                'sklearn_preprocessing': getattr(sklearn, 'preprocessing'),
+                'sklearn_feature_selection':
+                    getattr(sklearn, 'feature_selection'),
+                'sklearn_kernel_approximation':
+                    getattr(sklearn, 'kernel_approximation'),
                 'skrebate_ReliefF': getattr(skrebate, 'ReliefF'),
                 'skrebate_SURF': getattr(skrebate, 'SURF'),
                 'skrebate_SURFstar': getattr(skrebate, 'SURFstar'),
                 'skrebate_MultiSURF': getattr(skrebate, 'MultiSURF'),
                 'skrebate_MultiSURFstar': getattr(skrebate, 'MultiSURFstar'),
                 'skrebate_TuRF': getattr(skrebate, 'TuRF'),
-                'xgboost_XGBClassifier' : getattr(xgboost, 'XGBClassifier'),
-                'xgboost_XGBRegressor' : getattr(xgboost, 'XGBRegressor')
+                'xgboost_XGBClassifier': getattr(xgboost, 'XGBClassifier'),
+                'xgboost_XGBRegressor': getattr(xgboost, 'XGBRegressor'),
+                'imblearn_over_sampling': getattr(imblearn, 'over_sampling'),
+                'imblearn_combine': getattr(imblearn, 'combine')
             }
             syms.update(estimator_table)

         for key in unwanted:
             syms.pop(key, None)

-        super(SafeEval, self).__init__(symtable=syms, use_numpy=False, minimal=False,
-                                        no_if=True, no_for=True, no_while=True, no_try=True,
-                                        no_functiondef=True, no_ifexp=True, no_listcomp=False,
-                                        no_augassign=False, no_assert=True, no_delete=True,
-                                        no_raise=True, no_print=True)
-
+        super(SafeEval, self).__init__(
+            symtable=syms, use_numpy=False, minimal=False,
+            no_if=True, no_for=True, no_while=True, no_try=True,
+            no_functiondef=True, no_ifexp=True, no_listcomp=False,
+            no_augassign=False, no_assert=True, no_delete=True,
+            no_raise=True, no_print=True)


 def get_estimator(estimator_json):
-
+    """Return a sklearn or compatible estimator from Galaxy tool inputs
+    """
     estimator_module = estimator_json['selected_module']

-    if estimator_module == 'customer_estimator':
+    if estimator_module == 'custom_estimator':
         c_estimator = estimator_json['c_estimator']
         with open(c_estimator, 'rb') as model_handler:
             new_model = load_model(model_handler)
         return new_model

+    if estimator_module == "binarize_target":
+        wrapped_estimator = estimator_json['wrapped_estimator']
+        with open(wrapped_estimator, 'rb') as model_handler:
+            wrapped_estimator = load_model(model_handler)
+        options = {}
+        if estimator_json['z_score'] is not None:
+            options['z_score'] = estimator_json['z_score']
+        if estimator_json['value'] is not None:
+            options['value'] = estimator_json['value']
+        options['less_is_positive'] = estimator_json['less_is_positive']
+        if estimator_json['clf_or_regr'] == 'BinarizeTargetClassifier':
+            klass = try_get_attr('iraps_classifier',
+                                 'BinarizeTargetClassifier')
+        else:
+            klass = try_get_attr('iraps_classifier',
+                                 'BinarizeTargetRegressor')
+        return klass(wrapped_estimator, **options)
+
     estimator_cls = estimator_json['selected_estimator']

     if estimator_module == 'xgboost':
-        cls = getattr(xgboost, estimator_cls)
+        klass = getattr(xgboost, estimator_cls)
     else:
         module = getattr(sklearn, estimator_module)
-        cls = getattr(module, estimator_cls)
+        klass = getattr(module, estimator_cls)

-    estimator = cls()
+    estimator = klass()

     estimator_params = estimator_json['text_params'].strip()
     if estimator_params != '':
         try:
+            safe_eval = SafeEval()
             params = safe_eval('dict(' + estimator_params + ')')
         except ValueError:
             sys.exit("Unsupported parameter input: `%s`" % estimator_params)
@@ -301,9 +448,13 @@


 def get_cv(cv_json):
-    """
-    cv_json:
-            e.g.:
+    """ Return CV splitter from Galaxy tool inputs
+
+    Parameters
+    ----------
+    cv_json : dict
+        From Galaxy tool inputs.
+        e.g.:
             {
                 'selected_cv': 'StratifiedKFold',
                 'n_splits': 3,
@@ -315,15 +466,25 @@
     if cv == 'default':
         return cv_json['n_splits'], None

-    groups = cv_json.pop('groups', None)
-    if groups:
-        groups = groups.strip()
-        if groups != '':
-            if groups.startswith('__ob__'):
-                groups = groups[6:]
-            if groups.endswith('__cb__'):
-                groups = groups[:-6]
-            groups = [int(x.strip()) for x in groups.split(',')]
+    groups = cv_json.pop('groups_selector', None)
+    if groups is not None:
+        infile_g = groups['infile_g']
+        header = 'infer' if groups['header_g'] else None
+        column_option = (groups['column_selector_options_g']
+                         ['selected_column_selector_option_g'])
+        if column_option in ['by_index_number', 'all_but_by_index_number',
+                             'by_header_name', 'all_but_by_header_name']:
+            c = groups['column_selector_options_g']['col_g']
+        else:
+            c = None
+        groups = read_columns(
+                infile_g,
+                c=c,
+                c_option=column_option,
+                sep='\t',
+                header=header,
+                parse_dates=True)
+        groups = groups.ravel()

     for k, v in cv_json.items():
         if v == '':
@@ -341,7 +502,12 @@
     if test_size and test_size > 1.0:
         cv_json['test_size'] = int(test_size)

-    cv_class = getattr(model_selection, cv)
+    if cv == 'OrderedKFold':
+        cv_class = try_get_attr('model_validations', 'OrderedKFold')
+    elif cv == 'RepeatedOrderedKFold':
+        cv_class = try_get_attr('model_validations', 'RepeatedOrderedKFold')
+    else:
+        cv_class = getattr(model_selection, cv)
     splitter = cv_class(**cv_json)

     return splitter, groups
@@ -349,6 +515,9 @@

 # needed when sklearn < v0.20
 def balanced_accuracy_score(y_true, y_pred):
+    """Compute balanced accuracy score, which is now available in
+        scikit-learn from v0.20.0.
+    """
     C = metrics.confusion_matrix(y_true, y_pred)
     with np.errstate(divide='ignore', invalid='ignore'):
         per_class = np.diag(C) / C.sum(axis=1)
@@ -360,21 +529,71 @@


 def get_scoring(scoring_json):
-
+    """Return single sklearn scorer class
+        or multiple scorers in a dictionary
+    """
     if scoring_json['primary_scoring'] == 'default':
         return None

     my_scorers = metrics.SCORERS
+    my_scorers['binarize_auc_scorer'] =\
+        try_get_attr('iraps_classifier', 'binarize_auc_scorer')
+    my_scorers['binarize_average_precision_scorer'] =\
+        try_get_attr('iraps_classifier', 'binarize_average_precision_scorer')
     if 'balanced_accuracy' not in my_scorers:
-        my_scorers['balanced_accuracy'] = metrics.make_scorer(balanced_accuracy_score)
+        my_scorers['balanced_accuracy'] =\
+            metrics.make_scorer(balanced_accuracy_score)

     if scoring_json['secondary_scoring'] != 'None'\
-            and scoring_json['secondary_scoring'] != scoring_json['primary_scoring']:
-        scoring = {}
-        scoring['primary'] = my_scorers[scoring_json['primary_scoring']]
+            and scoring_json['secondary_scoring'] !=\
+            scoring_json['primary_scoring']:
+        return_scoring = {}
+        primary_scoring = scoring_json['primary_scoring']
+        return_scoring[primary_scoring] = my_scorers[primary_scoring]
         for scorer in scoring_json['secondary_scoring'].split(','):
             if scorer != scoring_json['primary_scoring']:
-                scoring[scorer] = my_scorers[scorer]
-        return scoring
+                return_scoring[scorer] = my_scorers[scorer]
+        return return_scoring

     return my_scorers[scoring_json['primary_scoring']]
+
+
+def get_search_params(estimator):
+    """Format the output of `estimator.get_params()`
+    """
+    params = estimator.get_params()
+    results = []
+    for k, v in params.items():
+        # params below won't be shown for search in the searchcv tool
+        keywords = ('n_jobs', 'pre_dispatch', 'memory', 'steps',
+                    'nthread', 'verbose')
+        if k.endswith(keywords):
+            results.append(['*', k, k+": "+repr(v)])
+        else:
+            results.append(['@', k, k+": "+repr(v)])
+    results.append(
+        ["", "Note:",
+         "@, params eligible for search in searchcv tool."])
+
+    return results
+
+
+def try_get_attr(module, name):
+    """Get an attribute from an already-imported custom module
+
+    Parameters
+    ----------
+    module : str
+        Module name
+    name : str
+        Attribute (class/function) name.
+
+    Returns
+    -------
+    class or function
+    """
+    mod = sys.modules.get(module, None)
+    if mod:
+        return getattr(mod, name)
+    else:
+        raise Exception("No module named %s." % module)